diff --git a/cmake/OpenCVDetectInferenceEngine.cmake b/cmake/OpenCVDetectInferenceEngine.cmake index 0611eccc5b..144d59e060 100644 --- a/cmake/OpenCVDetectInferenceEngine.cmake +++ b/cmake/OpenCVDetectInferenceEngine.cmake @@ -129,9 +129,9 @@ endif() if(INF_ENGINE_TARGET) if(NOT INF_ENGINE_RELEASE) - message(WARNING "InferenceEngine version has not been set, 2020.2 will be used by default. Set INF_ENGINE_RELEASE variable if you experience build errors.") + message(WARNING "InferenceEngine version has not been set, 2020.3 will be used by default. Set INF_ENGINE_RELEASE variable if you experience build errors.") endif() - set(INF_ENGINE_RELEASE "2020020000" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2020.1.0.2 -> 2020010002)") + set(INF_ENGINE_RELEASE "2020030000" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2020.1.0.2 -> 2020010002)") set_target_properties(${INF_ENGINE_TARGET} PROPERTIES INTERFACE_COMPILE_DEFINITIONS "HAVE_INF_ENGINE=1;INF_ENGINE_RELEASE=${INF_ENGINE_RELEASE}" ) diff --git a/cmake/OpenCVFindMKL.cmake b/cmake/OpenCVFindMKL.cmake index 19a76ddf57..141481ed42 100644 --- a/cmake/OpenCVFindMKL.cmake +++ b/cmake/OpenCVFindMKL.cmake @@ -79,9 +79,10 @@ get_mkl_version(${MKL_INCLUDE_DIRS}/mkl_version.h) #determine arch if(CMAKE_CXX_SIZEOF_DATA_PTR EQUAL 8) - set(MKL_X64 1) - set(MKL_ARCH "intel64") - + set(MKL_ARCH_LIST "intel64") + if(MSVC) + list(APPEND MKL_ARCH_LIST "win-x64") + endif() include(CheckTypeSize) CHECK_TYPE_SIZE(int _sizeof_int) if (_sizeof_int EQUAL 4) @@ -90,14 +91,19 @@ if(CMAKE_CXX_SIZEOF_DATA_PTR EQUAL 8) set(MKL_ARCH_SUFFIX "ilp64") endif() else() - set(MKL_ARCH "ia32") + set(MKL_ARCH_LIST "ia32") set(MKL_ARCH_SUFFIX "c") endif() if(MKL_VERSION_STR VERSION_GREATER "11.3.0" OR MKL_VERSION_STR VERSION_EQUAL "11.3.0") set(mkl_lib_find_paths - ${MKL_ROOT_DIR}/lib - ${MKL_ROOT_DIR}/lib/${MKL_ARCH} ${MKL_ROOT_DIR}/../tbb/lib/${MKL_ARCH}) + ${MKL_ROOT_DIR}/lib) + foreach(MKL_ARCH ${MKL_ARCH_LIST}) + list(APPEND mkl_lib_find_paths + ${MKL_ROOT_DIR}/lib/${MKL_ARCH} + ${MKL_ROOT_DIR}/../tbb/lib/${MKL_ARCH} + ${MKL_ROOT_DIR}/${MKL_ARCH}) + endforeach() set(mkl_lib_list "mkl_intel_${MKL_ARCH_SUFFIX}") @@ -121,7 +127,7 @@ endif() set(MKL_LIBRARIES "") foreach(lib ${mkl_lib_list}) - find_library(${lib} ${lib} ${mkl_lib_find_paths}) + find_library(${lib} NAMES ${lib} ${lib}_dll HINTS ${mkl_lib_find_paths}) mark_as_advanced(${lib}) if(NOT ${lib}) mkl_fail() diff --git a/doc/tutorials/introduction/macos_install/macos_install.markdown b/doc/tutorials/introduction/macos_install/macos_install.markdown index 8321897a15..99d5e65f9a 100644 --- a/doc/tutorials/introduction/macos_install/macos_install.markdown +++ b/doc/tutorials/introduction/macos_install/macos_install.markdown @@ -30,7 +30,7 @@ Installing CMake -# Install the dmg package and launch it from Applications. That will give you the UI app of CMake --# From the CMake app window, choose menu Tools --> Install For Command Line Use. +-# From the CMake app window, choose menu Tools --> How to Install For Command Line Use. Then, follow the instructions from the pop-up there. -# Install folder will be /usr/bin/ by default, submit it by choosing Install command line links. @@ -66,7 +66,7 @@ git clone https://github.com/opencv/opencv_contrib.git Building OpenCV from Source Using CMake --------------------------------------- --# Create a temporary directory, which we denote as ``, where you want to put +-# Create a temporary directory, which we denote as `build_opencv`, where you want to put the generated Makefiles, project files as well the object files and output binaries and enter there. @@ -87,8 +87,8 @@ Building OpenCV from Source Using CMake or cmake-gui - - set full path to OpenCV source code, e.g. `/home/user/opencv` - - set full path to ``, e.g. `/home/user/build_opencv` + - set the OpenCV source code path to, e.g. `/home/user/opencv` + - set the binary build path to your CMake build directory, e.g. `/home/user/build_opencv` - set optional parameters - run: "Configure" - run: "Generate" diff --git a/modules/core/include/opencv2/core/eigen.hpp b/modules/core/include/opencv2/core/eigen.hpp index 8afc06caa7..3d7ba8fa14 100644 --- a/modules/core/include/opencv2/core/eigen.hpp +++ b/modules/core/include/opencv2/core/eigen.hpp @@ -66,10 +66,18 @@ namespace cv { -//! @addtogroup core_eigen +/** @addtogroup core_eigen +These functions are provided for OpenCV-Eigen interoperability. They convert `Mat` +objects to corresponding `Eigen::Matrix` objects and vice-versa. Consult the [Eigen +documentation](https://eigen.tuxfamily.org/dox/group__TutorialMatrixClass.html) for +information about the `Matrix` template type. + +@note Using these functions requires the `Eigen/Dense` or similar header to be +included before this header. +*/ //! @{ -#ifdef OPENCV_EIGEN_TENSOR_SUPPORT +#if defined(OPENCV_EIGEN_TENSOR_SUPPORT) || defined(CV_DOXYGEN) /** @brief Converts an Eigen::Tensor to a cv::Mat. The method converts an Eigen::Tensor with shape (H x W x C) to a cv::Mat where: diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index ee6b7b1d43..db57521f85 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -2248,7 +2248,7 @@ struct Net::Impl : public detail::NetImplBase auto ieInpNode = inputNodes[i].dynamicCast(); CV_Assert(oid < ieInpNode->node->get_output_size()); -#if INF_ENGINE_VER_MAJOR_GT(2020030000) +#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_3) inputNodes[i] = Ptr(new InfEngineNgraphNode(ieInpNode->node->get_output_as_single_output_node(oid))); #else inputNodes[i] = Ptr(new InfEngineNgraphNode(ieInpNode->node->get_output_as_single_output_node(oid, false))); diff --git a/modules/dnn/src/ie_ngraph.cpp b/modules/dnn/src/ie_ngraph.cpp index cf94500a8c..e1bf738313 100644 --- a/modules/dnn/src/ie_ngraph.cpp +++ b/modules/dnn/src/ie_ngraph.cpp @@ -82,7 +82,7 @@ public: return type_info; } -#if INF_ENGINE_VER_MAJOR_GT(2020020000) +#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2020_3) NgraphCustomOp(const ngraph::OutputVector& inputs, #else NgraphCustomOp(const ngraph::NodeVector& inputs, @@ -112,7 +112,7 @@ public: std::shared_ptr copy_with_new_args(const ngraph::NodeVector& new_args) const override { -#if INF_ENGINE_VER_MAJOR_GT(2020020000) +#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2020_3) return std::make_shared(ngraph::as_output_vector(new_args), params); #else return std::make_shared(new_args, params); @@ -239,7 +239,9 @@ private: class InfEngineNgraphExtension : public InferenceEngine::IExtension { public: +#if INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2020_2) virtual void SetLogCallback(InferenceEngine::IErrorListener&) noexcept {} +#endif virtual void Unload() noexcept {} virtual void Release() noexcept {} virtual void GetVersion(const InferenceEngine::Version*&) const noexcept {} @@ -283,7 +285,7 @@ InfEngineNgraphNode::InfEngineNgraphNode(const std::vector >& n {"internals", shapesToStr(internals)} }; -#if INF_ENGINE_VER_MAJOR_GT(2020020000) +#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2020_3) ngraph::OutputVector inp_nodes; #else ngraph::NodeVector inp_nodes; diff --git a/modules/dnn/src/op_inf_engine.hpp b/modules/dnn/src/op_inf_engine.hpp index cbe77d9107..5bca15ea1e 100644 --- a/modules/dnn/src/op_inf_engine.hpp +++ b/modules/dnn/src/op_inf_engine.hpp @@ -25,10 +25,11 @@ #define INF_ENGINE_RELEASE_2019R3 2019030000 #define INF_ENGINE_RELEASE_2020_1 2020010000 #define INF_ENGINE_RELEASE_2020_2 2020020000 +#define INF_ENGINE_RELEASE_2020_3 2020030000 #ifndef INF_ENGINE_RELEASE -#warning("IE version have not been provided via command-line. Using 2020.2 by default") -#define INF_ENGINE_RELEASE INF_ENGINE_RELEASE_2020_2 +#warning("IE version have not been provided via command-line. Using 2020.3 by default") +#define INF_ENGINE_RELEASE INF_ENGINE_RELEASE_2020_3 #endif #define INF_ENGINE_VER_MAJOR_GT(ver) (((INF_ENGINE_RELEASE) / 10000) > ((ver) / 10000)) @@ -226,7 +227,9 @@ private: class InfEngineExtension : public InferenceEngine::IExtension { public: +#if INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2020_2) virtual void SetLogCallback(InferenceEngine::IErrorListener&) noexcept {} +#endif virtual void Unload() noexcept {} virtual void Release() noexcept {} virtual void GetVersion(const InferenceEngine::Version*&) const noexcept {} diff --git a/samples/dnn/text_detection.py b/samples/dnn/text_detection.py index 9ea4c10190..7014a80148 100644 --- a/samples/dnn/text_detection.py +++ b/samples/dnn/text_detection.py @@ -1,25 +1,81 @@ +''' + Text detection model: https://github.com/argman/EAST + Download link: https://www.dropbox.com/s/r2ingd0l3zt8hxs/frozen_east_text_detection.tar.gz?dl=1 + Text recognition model taken from here: https://github.com/meijieru/crnn.pytorch + How to convert from pb to onnx: + Using classes from here: https://github.com/meijieru/crnn.pytorch/blob/master/models/crnn.py + import torch + import models.crnn as CRNN + model = CRNN(32, 1, 37, 256) + model.load_state_dict(torch.load('crnn.pth')) + dummy_input = torch.randn(1, 1, 32, 100) + torch.onnx.export(model, dummy_input, "crnn.onnx", verbose=True) +''' + + # Import required modules +import numpy as np import cv2 as cv import math import argparse ############ Add argument parser for command line arguments ############ -parser = argparse.ArgumentParser(description='Use this script to run TensorFlow implementation (https://github.com/argman/EAST) of EAST: An Efficient and Accurate Scene Text Detector (https://arxiv.org/abs/1704.03155v2)') -parser.add_argument('--input', help='Path to input image or video file. Skip this argument to capture frames from a camera.') -parser.add_argument('--model', required=True, - help='Path to a binary .pb file of model contains trained weights.') +parser = argparse.ArgumentParser( + description="Use this script to run TensorFlow implementation (https://github.com/argman/EAST) of " + "EAST: An Efficient and Accurate Scene Text Detector (https://arxiv.org/abs/1704.03155v2)" + "The OCR model can be obtained from converting the pretrained CRNN model to .onnx format from the github repository https://github.com/meijieru/crnn.pytorch") +parser.add_argument('--input', + help='Path to input image or video file. Skip this argument to capture frames from a camera.') +parser.add_argument('--model', '-m', required=True, + help='Path to a binary .pb file contains trained detector network.') +parser.add_argument('--ocr', default="crnn.onnx", + help="Path to a binary .pb or .onnx file contains trained recognition network", ) parser.add_argument('--width', type=int, default=320, help='Preprocess input image by resizing to a specific width. It should be multiple by 32.') -parser.add_argument('--height',type=int, default=320, +parser.add_argument('--height', type=int, default=320, help='Preprocess input image by resizing to a specific height. It should be multiple by 32.') -parser.add_argument('--thr',type=float, default=0.5, +parser.add_argument('--thr', type=float, default=0.5, help='Confidence threshold.') -parser.add_argument('--nms',type=float, default=0.4, +parser.add_argument('--nms', type=float, default=0.4, help='Non-maximum suppression threshold.') args = parser.parse_args() + ############ Utility functions ############ -def decode(scores, geometry, scoreThresh): + +def fourPointsTransform(frame, vertices): + vertices = np.asarray(vertices) + outputSize = (100, 32) + targetVertices = np.array([ + [0, outputSize[1] - 1], + [0, 0], + [outputSize[0] - 1, 0], + [outputSize[0] - 1, outputSize[1] - 1]], dtype="float32") + + rotationMatrix = cv.getPerspectiveTransform(vertices, targetVertices) + result = cv.warpPerspective(frame, rotationMatrix, outputSize) + return result + + +def decodeText(scores): + text = "" + alphabet = "0123456789abcdefghijklmnopqrstuvwxyz" + for i in range(scores.shape[0]): + c = np.argmax(scores[i][0]) + if c != 0: + text += alphabet[c - 1] + else: + text += '-' + + # adjacent same letters as well as background text must be removed to get the final output + char_list = [] + for i in range(len(text)): + if text[i] != '-' and (not (i > 0 and text[i] == text[i - 1])): + char_list.append(text[i]) + return ''.join(char_list) + + +def decodeBoundingBoxes(scores, geometry, scoreThresh): detections = [] confidences = [] @@ -47,7 +103,7 @@ def decode(scores, geometry, scoreThresh): score = scoresData[x] # If score is lower than threshold score, move to next x - if(score < scoreThresh): + if (score < scoreThresh): continue # Calculate offset @@ -66,24 +122,27 @@ def decode(scores, geometry, scoreThresh): # Find points for rectangle p1 = (-sinA * h + offset[0], -cosA * h + offset[1]) - p3 = (-cosA * w + offset[0], sinA * w + offset[1]) - center = (0.5*(p1[0]+p3[0]), 0.5*(p1[1]+p3[1])) - detections.append((center, (w,h), -1*angle * 180.0 / math.pi)) + p3 = (-cosA * w + offset[0], sinA * w + offset[1]) + center = (0.5 * (p1[0] + p3[0]), 0.5 * (p1[1] + p3[1])) + detections.append((center, (w, h), -1 * angle * 180.0 / math.pi)) confidences.append(float(score)) # Return detections and confidences return [detections, confidences] + def main(): # Read and store arguments confThreshold = args.thr nmsThreshold = args.nms inpWidth = args.width inpHeight = args.height - model = args.model + modelDetector = args.model + modelRecognition = args.ocr # Load network - net = cv.dnn.readNet(model) + detector = cv.dnn.readNet(modelDetector) + recognizer = cv.dnn.readNet(modelRecognition) # Create a new named window kWinName = "EAST: An Efficient and Accurate Scene Text Detector" @@ -95,6 +154,7 @@ def main(): # Open a video file or an image file or a camera stream cap = cv.VideoCapture(args.input if args.input else 0) + tickmeter = cv.TickMeter() while cv.waitKey(1) < 0: # Read frame hasFrame, frame = cap.read() @@ -111,19 +171,20 @@ def main(): # Create a 4D blob from frame. blob = cv.dnn.blobFromImage(frame, 1.0, (inpWidth, inpHeight), (123.68, 116.78, 103.94), True, False) - # Run the model - net.setInput(blob) - outs = net.forward(outNames) - t, _ = net.getPerfProfile() - label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency()) + # Run the detection model + detector.setInput(blob) + + tickmeter.start() + outs = detector.forward(outNames) + tickmeter.stop() # Get scores and geometry scores = outs[0] geometry = outs[1] - [boxes, confidences] = decode(scores, geometry, confThreshold) + [boxes, confidences] = decodeBoundingBoxes(scores, geometry, confThreshold) # Apply NMS - indices = cv.dnn.NMSBoxesRotated(boxes, confidences, confThreshold,nmsThreshold) + indices = cv.dnn.NMSBoxesRotated(boxes, confidences, confThreshold, nmsThreshold) for i in indices: # get 4 corners of the rotated rect vertices = cv.boxPoints(boxes[i[0]]) @@ -131,16 +192,40 @@ def main(): for j in range(4): vertices[j][0] *= rW vertices[j][1] *= rH + + + # get cropped image using perspective transform + if modelRecognition: + cropped = fourPointsTransform(frame, vertices) + cropped = cv.cvtColor(cropped, cv.COLOR_BGR2GRAY) + + # Create a 4D blob from cropped image + blob = cv.dnn.blobFromImage(cropped, size=(100, 32), mean=127.5, scalefactor=1 / 127.5) + recognizer.setInput(blob) + + # Run the recognition model + tickmeter.start() + result = recognizer.forward() + tickmeter.stop() + + # decode the result into text + wordRecognized = decodeText(result) + cv.putText(frame, wordRecognized, (int(vertices[1][0]), int(vertices[1][1])), cv.FONT_HERSHEY_SIMPLEX, + 0.5, (255, 0, 0)) + for j in range(4): p1 = (vertices[j][0], vertices[j][1]) p2 = (vertices[(j + 1) % 4][0], vertices[(j + 1) % 4][1]) cv.line(frame, p1, p2, (0, 255, 0), 1) # Put efficiency information + label = 'Inference time: %.2f ms' % (tickmeter.getTimeMilli()) cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0)) # Display the frame - cv.imshow(kWinName,frame) + cv.imshow(kWinName, frame) + tickmeter.reset() + if __name__ == "__main__": main()