Merge pull request #17570 from HannibalAPE:text_det_recog_demo

[GSoC] High Level API and Samples for Scene Text Detection and Recognition * APIs and samples for scene text detection and recognition * update APIs and tutorial for Text Detection and Recognition * API updates: (1) put decodeType into struct Voc (2) optimize the post-processing of DB * sample update: (1) add transformation into scene_text_spotting.cpp (2) modify text_detection.cpp with API update * update tutorial * simplify text recognition API update tutorial * update impl usage in recognize() and detect() * dnn: refactoring public API of TextRecognitionModel/TextDetectionModel * update provided models update opencv.bib * dnn: adjust text rectangle angle * remove points ordering operation in model.cpp * update gts of DB test in test_model.cpp * dnn: ensure to keep text rectangle angle - avoid 90/180 degree turns * dnn(text): use quadrangle result in TextDetectionModel API * dnn: update Text Detection API (1) keep points' order consistent with (bl, tl, tr, br) in unclip (2) update contourScore with boundingRect
2020-12-04 02:47:40 +08:00
parent 5ecf693774
commit 22d64ae08f
19 changed files with 2339 additions and 181 deletions
@@ -0,0 +1,144 @@
+#include <iostream>
+#include <fstream>
+
+#include <opencv2/imgproc.hpp>
+#include <opencv2/highgui.hpp>
+#include <opencv2/dnn/dnn.hpp>
+
+using namespace cv;
+using namespace cv::dnn;
+
+// Command-line options in cv::CommandLineParser syntax; every entry reads
+// "{ name alias | default value | description }".
+// Note: this sample never captures from a camera, so inputImage is mandatory
+// whenever evaluate is false (main() asserts that it was supplied).
+String keys =
+        "{ help  h                          | | Print help message. }"
+        "{ inputImage i                     | | Path to an input cropped text image. Required when evaluate is false. }"
+        "{ modelPath mp                     | | Path to a binary .onnx file contains trained CRNN text recognition model. "
+            "Download links are provided in doc/tutorials/dnn/dnn_text_spotting/dnn_text_spotting.markdown}"
+        "{ RGBInput rgb                     |0| 0: imread with flags=IMREAD_GRAYSCALE; 1: imread with flags=IMREAD_COLOR. }"
+        "{ evaluate e                       |false| false: predict with input images; true: evaluate on benchmarks. }"
+        "{ evalDataPath edp                 | | Path to benchmarks for evaluation. "
+            "Download links are provided in doc/tutorials/dnn/dnn_text_spotting/dnn_text_spotting.markdown}"
+        "{ vocabularyPath vp                | alphabet_36.txt | Path to recognition vocabulary. "
+            "Download links are provided in doc/tutorials/dnn/dnn_text_spotting/dnn_text_spotting.markdown}";
+
+// Normalizes a prediction before it is compared with a benchmark label
+// (defined after main()).
+String convertForEval(String &input);
+
+// Entry point of the text recognition sample.
+//
+// Depending on the "evaluate" option it either
+//   * recognizes the text of a single image given by "inputImage", prints the
+//     prediction and shows the image, or
+//   * runs the recognizer on every entry of <evalDataPath>/test_gts.txt and
+//     prints the accuracy (in percent) and the average inference time.
+//
+// Returns 0 on success or after printing the help message, and 1 when the
+// command line cannot be parsed or the benchmark contains no usable entry.
+// Missing paths and unreadable files abort through CV_Assert.
+int main(int argc, char** argv)
+{
+    // Parse arguments
+    CommandLineParser parser(argc, argv, keys);
+    parser.about("Use this script to run the PyTorch implementation of "
+                 "An End-to-End Trainable Neural Network for Image-based SequenceRecognition and Its Application to Scene Text Recognition "
+                 "(https://arxiv.org/abs/1507.05717)");
+    if (argc == 1 || parser.has("help"))
+    {
+        parser.printMessage();
+        return 0;
+    }
+
+    // Read every typed option before check(): conversion errors are only
+    // recorded by get<>() and would go unreported if it ran after check().
+    const String modelPath = parser.get<String>("modelPath");
+    const String vocPath = parser.get<String>("vocabularyPath");
+    const int imreadRGB = parser.get<int>("RGBInput");
+    const bool evaluate = parser.get<bool>("evaluate");
+    const String evalDataPath = parser.get<String>("evalDataPath");
+
+    if (!parser.check())
+    {
+        parser.printErrors();
+        return 1;
+    }
+
+    // Load the network
+    CV_Assert(!modelPath.empty());
+    TextRecognitionModel recognizer(modelPath);
+
+    // Load vocabulary: one entry per line of the file
+    CV_Assert(!vocPath.empty());
+    std::ifstream vocFile;
+    vocFile.open(samples::findFile(vocPath));
+    CV_Assert(vocFile.is_open());
+    String vocLine;
+    std::vector<String> vocabulary;
+    while (std::getline(vocFile, vocLine)) {
+        // Files with Windows line endings leave a '\r' at the end of each
+        // line; it must not become part of the vocabulary entry.
+        if (!vocLine.empty() && vocLine[vocLine.size() - 1] == '\r')
+            vocLine.erase(vocLine.size() - 1);
+        vocabulary.push_back(vocLine);
+    }
+    recognizer.setVocabulary(vocabulary);
+    recognizer.setDecodeType("CTC-greedy");
+
+    // Set parameters
+    double scale = 1.0 / 127.5;
+    Scalar mean = Scalar(127.5, 127.5, 127.5);
+    Size inputSize = Size(100, 32);
+    recognizer.setInputParams(scale, inputSize, mean);
+
+    if (evaluate)
+    {
+        // For evaluation
+        CV_Assert(!evalDataPath.empty());
+        String gtPath = evalDataPath + "/test_gts.txt";
+        std::ifstream evalGts;
+        evalGts.open(gtPath);
+        CV_Assert(evalGts.is_open());
+
+        String gtLine;
+        int cntRight=0, cntAll=0;
+        TickMeter timer;
+        timer.reset();
+
+        while (std::getline(evalGts, gtLine)) {
+            if (!gtLine.empty() && gtLine[gtLine.size() - 1] == '\r')
+                gtLine.erase(gtLine.size() - 1);
+
+            // An entry reads "<image path relative to evalDataPath> <label>".
+            // Lines without a separator (e.g. blank lines) are skipped:
+            // find_first_of() returns npos for them and npos + 1 wraps to 0,
+            // which would turn the whole line into both path and label.
+            size_t splitLoc = gtLine.find_first_of(' ');
+            if (splitLoc == String::npos)
+                continue;
+            String imgPath = evalDataPath + '/' + gtLine.substr(0, splitLoc);
+            String gt = gtLine.substr(splitLoc+1);
+
+            // Inference; only recognize() is timed, not the image loading
+            Mat frame = imread(samples::findFile(imgPath), imreadRGB);
+            CV_Assert(!frame.empty());
+            timer.start();
+            std::string recognitionResult = recognizer.recognize(frame);
+            timer.stop();
+
+            if (gt == convertForEval(recognitionResult))
+                cntRight++;
+
+            cntAll++;
+        }
+
+        // Without any sample both statistics below would divide by zero
+        if (cntAll == 0)
+        {
+            std::cerr << "No ground-truth entries found in " << gtPath << std::endl;
+            return 1;
+        }
+        // The label announces a percentage, so scale the ratio accordingly
+        std::cout << "Accuracy(%): " << 100.0 * (double)(cntRight) / (double)(cntAll) << std::endl;
+        std::cout << "Average Inference Time(ms): " << timer.getTimeMilli() / (double)(cntAll) << std::endl;
+    }
+    else
+    {
+        // Create a window
+        static const std::string winName = "Input Cropped Image";
+
+        // Open an image file
+        CV_Assert(parser.has("inputImage"));
+        Mat frame = imread(samples::findFile(parser.get<String>("inputImage")), imreadRGB);
+        CV_Assert(!frame.empty());
+
+        // Recognition
+        std::string recognitionResult = recognizer.recognize(frame);
+
+        imshow(winName, frame);
+        std::cout << "Prediction: '" << recognitionResult << "'" << std::endl;
+        // Keep the window open until a key is pressed
+        waitKey();
+    }
+
+    return 0;
+}
+
+// Normalizes a prediction for benchmark scoring. Only for Evaluation.
+//
+// ASCII letters are kept and folded to lower case; every other character
+// (digits, punctuation, whitespace, non-ASCII bytes) is removed.
+//
+// input: recognized text; it is only read, never modified.
+// Returns the normalized copy.
+String convertForEval(String & input)
+{
+    String output;
+    output.reserve(input.length());
+    // size_t is the type returned by length(); the former `uint` index is a
+    // non-standard typedef that not every toolchain provides.
+    for (size_t i = 0; i < input.length(); i++){
+        const char ch = input[i];
+        if (ch >= 'a' && ch <= 'z') {
+            output.push_back(ch);
+        } else if (ch >= 'A' && ch <= 'Z') {
+            // Explicit ASCII fold keeps the result independent of the locale
+            output.push_back(static_cast<char>(ch - 'A' + 'a'));
+        }
+    }
+
+    return output;
+}