Merge pull request #17570 from HannibalAPE:text_det_recog_demo
[GSoC] High Level API and Samples for Scene Text Detection and Recognition * APIs and samples for scene text detection and recognition * update APIs and tutorial for Text Detection and Recognition * API updates: (1) put decodeType into struct Voc (2) optimize the post-processing of DB * sample update: (1) add transformation into scene_text_spotting.cpp (2) modify text_detection.cpp with API update * update tutorial * simplify text recognition API update tutorial * update impl usage in recognize() and detect() * dnn: refactoring public API of TextRecognitionModel/TextDetectionModel * update provided models update opencv.bib * dnn: adjust text rectangle angle * remove points ordering operation in model.cpp * update gts of DB test in test_model.cpp * dnn: ensure to keep text rectangle angle - avoid 90/180 degree turns * dnn(text): use quadrangle result in TextDetectionModel API * dnn: update Text Detection API (1) keep points' order consistent with (bl, tl, tr, br) in unclip (2) update contourScore with boundingRect
This commit is contained in:
@@ -0,0 +1,144 @@
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <opencv2/highgui.hpp>
|
||||
#include <opencv2/dnn/dnn.hpp>
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::dnn;
|
||||
|
||||
String keys =
|
||||
"{ help h | | Print help message. }"
|
||||
"{ inputImage i | | Path to an input image. Skip this argument to capture frames from a camera. }"
|
||||
"{ modelPath mp | | Path to a binary .onnx file contains trained CRNN text recognition model. "
|
||||
"Download links are provided in doc/tutorials/dnn/dnn_text_spotting/dnn_text_spotting.markdown}"
|
||||
"{ RGBInput rgb |0| 0: imread with flags=IMREAD_GRAYSCALE; 1: imread with flags=IMREAD_COLOR. }"
|
||||
"{ evaluate e |false| false: predict with input images; true: evaluate on benchmarks. }"
|
||||
"{ evalDataPath edp | | Path to benchmarks for evaluation. "
|
||||
"Download links are provided in doc/tutorials/dnn/dnn_text_spotting/dnn_text_spotting.markdown}"
|
||||
"{ vocabularyPath vp | alphabet_36.txt | Path to recognition vocabulary. "
|
||||
"Download links are provided in doc/tutorials/dnn/dnn_text_spotting/dnn_text_spotting.markdown}";
|
||||
|
||||
String convertForEval(String &input);
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
// Parse arguments
|
||||
CommandLineParser parser(argc, argv, keys);
|
||||
parser.about("Use this script to run the PyTorch implementation of "
|
||||
"An End-to-End Trainable Neural Network for Image-based SequenceRecognition and Its Application to Scene Text Recognition "
|
||||
"(https://arxiv.org/abs/1507.05717)");
|
||||
if (argc == 1 || parser.has("help"))
|
||||
{
|
||||
parser.printMessage();
|
||||
return 0;
|
||||
}
|
||||
|
||||
String modelPath = parser.get<String>("modelPath");
|
||||
String vocPath = parser.get<String>("vocabularyPath");
|
||||
int imreadRGB = parser.get<int>("RGBInput");
|
||||
|
||||
if (!parser.check())
|
||||
{
|
||||
parser.printErrors();
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Load the network
|
||||
CV_Assert(!modelPath.empty());
|
||||
TextRecognitionModel recognizer(modelPath);
|
||||
|
||||
// Load vocabulary
|
||||
CV_Assert(!vocPath.empty());
|
||||
std::ifstream vocFile;
|
||||
vocFile.open(samples::findFile(vocPath));
|
||||
CV_Assert(vocFile.is_open());
|
||||
String vocLine;
|
||||
std::vector<String> vocabulary;
|
||||
while (std::getline(vocFile, vocLine)) {
|
||||
vocabulary.push_back(vocLine);
|
||||
}
|
||||
recognizer.setVocabulary(vocabulary);
|
||||
recognizer.setDecodeType("CTC-greedy");
|
||||
|
||||
// Set parameters
|
||||
double scale = 1.0 / 127.5;
|
||||
Scalar mean = Scalar(127.5, 127.5, 127.5);
|
||||
Size inputSize = Size(100, 32);
|
||||
recognizer.setInputParams(scale, inputSize, mean);
|
||||
|
||||
if (parser.get<bool>("evaluate"))
|
||||
{
|
||||
// For evaluation
|
||||
String evalDataPath = parser.get<String>("evalDataPath");
|
||||
CV_Assert(!evalDataPath.empty());
|
||||
String gtPath = evalDataPath + "/test_gts.txt";
|
||||
std::ifstream evalGts;
|
||||
evalGts.open(gtPath);
|
||||
CV_Assert(evalGts.is_open());
|
||||
|
||||
String gtLine;
|
||||
int cntRight=0, cntAll=0;
|
||||
TickMeter timer;
|
||||
timer.reset();
|
||||
|
||||
while (std::getline(evalGts, gtLine)) {
|
||||
size_t splitLoc = gtLine.find_first_of(' ');
|
||||
String imgPath = evalDataPath + '/' + gtLine.substr(0, splitLoc);
|
||||
String gt = gtLine.substr(splitLoc+1);
|
||||
|
||||
// Inference
|
||||
Mat frame = imread(samples::findFile(imgPath), imreadRGB);
|
||||
CV_Assert(!frame.empty());
|
||||
timer.start();
|
||||
std::string recognitionResult = recognizer.recognize(frame);
|
||||
timer.stop();
|
||||
|
||||
if (gt == convertForEval(recognitionResult))
|
||||
cntRight++;
|
||||
|
||||
cntAll++;
|
||||
}
|
||||
std::cout << "Accuracy(%): " << (double)(cntRight) / (double)(cntAll) << std::endl;
|
||||
std::cout << "Average Inference Time(ms): " << timer.getTimeMilli() / (double)(cntAll) << std::endl;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Create a window
|
||||
static const std::string winName = "Input Cropped Image";
|
||||
|
||||
// Open an image file
|
||||
CV_Assert(parser.has("inputImage"));
|
||||
Mat frame = imread(samples::findFile(parser.get<String>("inputImage")), imreadRGB);
|
||||
CV_Assert(!frame.empty());
|
||||
|
||||
// Recognition
|
||||
std::string recognitionResult = recognizer.recognize(frame);
|
||||
|
||||
imshow(winName, frame);
|
||||
std::cout << "Predition: '" << recognitionResult << "'" << std::endl;
|
||||
waitKey();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert the predictions to lower case, and remove other characters.
|
||||
// Only for Evaluation
|
||||
String convertForEval(String & input)
|
||||
{
|
||||
String output;
|
||||
for (uint i = 0; i < input.length(); i++){
|
||||
char ch = input[i];
|
||||
if ((int)ch >= 97 && (int)ch <= 122) {
|
||||
output.push_back(ch);
|
||||
} else if ((int)ch >= 65 && (int)ch <= 90) {
|
||||
output.push_back((char)(ch + 32));
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
Reference in New Issue
Block a user