diff --git a/modules/core/include/opencv2/core/persistence.hpp b/modules/core/include/opencv2/core/persistence.hpp index a82235b7c3..126393fae1 100644 --- a/modules/core/include/opencv2/core/persistence.hpp +++ b/modules/core/include/opencv2/core/persistence.hpp @@ -542,6 +542,11 @@ public: */ CV_WRAP_AS(at) FileNode operator[](int i) const; + /** @brief Returns keys of a mapping node. + @returns Keys of a mapping node. + */ + CV_WRAP std::vector keys() const; + /** @brief Returns type of the node. @returns Type of the node. See FileNode::Type */ diff --git a/modules/core/src/persistence_cpp.cpp b/modules/core/src/persistence_cpp.cpp index b1cef6be95..334c787c98 100644 --- a/modules/core/src/persistence_cpp.cpp +++ b/modules/core/src/persistence_cpp.cpp @@ -269,6 +269,20 @@ FileNode FileNode::operator[](int i) const i == 0 ? *this : FileNode(); } +std::vector FileNode::keys() const +{ + std::vector res; + if (isMap()) + { + res.reserve(size()); + for (FileNodeIterator it = begin(); it != end(); ++it) + { + res.push_back((*it).name()); + } + } + return res; +} + String FileNode::name() const { const char* str; diff --git a/samples/dnn/classification.cpp b/samples/dnn/classification.cpp index 42bdc20dd2..0ae9e6ed94 100644 --- a/samples/dnn/classification.cpp +++ b/samples/dnn/classification.cpp @@ -5,21 +5,15 @@ #include #include -const char* keys = +#include "common.hpp" + +std::string keys = "{ help h | | Print help message. }" + "{ @alias | | An alias name of model to extract preprocessing parameters from models.yml file. }" + "{ zoo | models.yml | An optional path to file with preprocessing parameters }" "{ input i | | Path to input image or video file. Skip this argument to capture frames from a camera.}" - "{ model m | | Path to a binary file of model contains trained weights. 
" - "It could be a file with extensions .caffemodel (Caffe), " - ".pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet) }" - "{ config c | | Path to a text file of model contains network configuration. " - "It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet) }" "{ framework f | | Optional name of an origin framework of the model. Detect it automatically if it does not set. }" "{ classes | | Optional path to a text file with names of classes. }" - "{ mean | | Preprocess input image by subtracting mean values. Mean values should be in BGR order and delimited by spaces. }" - "{ scale | 1 | Preprocess input image by multiplying on a scale factor. }" - "{ width | | Preprocess input image by resizing to a specific width. }" - "{ height | | Preprocess input image by resizing to a specific height. }" - "{ rgb | | Indicate that model works with RGB input images instead BGR ones. }" "{ backend | 0 | Choose one of computation backends: " "0: automatically (by default), " "1: Halide language (http://halide-lang.org/), " @@ -39,6 +33,13 @@ std::vector classes; int main(int argc, char** argv) { CommandLineParser parser(argc, argv, keys); + + const std::string modelName = parser.get("@alias"); + const std::string zooFile = parser.get("zoo"); + + keys += genPreprocArguments(modelName, zooFile); + + parser = CommandLineParser(argc, argv, keys); parser.about("Use this script to run classification deep learning networks using OpenCV."); if (argc == 1 || parser.has("help")) { @@ -51,8 +52,8 @@ int main(int argc, char** argv) bool swapRB = parser.get("rgb"); int inpWidth = parser.get("width"); int inpHeight = parser.get("height"); - String model = parser.get("model"); - String config = parser.get("config"); + String model = findFile(parser.get("model")); + String config = findFile(parser.get("config")); String framework = parser.get("framework"); int backendId = parser.get("backend"); int targetId = parser.get("target"); diff --git 
a/samples/dnn/classification.py b/samples/dnn/classification.py index 9a610d1ab7..5a2373d363 100644 --- a/samples/dnn/classification.py +++ b/samples/dnn/classification.py @@ -1,35 +1,19 @@ import cv2 as cv import argparse import numpy as np -import sys + +from common import * backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_BACKEND_OPENCV) targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD) -parser = argparse.ArgumentParser(description='Use this script to run classification deep learning networks using OpenCV.') +parser = argparse.ArgumentParser(add_help=False) +parser.add_argument('--zoo', default=os.path.join(os.path.dirname(os.path.abspath(__file__)), 'models.yml'), + help='An optional path to file with preprocessing parameters.') parser.add_argument('--input', help='Path to input image or video file. Skip this argument to capture frames from a camera.') -parser.add_argument('--model', required=True, - help='Path to a binary file of model contains trained weights. ' - 'It could be a file with extensions .caffemodel (Caffe), ' - '.pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet)') -parser.add_argument('--config', - help='Path to a text file of model contains network configuration. ' - 'It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet)') parser.add_argument('--framework', choices=['caffe', 'tensorflow', 'torch', 'darknet'], help='Optional name of an origin framework of the model. ' 'Detect it automatically if it does not set.') -parser.add_argument('--classes', help='Optional path to a text file with names of classes.') -parser.add_argument('--mean', nargs='+', type=float, default=[0, 0, 0], - help='Preprocess input image by subtracting mean values. 
' - 'Mean values should be in BGR order.') -parser.add_argument('--scale', type=float, default=1.0, - help='Preprocess input image by multiplying on a scale factor.') -parser.add_argument('--width', type=int, required=True, - help='Preprocess input image by resizing to a specific width.') -parser.add_argument('--height', type=int, required=True, - help='Preprocess input image by resizing to a specific height.') -parser.add_argument('--rgb', action='store_true', - help='Indicate that model works with RGB input images instead BGR ones.') parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int, help="Choose one of computation backends: " "%d: automatically (by default), " @@ -42,8 +26,17 @@ parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, '%d: OpenCL, ' '%d: OpenCL fp16 (half-float precision), ' '%d: VPU' % targets) +args, _ = parser.parse_known_args() +add_preproc_args(args.zoo, parser, 'classification') +parser = argparse.ArgumentParser(parents=[parser], + description='Use this script to run classification deep learning networks using OpenCV.', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) args = parser.parse_args() +args.model = findFile(args.model) +args.config = findFile(args.config) +args.classes = findFile(args.classes) + # Load names of classes classes = None if args.classes: @@ -66,7 +59,9 @@ while cv.waitKey(1) < 0: break # Create a 4D blob from a frame. 
-    blob = cv.dnn.blobFromImage(frame, args.scale, (args.width, args.height), args.mean, args.rgb, crop=False)
+    inpWidth = args.width if args.width else frame.shape[1]
+    inpHeight = args.height if args.height else frame.shape[0]
+    blob = cv.dnn.blobFromImage(frame, args.scale, (inpWidth, inpHeight), args.mean, args.rgb, crop=False)
 
     # Run a model
     net.setInput(blob)
diff --git a/samples/dnn/common.hpp b/samples/dnn/common.hpp
new file mode 100644
index 0000000000..a0ca012645
--- /dev/null
+++ b/samples/dnn/common.hpp
@@ -0,0 +1,108 @@
+#include <cstdlib>
+
+#include <opencv2/core/utils/filesystem.hpp>
+
+using namespace cv;
+
+// Builds one CommandLineParser key entry: "{ argName key | default | help }".
+// When modelName is non-empty and zooFile holds a node modelName with a field
+// argName, that field (number, string or sequence of numbers) replaces defaultVal.
+std::string genArgument(const std::string& argName, const std::string& help,
+                        const std::string& modelName, const std::string& zooFile,
+                        char key = ' ', std::string defaultVal = "");
+
+// Returns the key entries for model, config, mean, scale, width, height and rgb.
+std::string genPreprocArguments(const std::string& modelName, const std::string& zooFile);
+
+// Returns filename unchanged if it is empty or exists. Otherwise tries
+// <dir>/dnn/filename for each directory named by OPENCV_DNN_TEST_DATA_PATH and
+// OPENCV_TEST_DATA_PATH, and raises Error::StsObjectNotFound if none exists.
+std::string findFile(const std::string& filename);
+
+std::string genArgument(const std::string& argName, const std::string& help,
+                        const std::string& modelName, const std::string& zooFile,
+                        char key, std::string defaultVal)
+{
+    if (!modelName.empty())
+    {
+        FileStorage fs(zooFile, FileStorage::READ);
+        if (fs.isOpened())
+        {
+            FileNode node = fs[modelName];
+            if (!node.empty())
+            {
+                FileNode value = node[argName];
+                if (!value.empty())
+                {
+                    if (value.isReal())
+                        defaultVal = format("%f", (float)value);
+                    else if (value.isString())
+                        defaultVal = (std::string)value;
+                    else if (value.isInt())
+                        defaultVal = format("%d", (int)value);
+                    else if (value.isSeq())
+                    {
+                        // Sequences become a space-delimited list appended to defaultVal.
+                        for (size_t i = 0; i < value.size(); ++i)
+                        {
+                            FileNode v = value[(int)i];
+                            if (v.isInt())
+                                defaultVal += format("%d ", (int)v);
+                            else if (v.isReal())
+                                defaultVal += format("%f ", (float)v);
+                            else
+                                CV_Error(Error::StsNotImplemented, "Unexpected value format");
+                        }
+                    }
+                    else
+                        CV_Error(Error::StsNotImplemented, "Unexpected field format");
+                }
+            }
+        }
+    }
+    return "{ " + argName + " " + key + " | " + defaultVal + " | " + help + " }";
+}
+
+std::string findFile(const std::string& filename)
+{
+    if (filename.empty() || utils::fs::exists(filename))
+        return filename;
+
+    // getenv() returns NULL for an unset variable and constructing a std::string
+    // from NULL is undefined behaviour, so keep raw pointers and skip unset ones.
+    const char* extraPaths[] = {getenv("OPENCV_DNN_TEST_DATA_PATH"),
+                                getenv("OPENCV_TEST_DATA_PATH")};
+    for (int i = 0; i < 2; ++i)
+    {
+        if (extraPaths[i] == NULL)
+            continue;
+        std::string absPath = utils::fs::join(extraPaths[i], utils::fs::join("dnn", filename));
+        if (utils::fs::exists(absPath))
+            return absPath;
+    }
+    CV_Error(Error::StsObjectNotFound, "File " + filename + " not found! "
+             "Please specify a path to /opencv_extra/testdata in OPENCV_DNN_TEST_DATA_PATH "
+             "environment variable or pass a full path to model.");
+    return "";
+}
+
+std::string genPreprocArguments(const std::string& modelName, const std::string& zooFile)
+{
+    return genArgument("model", "Path to a binary file of model contains trained weights. "
+                                "It could be a file with extensions .caffemodel (Caffe), "
+                                ".pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet), .bin (OpenVINO).",
+                       modelName, zooFile, 'm') +
+           genArgument("config", "Path to a text file of model contains network configuration. "
+                                 "It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet), .xml (OpenVINO).",
+                       modelName, zooFile, 'c') +
+           genArgument("mean", "Preprocess input image by subtracting mean values. Mean values should be in BGR order and delimited by spaces.",
+                       modelName, zooFile) +
+           genArgument("scale", "Preprocess input image by multiplying on a scale factor.",
+                       modelName, zooFile, ' ', "1.0") +
+           genArgument("width", "Preprocess input image by resizing to a specific width.",
+                       modelName, zooFile, ' ', "-1") +
+           genArgument("height", "Preprocess input image by resizing to a specific height.",
+                       modelName, zooFile, ' ', "-1") +
+           genArgument("rgb", "Indicate that model works with RGB input images instead BGR ones.",
+                       modelName, zooFile);
+}
diff --git a/samples/dnn/common.py b/samples/dnn/common.py
new file mode 100644
index 0000000000..feafdc9d02
--- /dev/null
+++ b/samples/dnn/common.py
@@ -0,0 +1,124 @@
+import sys
+import os
+import cv2 as cv
+
+
+def add_argument(zoo, parser, name, help, required=False, default=None, type=None, action=None, nargs=None):
+    """Add option --<name> to parser, taking its default from the zoo file when possible.
+
+    sys.argv[1] is treated as the model alias. If the zoo file has a field <name>
+    under that alias, the field becomes the default and the option is no longer
+    required. Nothing is added when the script is run without arguments.
+    """
+    if len(sys.argv) <= 1:
+        return
+
+    modelName = sys.argv[1]
+
+    if os.path.isfile(zoo):
+        fs = cv.FileStorage(zoo, cv.FILE_STORAGE_READ)
+        node = fs.getNode(modelName)
+        if not node.empty():
+            value = node.getNode(name)
+            if not value.empty():
+                if value.isReal():
+                    default = value.real()
+                elif value.isString():
+                    default = value.string()
+                elif value.isInt():
+                    default = int(value.real())
+                elif value.isSeq():
+                    default = []
+                    for i in range(value.size()):
+                        v = value.at(i)
+                        if v.isInt():
+                            default.append(int(v.real()))
+                        elif v.isReal():
+                            default.append(v.real())
+                        else:
+                            print('Unexpected value format')
+                            sys.exit(1)
+                else:
+                    print('Unexpected field format')
+                    sys.exit(1)
+                required = False
+
+    if action == 'store_true':
+        default = 1 if default == 'true' else (0 if default == 'false' else default)
+        assert(default is None or default == 0 or default == 1)
+        parser.add_argument('--' + name, required=required, help=help, default=bool(default),
+                            action=action)
+    else:
+        parser.add_argument('--' + name, required=required, help=help, default=default,
+                            action=action, nargs=nargs, type=type)
+
+
+def add_preproc_args(zoo, parser, sample):
+    """Add the positional 'alias' argument and the model/preprocessing options.
+
+    Alias choices are the top-level zoo entries whose 'sample' field equals sample.
+    """
+    aliases = []
+    if os.path.isfile(zoo):
+        fs = cv.FileStorage(zoo, cv.FILE_STORAGE_READ)
+        root = fs.root()
+        for name in root.keys():
+            model = root.getNode(name)
+            if model.getNode('sample').string() == sample:
+                aliases.append(name)
+
+    parser.add_argument('alias', nargs='?', choices=aliases,
+                        help='An alias name of model to extract preprocessing parameters from models.yml file.')
+    add_argument(zoo, parser, 'model', required=True,
+                 help='Path to a binary file of model contains trained weights. '
+                      'It could be a file with extensions .caffemodel (Caffe), '
+                      '.pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet), .bin (OpenVINO)')
+    add_argument(zoo, parser, 'config',
+                 help='Path to a text file of model contains network configuration. '
+                      'It could be a file with extensions .prototxt (Caffe), .pbtxt or .config (TensorFlow), .cfg (Darknet), .xml (OpenVINO)')
+    add_argument(zoo, parser, 'mean', nargs='+', type=float, default=[0, 0, 0],
+                 help='Preprocess input image by subtracting mean values. '
+                      'Mean values should be in BGR order.')
+    add_argument(zoo, parser, 'scale', type=float, default=1.0,
+                 help='Preprocess input image by multiplying on a scale factor.')
+    add_argument(zoo, parser, 'width', type=int,
+                 help='Preprocess input image by resizing to a specific width.')
+    add_argument(zoo, parser, 'height', type=int,
+                 help='Preprocess input image by resizing to a specific height.')
+    add_argument(zoo, parser, 'rgb', action='store_true',
+                 help='Indicate that model works with RGB input images instead BGR ones.')
+    add_argument(zoo, parser, 'classes',
+                 help='Optional path to a text file with names of classes to label detected objects.')
+
+
+def findFile(filename):
+    """Return a path to an existing file for filename, or None if filename is empty or None.
+
+    Tries filename itself, then ../data/dnn next to this script, then the dnn
+    subdirectory of OPENCV_DNN_TEST_DATA_PATH and OPENCV_TEST_DATA_PATH. Prints a
+    message and exits with status 1 when the file is not found.
+    """
+    if filename:
+        if os.path.exists(filename):
+            return filename
+
+        samplesDataDir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+                                      '..',
+                                      'data',
+                                      'dnn')
+        if os.path.exists(os.path.join(samplesDataDir, filename)):
+            return os.path.join(samplesDataDir, filename)
+
+        for path in ['OPENCV_DNN_TEST_DATA_PATH', 'OPENCV_TEST_DATA_PATH']:
+            try:
+                extraPath = os.environ[path]
+                absPath = os.path.join(extraPath, 'dnn', filename)
+                if os.path.exists(absPath):
+                    return absPath
+            except KeyError:
+                pass
+
+        print('File ' + filename + ' not found! Please specify a path to '
+              '/opencv_extra/testdata in OPENCV_DNN_TEST_DATA_PATH environment '
+              'variable or pass a full path to model.')
+        sys.exit(1)
diff --git a/samples/dnn/models.yml b/samples/dnn/models.yml
new file mode 100644
index 0000000000..0e7198a660
--- /dev/null
+++ b/samples/dnn/models.yml
@@ -0,0 +1,117 @@
+%YAML:1.0
+
+################################################################################
+# Object detection models.
+################################################################################ + +# OpenCV's face detection network +opencv_fd: + model: "opencv_face_detector.caffemodel" + config: "opencv_face_detector.prototxt" + mean: [104, 177, 123] + scale: 1.0 + width: 300 + height: 300 + rgb: false + sample: "object_detection" + +# YOLO object detection family from Darknet (https://pjreddie.com/darknet/yolo/) +# Might be used for all YOLOv2, TinyYolov2 and YOLOv3 +yolo: + model: "yolov3.weights" + config: "yolov3.cfg" + mean: [0, 0, 0] + scale: 0.00392 + width: 416 + height: 416 + rgb: true + classes: "object_detection_classes_yolov3.txt" + sample: "object_detection" + +tiny-yolo-voc: + model: "tiny-yolo-voc.weights" + config: "tiny-yolo-voc.cfg" + mean: [0, 0, 0] + scale: 0.00392 + width: 416 + height: 416 + rgb: true + classes: "object_detection_classes_pascal_voc.txt" + sample: "object_detection" + +# Caffe implementation of SSD model from https://github.com/chuanqi305/MobileNet-SSD +ssd_caffe: + model: "MobileNetSSD_deploy.caffemodel" + config: "MobileNetSSD_deploy.prototxt" + mean: [127.5, 127.5, 127.5] + scale: 0.007843 + width: 300 + height: 300 + rgb: false + classes: "object_detection_classes_pascal_voc.txt" + sample: "object_detection" + +# TensorFlow implementation of SSD model from https://github.com/tensorflow/models/tree/master/research/object_detection +ssd_tf: + model: "ssd_mobilenet_v1_coco_2017_11_17.pb" + config: "ssd_mobilenet_v1_coco_2017_11_17.pbtxt" + mean: [0, 0, 0] + scale: 1.0 + width: 300 + height: 300 + rgb: true + classes: "object_detection_classes_coco.txt" + sample: "object_detection" + +# TensorFlow implementation of Faster-RCNN model from https://github.com/tensorflow/models/tree/master/research/object_detection +faster_rcnn_tf: + model: "faster_rcnn_inception_v2_coco_2018_01_28.pb" + config: "faster_rcnn_inception_v2_coco_2018_01_28.pbtxt" + mean: [0, 0, 0] + scale: 1.0 + width: 800 + height: 600 + rgb: true + sample: "object_detection" + 
+################################################################################ +# Image classification models. +################################################################################ + +# SqueezeNet v1.1 from https://github.com/DeepScale/SqueezeNet +squeezenet: + model: "squeezenet_v1.1.caffemodel" + config: "squeezenet_v1.1.prototxt" + mean: [0, 0, 0] + scale: 1.0 + width: 227 + height: 227 + rgb: false + classes: "classification_classes_ILSVRC2012.txt" + sample: "classification" + +################################################################################ +# Semantic segmentation models. +################################################################################ + +# ENet road scene segmentation network from https://github.com/e-lab/ENet-training +# Works fine for different input sizes. +enet: + model: "Enet-model-best.net" + mean: [0, 0, 0] + scale: 0.00392 + width: 512 + height: 256 + rgb: true + classes: "enet-classes.txt" + sample: "segmentation" + +fcn8s: + model: "fcn8s-heavy-pascal.caffemodel" + config: "fcn8s-heavy-pascal.prototxt" + mean: [0, 0, 0] + scale: 1.0 + width: 500 + height: 500 + rgb: false + sample: "segmentation" diff --git a/samples/dnn/object_detection.cpp b/samples/dnn/object_detection.cpp index 161f7434f8..756978a574 100644 --- a/samples/dnn/object_detection.cpp +++ b/samples/dnn/object_detection.cpp @@ -5,22 +5,16 @@ #include #include -const char* keys = +#include "common.hpp" + +std::string keys = "{ help h | | Print help message. }" + "{ @alias | | An alias name of model to extract preprocessing parameters from models.yml file. }" + "{ zoo | models.yml | An optional path to file with preprocessing parameters }" "{ device | 0 | camera device number. }" "{ input i | | Path to input image or video file. Skip this argument to capture frames from a camera. }" - "{ model m | | Path to a binary file of model contains trained weights. 
" - "It could be a file with extensions .caffemodel (Caffe), " - ".pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet).}" - "{ config c | | Path to a text file of model contains network configuration. " - "It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet).}" "{ framework f | | Optional name of an origin framework of the model. Detect it automatically if it does not set. }" "{ classes | | Optional path to a text file with names of classes to label detected objects. }" - "{ mean | | Preprocess input image by subtracting mean values. Mean values should be in BGR order and delimited by spaces. }" - "{ scale | 1 | Preprocess input image by multiplying on a scale factor. }" - "{ width | -1 | Preprocess input image by resizing to a specific width. }" - "{ height | -1 | Preprocess input image by resizing to a specific height. }" - "{ rgb | | Indicate that model works with RGB input images instead BGR ones. }" "{ thr | .5 | Confidence threshold. }" "{ nms | .4 | Non-maximum suppression threshold. }" "{ backend | 0 | Choose one of computation backends: " @@ -52,6 +46,13 @@ std::vector getOutputsNames(const Net& net); int main(int argc, char** argv) { CommandLineParser parser(argc, argv, keys); + + const std::string modelName = parser.get("@alias"); + const std::string zooFile = parser.get("zoo"); + + keys += genPreprocArguments(modelName, zooFile); + + parser = CommandLineParser(argc, argv, keys); parser.about("Use this script to run object detection deep learning networks using OpenCV."); if (argc == 1 || parser.has("help")) { @@ -66,6 +67,9 @@ int main(int argc, char** argv) bool swapRB = parser.get("rgb"); int inpWidth = parser.get("width"); int inpHeight = parser.get("height"); + CV_Assert(parser.has("model")); + std::string modelPath = findFile(parser.get("model")); + std::string configPath = findFile(parser.get("config")); // Open file with classes names. 
if (parser.has("classes")) @@ -82,8 +86,7 @@ int main(int argc, char** argv) } // Load a model. - CV_Assert(parser.has("model")); - Net net = readNet(parser.get("model"), parser.get("config"), parser.get("framework")); + Net net = readNet(modelPath, configPath, parser.get("framework")); net.setPreferableBackend(parser.get("backend")); net.setPreferableTarget(parser.get("target")); diff --git a/samples/dnn/object_detection.py b/samples/dnn/object_detection.py index 76c33f8e3b..77855ede2c 100644 --- a/samples/dnn/object_detection.py +++ b/samples/dnn/object_detection.py @@ -1,8 +1,8 @@ import cv2 as cv import argparse -import sys import numpy as np +from common import * from tf_text_graph_common import readTextMessage from tf_text_graph_ssd import createSSDGraph from tf_text_graph_faster_rcnn import createFasterRCNNGraph @@ -10,15 +10,10 @@ from tf_text_graph_faster_rcnn import createFasterRCNNGraph backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_BACKEND_OPENCV) targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD) -parser = argparse.ArgumentParser(description='Use this script to run object detection deep learning networks using OpenCV.') +parser = argparse.ArgumentParser(add_help=False) +parser.add_argument('--zoo', default=os.path.join(os.path.dirname(os.path.abspath(__file__)), 'models.yml'), + help='An optional path to file with preprocessing parameters.') parser.add_argument('--input', help='Path to input image or video file. Skip this argument to capture frames from a camera.') -parser.add_argument('--model', required=True, - help='Path to a binary file of model contains trained weights. ' - 'It could be a file with extensions .caffemodel (Caffe), ' - '.pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet), .bin (OpenVINO)') -parser.add_argument('--config', - help='Path to a text file of model contains network configuration. 
' - 'It could be a file with extensions .prototxt (Caffe), .pbtxt or .config (TensorFlow), .cfg (Darknet), .xml (OpenVINO)') parser.add_argument('--out_tf_graph', default='graph.pbtxt', help='For models from TensorFlow Object Detection API, you may ' 'pass a .config file which was used for training through --config ' @@ -26,18 +21,6 @@ parser.add_argument('--out_tf_graph', default='graph.pbtxt', parser.add_argument('--framework', choices=['caffe', 'tensorflow', 'torch', 'darknet', 'dldt'], help='Optional name of an origin framework of the model. ' 'Detect it automatically if it does not set.') -parser.add_argument('--classes', help='Optional path to a text file with names of classes to label detected objects.') -parser.add_argument('--mean', nargs='+', type=float, default=[0, 0, 0], - help='Preprocess input image by subtracting mean values. ' - 'Mean values should be in BGR order.') -parser.add_argument('--scale', type=float, default=1.0, - help='Preprocess input image by multiplying on a scale factor.') -parser.add_argument('--width', type=int, - help='Preprocess input image by resizing to a specific width.') -parser.add_argument('--height', type=int, - help='Preprocess input image by resizing to a specific height.') -parser.add_argument('--rgb', action='store_true', - help='Indicate that model works with RGB input images instead BGR ones.') parser.add_argument('--thr', type=float, default=0.5, help='Confidence threshold') parser.add_argument('--nms', type=float, default=0.4, help='Non-maximum suppression threshold') parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int, @@ -52,8 +35,17 @@ parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, '%d: OpenCL, ' '%d: OpenCL fp16 (half-float precision), ' '%d: VPU' % targets) +args, _ = parser.parse_known_args() +add_preproc_args(args.zoo, parser, 'object_detection') +parser = argparse.ArgumentParser(parents=[parser], + description='Use this script to 
run object detection deep learning networks using OpenCV.', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) args = parser.parse_args() +args.model = findFile(args.model) +args.config = findFile(args.config) +args.classes = findFile(args.classes) + # If config specified, try to load it as TensorFlow Object Detection API's pipeline. config = readTextMessage(args.config) if 'model' in config: diff --git a/samples/dnn/segmentation.cpp b/samples/dnn/segmentation.cpp index 70e8d7b5b4..30b29dc449 100644 --- a/samples/dnn/segmentation.cpp +++ b/samples/dnn/segmentation.cpp @@ -5,24 +5,18 @@ #include #include -const char* keys = +#include "common.hpp" + +std::string keys = "{ help h | | Print help message. }" + "{ @alias | | An alias name of model to extract preprocessing parameters from models.yml file. }" + "{ zoo | models.yml | An optional path to file with preprocessing parameters }" "{ device | 0 | camera device number. }" "{ input i | | Path to input image or video file. Skip this argument to capture frames from a camera. }" - "{ model m | | Path to a binary file of model contains trained weights. " - "It could be a file with extensions .caffemodel (Caffe), " - ".pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet). }" - "{ config c | | Path to a text file of model contains network configuration. " - "It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet). }" "{ framework f | | Optional name of an origin framework of the model. Detect it automatically if it does not set. }" "{ classes | | Optional path to a text file with names of classes. }" "{ colors | | Optional path to a text file with colors for an every class. " "An every color is represented with three values from 0 to 255 in BGR channels order. }" - "{ mean | | Preprocess input image by subtracting mean values. Mean values should be in BGR order and delimited by spaces. }" - "{ scale | 1 | Preprocess input image by multiplying on a scale factor. 
}" - "{ width | | Preprocess input image by resizing to a specific width. }" - "{ height | | Preprocess input image by resizing to a specific height. }" - "{ rgb | | Indicate that model works with RGB input images instead BGR ones. }" "{ backend | 0 | Choose one of computation backends: " "0: automatically (by default), " "1: Halide language (http://halide-lang.org/), " @@ -47,6 +41,13 @@ void colorizeSegmentation(const Mat &score, Mat &segm); int main(int argc, char** argv) { CommandLineParser parser(argc, argv, keys); + + const std::string modelName = parser.get("@alias"); + const std::string zooFile = parser.get("zoo"); + + keys += genPreprocArguments(modelName, zooFile); + + parser = CommandLineParser(argc, argv, keys); parser.about("Use this script to run semantic segmentation deep learning networks using OpenCV."); if (argc == 1 || parser.has("help")) { @@ -59,8 +60,8 @@ int main(int argc, char** argv) bool swapRB = parser.get("rgb"); int inpWidth = parser.get("width"); int inpHeight = parser.get("height"); - String model = parser.get("model"); - String config = parser.get("config"); + String model = findFile(parser.get("model")); + String config = findFile(parser.get("config")); String framework = parser.get("framework"); int backendId = parser.get("backend"); int targetId = parser.get("target"); diff --git a/samples/dnn/segmentation.py b/samples/dnn/segmentation.py index b615b96028..a926ca27b3 100644 --- a/samples/dnn/segmentation.py +++ b/samples/dnn/segmentation.py @@ -3,35 +3,20 @@ import argparse import numpy as np import sys +from common import * + backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_BACKEND_OPENCV) targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD) -parser = argparse.ArgumentParser(description='Use this script to run semantic segmentation deep learning networks using OpenCV.') +parser = 
argparse.ArgumentParser(add_help=False) +parser.add_argument('--zoo', default=os.path.join(os.path.dirname(os.path.abspath(__file__)), 'models.yml'), + help='An optional path to file with preprocessing parameters.') parser.add_argument('--input', help='Path to input image or video file. Skip this argument to capture frames from a camera.') -parser.add_argument('--model', required=True, - help='Path to a binary file of model contains trained weights. ' - 'It could be a file with extensions .caffemodel (Caffe), ' - '.pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet)') -parser.add_argument('--config', - help='Path to a text file of model contains network configuration. ' - 'It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet)') parser.add_argument('--framework', choices=['caffe', 'tensorflow', 'torch', 'darknet'], help='Optional name of an origin framework of the model. ' 'Detect it automatically if it does not set.') -parser.add_argument('--classes', help='Optional path to a text file with names of classes.') parser.add_argument('--colors', help='Optional path to a text file with colors for an every class. ' 'An every color is represented with three values from 0 to 255 in BGR channels order.') -parser.add_argument('--mean', nargs='+', type=float, default=[0, 0, 0], - help='Preprocess input image by subtracting mean values. 
' - 'Mean values should be in BGR order.') -parser.add_argument('--scale', type=float, default=1.0, - help='Preprocess input image by multiplying on a scale factor.') -parser.add_argument('--width', type=int, required=True, - help='Preprocess input image by resizing to a specific width.') -parser.add_argument('--height', type=int, required=True, - help='Preprocess input image by resizing to a specific height.') -parser.add_argument('--rgb', action='store_true', - help='Indicate that model works with RGB input images instead BGR ones.') parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int, help="Choose one of computation backends: " "%d: automatically (by default), " @@ -44,8 +29,17 @@ parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, '%d: OpenCL, ' '%d: OpenCL fp16 (half-float precision), ' '%d: VPU' % targets) +args, _ = parser.parse_known_args() +add_preproc_args(args.zoo, parser, 'segmentation') +parser = argparse.ArgumentParser(parents=[parser], + description='Use this script to run semantic segmentation deep learning networks using OpenCV.', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) args = parser.parse_args() +args.model = findFile(args.model) +args.config = findFile(args.config) +args.classes = findFile(args.classes) + np.random.seed(324) # Load names of classes @@ -93,8 +87,13 @@ while cv.waitKey(1) < 0: cv.waitKey() break + frameHeight = frame.shape[0] + frameWidth = frame.shape[1] + # Create a 4D blob from a frame. 
- blob = cv.dnn.blobFromImage(frame, args.scale, (args.width, args.height), args.mean, args.rgb, crop=False) + inpWidth = args.width if args.width else frameWidth + inpHeight = args.height if args.height else frameHeight + blob = cv.dnn.blobFromImage(frame, args.scale, (inpWidth, inpHeight), args.mean, args.rgb, crop=False) # Run a model net.setInput(blob) @@ -115,7 +114,7 @@ while cv.waitKey(1) < 0: segm = np.stack([colors[idx] for idx in classIds.flatten()]) segm = segm.reshape(height, width, 3) - segm = cv.resize(segm, (frame.shape[1], frame.shape[0]), interpolation=cv.INTER_NEAREST) + segm = cv.resize(segm, (frameWidth, frameHeight), interpolation=cv.INTER_NEAREST) frame = (0.1 * frame + 0.9 * segm).astype(np.uint8) # Put efficiency information. diff --git a/samples/dnn/tf_text_graph_common.py b/samples/dnn/tf_text_graph_common.py index 564c572d58..bf04c42174 100644 --- a/samples/dnn/tf_text_graph_common.py +++ b/samples/dnn/tf_text_graph_common.py @@ -72,6 +72,8 @@ def parseMessage(tokens, idx): def readTextMessage(filePath): + if not filePath: + return {} with open(filePath, 'rt') as f: content = f.read()