diff --git a/apps/interactive-calibration/calibCommon.hpp b/apps/interactive-calibration/calibCommon.hpp
index 6acf610897..f422ff0531 100644
--- a/apps/interactive-calibration/calibCommon.hpp
+++ b/apps/interactive-calibration/calibCommon.hpp
@@ -80,7 +80,7 @@ namespace calib
         cv::Size boardSize;
         int charucoDictName;
         int calibrationStep;
-        float charucoSquareLenght, charucoMarkerSize;
+        float charucoSquareLength, charucoMarkerSize;
         float captureDelay;
         float squareSize;
         float templDst;
diff --git a/apps/interactive-calibration/defaultConfig.xml b/apps/interactive-calibration/defaultConfig.xml
index d14ba865d3..cad201fba1 100644
--- a/apps/interactive-calibration/defaultConfig.xml
+++ b/apps/interactive-calibration/defaultConfig.xml
@@ -1,7 +1,7 @@
 <?xml version="1.0"?>
 <opencv_storage>
 <charuco_dict>0</charuco_dict>
-<charuco_square_lenght>200</charuco_square_lenght>
+<charuco_square_length>200</charuco_square_length>
 <charuco_marker_size>100</charuco_marker_size>
 <calibration_step>1</calibration_step>
 <max_frames_num>30</max_frames_num>
diff --git a/apps/interactive-calibration/frameProcessor.cpp b/apps/interactive-calibration/frameProcessor.cpp
index aecc2c0770..90904a42cd 100644
--- a/apps/interactive-calibration/frameProcessor.cpp
+++ b/apps/interactive-calibration/frameProcessor.cpp
@@ -273,7 +273,7 @@ CalibProcessor::CalibProcessor(cv::Ptr<calibrationData> data, captureParameters
 #ifdef HAVE_OPENCV_ARUCO
     mArucoDictionary = cv::aruco::getPredefinedDictionary(
             cv::aruco::PREDEFINED_DICTIONARY_NAME(capParams.charucoDictName));
-    mCharucoBoard = cv::aruco::CharucoBoard::create(mBoardSize.width, mBoardSize.height, capParams.charucoSquareLenght,
+    mCharucoBoard = cv::aruco::CharucoBoard::create(mBoardSize.width, mBoardSize.height, capParams.charucoSquareLength,
                                                     capParams.charucoMarkerSize, mArucoDictionary);
 #endif
     break;
diff --git a/apps/interactive-calibration/main.cpp b/apps/interactive-calibration/main.cpp
index f3d1e5d66b..6468c88ce7 100644
--- a/apps/interactive-calibration/main.cpp
+++ b/apps/interactive-calibration/main.cpp
@@ -181,7 +181,7 @@ int main(int argc, char** argv)
                     cv::aruco::getPredefinedDictionary(cv::aruco::PREDEFINED_DICTIONARY_NAME(capParams.charucoDictName));
             cv::Ptr<cv::aruco::CharucoBoard> charucoboard =
                         cv::aruco::CharucoBoard::create(capParams.boardSize.width, capParams.boardSize.height,
-                                                        capParams.charucoSquareLenght, capParams.charucoMarkerSize, dictionary);
+                                                        capParams.charucoSquareLength, capParams.charucoMarkerSize, dictionary);
             globalData->totalAvgErr =
                     cv::aruco::calibrateCameraCharuco(globalData->allCharucoCorners, globalData->allCharucoIds,
                                                       charucoboard, globalData->imageSize,
diff --git a/apps/interactive-calibration/parametersController.cpp b/apps/interactive-calibration/parametersController.cpp
index 8b44ba0409..c76b915c63 100644
--- a/apps/interactive-calibration/parametersController.cpp
+++ b/apps/interactive-calibration/parametersController.cpp
@@ -37,7 +37,10 @@ bool calib::parametersController::loadFromFile(const std::string &inputFileName)
     }

     readFromNode(reader["charuco_dict"], mCapParams.charucoDictName);
-    readFromNode(reader["charuco_square_lenght"], mCapParams.charucoSquareLenght);
+    if (readFromNode(reader["charuco_square_lenght"], mCapParams.charucoSquareLength)) {
+        std::cout << "DEPRECATION: Parameter 'charuco_square_lenght' has been deprecated (typo). Use 'charuco_square_length' instead." << std::endl;
+    }
+    readFromNode(reader["charuco_square_length"], mCapParams.charucoSquareLength);
     readFromNode(reader["charuco_marker_size"], mCapParams.charucoMarkerSize);
     readFromNode(reader["camera_resolution"], mCapParams.cameraResolution);
     readFromNode(reader["calibration_step"], mCapParams.calibrationStep);
@@ -51,7 +54,7 @@ bool calib::parametersController::loadFromFile(const std::string &inputFileName)
     bool retValue =
             checkAssertion(mCapParams.charucoDictName >= 0, "Dict name must be >= 0") &&
             checkAssertion(mCapParams.charucoMarkerSize > 0, "Marker size must be positive") &&
-            checkAssertion(mCapParams.charucoSquareLenght > 0, "Square size must be positive") &&
+            checkAssertion(mCapParams.charucoSquareLength > 0, "Square size must be positive") &&
             checkAssertion(mCapParams.minFramesNum > 1, "Minimal number of frames for calibration < 1") &&
             checkAssertion(mCapParams.calibrationStep > 0, "Calibration step must be positive") &&
             checkAssertion(mCapParams.maxFramesNum > mCapParams.minFramesNum, "maxFramesNum < minFramesNum") &&
@@ -119,7 +122,7 @@ bool calib::parametersController::loadFromParser(cv::CommandLineParser &parser)
         mCapParams.board = chAruco;
         mCapParams.boardSize = cv::Size(6, 8);
         mCapParams.charucoDictName = 0;
-        mCapParams.charucoSquareLenght = 200;
+        mCapParams.charucoSquareLength = 200;
         mCapParams.charucoMarkerSize = 100;
     }
     else {
diff --git a/doc/tutorials/calib3d/interactive_calibration/interactive_calibration.markdown b/doc/tutorials/calib3d/interactive_calibration/interactive_calibration.markdown
index 0c4d67ec85..8280262cdd 100644
--- a/doc/tutorials/calib3d/interactive_calibration/interactive_calibration.markdown
+++ b/doc/tutorials/calib3d/interactive_calibration/interactive_calibration.markdown
@@ -64,7 +64,7 @@ By default values of advanced parameters are stored in defaultConfig.xml
 <?xml version="1.0"?>
 <opencv_storage>
 <charuco_dict>0</charuco_dict>
-<charuco_square_lenght>200</charuco_square_lenght>
+<charuco_square_length>200</charuco_square_length>
 <charuco_marker_size>100</charuco_marker_size>
 <calibration_step>1</calibration_step>
 <max_frames_num>30</max_frames_num>
@@ -78,7 +78,7 @@ By default values of advanced parameters are stored in defaultConfig.xml
 @endcode

 - *charuco_dict*: name of special dictionary, which has been used for generation of chAruco pattern
-- *charuco_square_lenght*: size of square on chAruco board (in pixels)
+- *charuco_square_length*: size of square on chAruco board (in pixels)
 - *charuco_marker_size*: size of Aruco markers on chAruco board (in pixels)
 - *calibration_step*: interval in frames between launches of @ref cv::calibrateCamera
 - *max_frames_num*: if number of frames for calibration is greater then this value frames filter starts working.
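The renamed key is read through the same cv::FileStorage mechanism as the other advanced parameters, and the old spelling is still accepted with a deprecation warning. A quick way to check which spelling an existing config file uses is a small FileStorage probe like the sketch below (the file name is a placeholder, not part of the tool):

```python
import cv2 as cv

# Placeholder path: point this at your own advanced-parameters file.
fs = cv.FileStorage("myConfig.xml", cv.FILE_STORAGE_READ)

new_key = fs.getNode("charuco_square_length")
old_key = fs.getNode("charuco_square_lenght")  # deprecated spelling, still read for compatibility

if not new_key.empty():
    print("charuco_square_length =", new_key.real())
elif not old_key.empty():
    print("deprecated 'charuco_square_lenght' =", old_key.real())
else:
    print("square length not set; the application default (200) is used")

fs.release()
```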
@@ -91,7 +91,7 @@ QR faster than SVD, but potentially less precise - *frame_filter_conv_param*: parameter which used in linear convolution of bicriterial frames filter - *camera_resolution*: resolution of camera which is used for calibration -**Note:** *charuco_dict*, *charuco_square_lenght* and *charuco_marker_size* are used for chAruco pattern generation +**Note:** *charuco_dict*, *charuco_square_length* and *charuco_marker_size* are used for chAruco pattern generation (see Aruco module description for details: [Aruco tutorials](https://github.com/opencv/opencv_contrib/tree/master/modules/aruco/tutorials)) Default chAruco pattern: diff --git a/modules/calib3d/src/fundam.cpp b/modules/calib3d/src/fundam.cpp index c5f93bcde5..5363e949fd 100644 --- a/modules/calib3d/src/fundam.cpp +++ b/modules/calib3d/src/fundam.cpp @@ -490,12 +490,47 @@ static int run7Point( const Mat& _m1, const Mat& _m2, Mat& _fmatrix ) double* fmatrix = _fmatrix.ptr(); int i, k, n; + Point2d m1c(0, 0), m2c(0, 0); + double t, scale1 = 0, scale2 = 0; + const int count = 7; + + // compute centers and average distances for each of the two point sets + for( i = 0; i < count; i++ ) + { + m1c += Point2d(m1[i]); + m2c += Point2d(m2[i]); + } + + // calculate the normalizing transformations for each of the point sets: + // after the transformation each set will have the mass center at the coordinate origin + // and the average distance from the origin will be ~sqrt(2). + t = 1./count; + m1c *= t; + m2c *= t; + + for( i = 0; i < count; i++ ) + { + scale1 += norm(Point2d(m1[i].x - m1c.x, m1[i].y - m1c.y)); + scale2 += norm(Point2d(m2[i].x - m2c.x, m2[i].y - m2c.y)); + } + + scale1 *= t; + scale2 *= t; + + if( scale1 < FLT_EPSILON || scale2 < FLT_EPSILON ) + return 0; + + scale1 = std::sqrt(2.)/scale1; + scale2 = std::sqrt(2.)/scale2; + // form a linear system: i-th row of A(=a) represents // the equation: (m2[i], 1)'*F*(m1[i], 1) = 0 for( i = 0; i < 7; i++ ) { - double x0 = m1[i].x, y0 = m1[i].y; - double x1 = m2[i].x, y1 = m2[i].y; + double x0 = (m1[i].x - m1c.x)*scale1; + double y0 = (m1[i].y - m1c.y)*scale1; + double x1 = (m2[i].x - m2c.x)*scale2; + double y1 = (m2[i].y - m2c.y)*scale2; a[i*9+0] = x1*x0; a[i*9+1] = x1*y0; @@ -559,6 +594,10 @@ static int run7Point( const Mat& _m1, const Mat& _m2, Mat& _fmatrix ) if( n < 1 || n > 3 ) return n; + // transformation matrices + Matx33d T1( scale1, 0, -scale1*m1c.x, 0, scale1, -scale1*m1c.y, 0, 0, 1 ); + Matx33d T2( scale2, 0, -scale2*m2c.x, 0, scale2, -scale2*m2c.y, 0, 0, 1 ); + for( k = 0; k < n; k++, fmatrix += 9 ) { // for each root form the fundamental matrix @@ -577,6 +616,14 @@ static int run7Point( const Mat& _m1, const Mat& _m2, Mat& _fmatrix ) for( i = 0; i < 8; i++ ) fmatrix[i] = f1[i]*lambda + f2[i]*mu; + + // de-normalize + Mat F(3, 3, CV_64F, fmatrix); + F = T2.t() * F * T1; + + // make F(3,3) = 1 + if(fabs(F.at(8)) > FLT_EPSILON ) + F *= 1. 
/ F.at(8); } return n; diff --git a/modules/core/src/persistence_yml.cpp b/modules/core/src/persistence_yml.cpp index 7742e82770..3f3742b8d1 100644 --- a/modules/core/src/persistence_yml.cpp +++ b/modules/core/src/persistence_yml.cpp @@ -452,19 +452,19 @@ public: if ( d == '<') //support of full type heading from YAML 1.2 { const char* yamlTypeHeading = "' && (size_t)(typeEndPtr - ptr) > headingLenght ) + if ( d == '>' && (size_t)(typeEndPtr - ptr) > headingLength ) { - if ( memcmp(ptr, yamlTypeHeading, headingLenght) == 0 ) + if ( memcmp(ptr, yamlTypeHeading, headingLength) == 0 ) { *typeEndPtr = ' '; - ptr += headingLenght - 1; + ptr += headingLength - 1; is_user = true; //value_type |= FileNode::USER; } diff --git a/modules/dnn/src/caffe/caffe_importer.cpp b/modules/dnn/src/caffe/caffe_importer.cpp index fdd39468a1..4a5a1f1c0a 100644 --- a/modules/dnn/src/caffe/caffe_importer.cpp +++ b/modules/dnn/src/caffe/caffe_importer.cpp @@ -75,6 +75,17 @@ static cv::String toString(const T &v) return ss.str(); } +static inline +MatShape parseBlobShape(const caffe::BlobShape& _input_shape) +{ + MatShape shape; + for (int i = 0; i < _input_shape.dim_size(); i++) + { + shape.push_back((int)_input_shape.dim(i)); + } + return shape; +} + class CaffeImporter { caffe::NetParameter net; @@ -235,10 +246,7 @@ public: } else if (pbBlob.has_shape()) { - const caffe::BlobShape &_shape = pbBlob.shape(); - - for (int i = 0; i < _shape.dim_size(); i++) - shape.push_back((int)_shape.dim(i)); + shape = parseBlobShape(pbBlob.shape()); } else shape.resize(1, 1); // Is a scalar. @@ -334,12 +342,49 @@ public: //setup input layer names std::vector netInputs(net.input_size()); + std::vector inp_shapes; { - for (int inNum = 0; inNum < net.input_size(); inNum++) + int net_input_size = net.input_size(); + for (int inNum = 0; inNum < net_input_size; inNum++) { addedBlobs.push_back(BlobNote(net.input(inNum), 0, inNum)); netInputs[inNum] = net.input(inNum); } + + if (net.input_dim_size() > 0) // deprecated in Caffe proto + { + int net_input_dim_size = net.input_dim_size(); + CV_Check(net_input_dim_size, net_input_dim_size % 4 == 0, ""); + CV_CheckEQ(net_input_dim_size, net_input_size * 4, ""); + for (int inp_id = 0; inp_id < net_input_size; inp_id++) + { + int dim = inp_id * 4; + MatShape shape(4); + shape[0] = net.input_dim(dim); + shape[1] = net.input_dim(dim+1); + shape[2] = net.input_dim(dim+2); + shape[3] = net.input_dim(dim+3); + inp_shapes.push_back(shape); + } + } + else if (net.input_shape_size() > 0) // deprecated in Caffe proto + { + int net_input_shape_size = net.input_shape_size(); + CV_CheckEQ(net_input_shape_size, net_input_size, ""); + for (int inp_id = 0; inp_id < net_input_shape_size; inp_id++) + { + MatShape shape = parseBlobShape(net.input_shape(inp_id)); + inp_shapes.push_back(shape); + } + } + else + { + for (int inp_id = 0; inp_id < net_input_size; inp_id++) + { + MatShape shape; // empty + inp_shapes.push_back(shape); + } + } } for (int li = 0; li < layersSize; li++) @@ -364,6 +409,17 @@ public: addedBlobs.back().outNum = netInputs.size(); netInputs.push_back(addedBlobs.back().name); } + if (layer.has_input_param()) + { + const caffe::InputParameter &inputParameter = layer.input_param(); + int input_shape_size = inputParameter.shape_size(); + CV_CheckEQ(input_shape_size, layer.top_size(), ""); + for (int inp_id = 0; inp_id < input_shape_size; inp_id++) + { + MatShape shape = parseBlobShape(inputParameter.shape(inp_id)); + inp_shapes.push_back(shape); + } + } continue; } else if (type == "BatchNorm") @@ 
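The run7Point() change above normalizes both 7-point sets before forming the linear system: each set is shifted so its centroid sits at the origin and scaled so the average distance from the origin is about sqrt(2); the candidate fundamental matrices are then mapped back with F = T2' * F * T1 and rescaled so that F(3,3) = 1. A NumPy sketch of the same idea (an illustration, not the OpenCV implementation):

```python
import numpy as np

def normalize_points(pts):
    # pts: (N, 2) array. Move the centroid to the origin and scale so the
    # mean distance from the origin becomes sqrt(2).
    c = pts.mean(axis=0)
    mean_dist = np.linalg.norm(pts - c, axis=1).mean()
    s = np.sqrt(2.0) / mean_dist
    T = np.array([[s, 0.0, -s * c[0]],
                  [0.0, s, -s * c[1]],
                  [0.0, 0.0, 1.0]])
    return (pts - c) * s, T

def denormalize_F(F_norm, T1, T2):
    # Undo the normalization applied to both point sets and fix the scale
    # so that F[2, 2] == 1 (when it is not numerically zero).
    F = T2.T @ F_norm @ T1
    if abs(F[2, 2]) > np.finfo(np.float32).eps:
        F = F / F[2, 2]
    return F
```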
-424,35 +480,15 @@ public: } dstNet.setInputsNames(netInputs); - std::vector inp_shapes; - if (net.input_shape_size() > 0 || (layersSize > 0 && net.layer(0).has_input_param() && - net.layer(0).input_param().shape_size() > 0)) { - - int size = (net.input_shape_size() > 0) ? net.input_shape_size() : - net.layer(0).input_param().shape_size(); - for (int inp_id = 0; inp_id < size; inp_id++) + if (inp_shapes.size() > 0) + { + CV_CheckEQ(inp_shapes.size(), netInputs.size(), ""); + for (int inp_id = 0; inp_id < inp_shapes.size(); inp_id++) { - const caffe::BlobShape &_input_shape = (net.input_shape_size() > 0) ? - net.input_shape(inp_id) : - net.layer(0).input_param().shape(inp_id); - MatShape shape; - for (int i = 0; i < _input_shape.dim_size(); i++) { - shape.push_back((int)_input_shape.dim(i)); - } - inp_shapes.push_back(shape); + if (!inp_shapes[inp_id].empty()) + dstNet.setInput(Mat(inp_shapes[inp_id], CV_32F), netInputs[inp_id]); } } - else if (net.input_dim_size() > 0) { - MatShape shape; - for (int dim = 0; dim < net.input_dim_size(); dim++) { - shape.push_back(net.input_dim(dim)); - } - inp_shapes.push_back(shape); - } - - for (int inp_id = 0; inp_id < inp_shapes.size(); inp_id++) { - dstNet.setInput(Mat(inp_shapes[inp_id], CV_32F), netInputs[inp_id]); - } addedBlobs.clear(); } diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index 7a87d46dc2..0577e2c834 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -1418,13 +1418,15 @@ struct Net::Impl clear(); + this->blobsToKeep = blobsToKeep_; + allocateLayers(blobsToKeep_); MapIdToLayerData::iterator it = layers.find(0); CV_Assert(it != layers.end()); it->second.skip = netInputLayer->skip; - initBackend(); + initBackend(blobsToKeep_); if (!netWasAllocated) { @@ -1437,7 +1439,6 @@ struct Net::Impl } netWasAllocated = true; - this->blobsToKeep = blobsToKeep_; if (DNN_NETWORK_DUMP > 0) { @@ -1564,7 +1565,7 @@ struct Net::Impl ldOut.consumers.push_back(LayerPin(inLayerId, outNum)); } - void initBackend() + void initBackend(const std::vector& blobsToKeep_) { CV_TRACE_FUNCTION(); if (preferableBackend == DNN_BACKEND_OPENCV) @@ -1574,7 +1575,7 @@ struct Net::Impl else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) { #ifdef HAVE_INF_ENGINE - initInfEngineBackend(); + initInfEngineBackend(blobsToKeep_); #else CV_Assert(false && "This OpenCV version is built without Inference Engine API support"); #endif @@ -1582,7 +1583,7 @@ struct Net::Impl else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { #ifdef HAVE_DNN_NGRAPH - initNgraphBackend(); + initNgraphBackend(blobsToKeep_); #else CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of Inference Engine + nGraph"); #endif @@ -1688,7 +1689,7 @@ struct Net::Impl } } - void initInfEngineBackend() + void initInfEngineBackend(const std::vector& blobsToKeep_) { CV_TRACE_FUNCTION(); CV_Assert_N(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, haveInfEngine()); @@ -1878,6 +1879,15 @@ struct Net::Impl CV_Assert(!ieNode.empty()); ieNode->net = net; + for (const auto& pin : blobsToKeep_) + { + if (pin.lid == ld.id) + { + ieNode->net->addOutput(ieNode->layer.getName()); + break; + } + } + // Convert weights in FP16 for specific targets. 
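The blobsToKeep_ plumbing added to initBackend()/initInfEngineBackend()/initNgraphBackend() above registers every layer the caller asked to keep as an extra network output, so intermediate blobs remain retrievable when the Inference Engine backends are active. On the API side this corresponds to calling Net::forward() with explicit output names; a rough Python sketch (model files and layer names here are hypothetical):

```python
import numpy as np
import cv2 as cv

# Hypothetical IR model files.
net = cv.dnn.readNet("model.xml", "model.bin")
net.setPreferableBackend(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE)

blob = np.random.rand(1, 3, 224, 224).astype(np.float32)  # example input shape
net.setInput(blob)

# Requesting an intermediate layer by name puts it into blobsToKeep;
# with this change the IE/nGraph graphs expose it as an extra output
# instead of only returning the final layer.
outs = net.forward(["conv1", "prob"])  # hypothetical layer names
```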
if ((preferableTarget == DNN_TARGET_OPENCL_FP16 || preferableTarget == DNN_TARGET_MYRIAD || @@ -1984,7 +1994,7 @@ struct Net::Impl } } - void initNgraphBackend() + void initNgraphBackend(const std::vector& blobsToKeep_) { CV_TRACE_FUNCTION(); CV_Assert_N(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, haveInfEngine()); @@ -2173,6 +2183,14 @@ struct Net::Impl // TF EAST_text_detection ieNode->net->setUnconnectedNodes(ieNode); } + for (const auto& pin : blobsToKeep_) + { + if (pin.lid == ld.id) + { + ieNode->net->addOutput(ieNode->node->get_friendly_name()); + break; + } + } ieNode->net->setNodePtr(&ieNode->node); net->addBlobs(ld.inputBlobsWrappers); diff --git a/modules/dnn/src/ie_ngraph.cpp b/modules/dnn/src/ie_ngraph.cpp index 2a00880c42..d7df547412 100644 --- a/modules/dnn/src/ie_ngraph.cpp +++ b/modules/dnn/src/ie_ngraph.cpp @@ -231,11 +231,10 @@ void InfEngineNgraphNet::init(Target targetId) } } } - } else { - for (const auto& name : requestedOutputs) - { - cnn.addOutput(name); - } + } + for (const auto& name : requestedOutputs) + { + cnn.addOutput(name); } for (const auto& it : cnn.getInputsInfo()) diff --git a/modules/dnn/src/layers/crop_and_resize_layer.cpp b/modules/dnn/src/layers/crop_and_resize_layer.cpp index ab242e1b2e..a4443ed3a2 100644 --- a/modules/dnn/src/layers/crop_and_resize_layer.cpp +++ b/modules/dnn/src/layers/crop_and_resize_layer.cpp @@ -5,6 +5,7 @@ // Copyright (C) 2018, Intel Corporation, all rights reserved. // Third party copyrights are property of their respective owners. #include "../precomp.hpp" +#include "../ie_ngraph.hpp" #include "layers_common.hpp" #ifdef HAVE_CUDA @@ -25,6 +26,14 @@ public: outHeight = params.get("height"); } + virtual bool supportBackend(int backendId) CV_OVERRIDE + { + return backendId == DNN_BACKEND_OPENCV + || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH + || backendId == DNN_BACKEND_CUDA + ; + } + bool getMemoryShapes(const std::vector &inputs, const int requiredOutputs, std::vector &outputs, @@ -41,11 +50,6 @@ public: return false; } - virtual bool supportBackend(int backendId) CV_OVERRIDE - { - return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA; - } - void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE { CV_TRACE_FUNCTION(); @@ -121,6 +125,41 @@ public: } } +#ifdef HAVE_DNN_NGRAPH + virtual Ptr initNgraph(const std::vector >& inputs, + const std::vector >& nodes) CV_OVERRIDE + { + // Slice second input: from 1x1xNx7 to 1x1xNx5 + auto input = nodes[0].dynamicCast()->node; + auto rois = nodes[1].dynamicCast()->node; + + std::vector dims = rois->get_shape(), offsets(4, 0); + offsets[3] = 2; + dims[3] = 7; + + auto lower_bounds = std::make_shared(ngraph::element::i64, + ngraph::Shape{offsets.size()}, offsets.data()); + auto upper_bounds = std::make_shared(ngraph::element::i64, + ngraph::Shape{dims.size()}, dims.data()); + auto strides = std::make_shared(ngraph::element::i64, + ngraph::Shape{dims.size()}, std::vector((int64_t)dims.size(), 1)); + auto slice = std::make_shared(rois, + lower_bounds, upper_bounds, strides, std::vector{}, std::vector{}); + + // Reshape rois from 4D to 2D + std::vector shapeData = {dims[2], 5}; + auto shape = std::make_shared(ngraph::element::i64, ngraph::Shape{2}, shapeData.data()); + auto reshape = std::make_shared(slice, shape, true); + + auto roiPooling = + std::make_shared(input, reshape, + ngraph::Shape{(size_t)outHeight, (size_t)outWidth}, + 1.0f, "bilinear"); + + return Ptr(new 
InfEngineNgraphNode(roiPooling)); + } +#endif // HAVE_DNN_NGRAPH + #ifdef HAVE_CUDA Ptr initCUDA( void *context_, diff --git a/modules/dnn/src/layers/resize_layer.cpp b/modules/dnn/src/layers/resize_layer.cpp index 0c5f76baab..3a06bb0788 100644 --- a/modules/dnn/src/layers/resize_layer.cpp +++ b/modules/dnn/src/layers/resize_layer.cpp @@ -41,7 +41,7 @@ public: CV_Assert(params.has("zoom_factor_x") && params.has("zoom_factor_y")); } interpolation = params.get("interpolation"); - CV_Assert(interpolation == "nearest" || interpolation == "bilinear"); + CV_Assert(interpolation == "nearest" || interpolation == "opencv_linear" || interpolation == "bilinear"); alignCorners = params.get("align_corners", false); } @@ -115,14 +115,15 @@ public: Mat& inp = inputs[0]; Mat& out = outputs[0]; - if (interpolation == "nearest") + if (interpolation == "nearest" || interpolation == "opencv_linear") { + InterpolationFlags mode = interpolation == "nearest" ? INTER_NEAREST : INTER_LINEAR; for (size_t n = 0; n < inputs[0].size[0]; ++n) { for (size_t ch = 0; ch < inputs[0].size[1]; ++ch) { resize(getPlane(inp, n, ch), getPlane(out, n, ch), - Size(outWidth, outHeight), 0, 0, INTER_NEAREST); + Size(outWidth, outHeight), 0, 0, mode); } } } diff --git a/modules/dnn/src/layers/scale_layer.cpp b/modules/dnn/src/layers/scale_layer.cpp index 684b971cbb..c60a4174b7 100644 --- a/modules/dnn/src/layers/scale_layer.cpp +++ b/modules/dnn/src/layers/scale_layer.cpp @@ -61,7 +61,8 @@ public: return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || backendId == DNN_BACKEND_HALIDE || - ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && axis == 1); + (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && axis == 1) || + (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && axis > 0); } void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE @@ -263,22 +264,26 @@ public: auto ieInpNode = nodes[0].dynamicCast()->node; std::vector shape(ieInpNode->get_shape().size(), 1); - shape[1] = numChannels; - auto weight = hasWeights ? - std::make_shared(ngraph::element::f32, - ngraph::Shape(shape), blobs[0].data) : - std::make_shared(ngraph::element::f32, - ngraph::Shape(shape), std::vector(numChannels, 1).data()); + int cAxis = clamp(axis, shape.size()); + shape[cAxis] = numChannels; - auto bias = hasBias ? - std::make_shared(ngraph::element::f32, - ngraph::Shape(shape), blobs.back().data) : - std::make_shared(ngraph::element::f32, - ngraph::Shape(shape), std::vector(numChannels, 0).data()); - - auto scale_node = std::make_shared(ieInpNode, weight, ngraph::op::AutoBroadcastType::NUMPY); - auto scale_shift = std::make_shared(scale_node, bias, ngraph::op::AutoBroadcastType::NUMPY); - return Ptr(new InfEngineNgraphNode(scale_shift)); + auto node = ieInpNode; + if (hasWeights) + { + auto weight = std::make_shared(ngraph::element::f32, + ngraph::Shape(shape), blobs[0].data); + node = std::make_shared(node, weight, ngraph::op::AutoBroadcastType::NUMPY); + } + if (hasBias || !hasWeights) + { + auto bias = hasBias ? 
+ std::make_shared(ngraph::element::f32, + ngraph::Shape(shape), blobs.back().data) : + std::make_shared(ngraph::element::f32, + ngraph::Shape(shape), std::vector(numChannels, 0).data()); + node = std::make_shared(node, bias, ngraph::op::AutoBroadcastType::NUMPY); + } + return Ptr(new InfEngineNgraphNode(node)); } #endif // HAVE_DNN_NGRAPH diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 832bbcacbf..c68846299f 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -485,16 +485,23 @@ void ONNXImporter::populateNet(Net dstNet) } else if (layer_type == "Split") { - DictValue splits = layerParams.get("split"); - const int numSplits = splits.size(); - CV_Assert(numSplits > 1); - - std::vector slicePoints(numSplits - 1, splits.get(0)); - for (int i = 1; i < splits.size() - 1; ++i) + if (layerParams.has("split")) { - slicePoints[i] = slicePoints[i - 1] + splits.get(i - 1); + DictValue splits = layerParams.get("split"); + const int numSplits = splits.size(); + CV_Assert(numSplits > 1); + + std::vector slicePoints(numSplits - 1, splits.get(0)); + for (int i = 1; i < splits.size() - 1; ++i) + { + slicePoints[i] = slicePoints[i - 1] + splits.get(i - 1); + } + layerParams.set("slice_point", DictValue::arrayInt(&slicePoints[0], slicePoints.size())); + } + else + { + layerParams.set("num_split", node_proto.output_size()); } - layerParams.set("slice_point", DictValue::arrayInt(&slicePoints[0], slicePoints.size())); layerParams.type = "Slice"; } else if (layer_type == "Add" || layer_type == "Sum") @@ -973,6 +980,15 @@ void ONNXImporter::populateNet(Net dstNet) replaceLayerParam(layerParams, "width_scale", "zoom_factor_x"); } replaceLayerParam(layerParams, "mode", "interpolation"); + + if (layerParams.get("interpolation") == "linear" && framework_name == "pytorch") { + layerParams.type = "Resize"; + Mat scales = getBlob(node_proto, constBlobs, 1); + CV_Assert(scales.total() == 4); + layerParams.set("interpolation", "opencv_linear"); + layerParams.set("zoom_factor_y", scales.at(2)); + layerParams.set("zoom_factor_x", scales.at(3)); + } } else if (layer_type == "LogSoftmax") { diff --git a/modules/dnn/test/test_ie_models.cpp b/modules/dnn/test/test_ie_models.cpp index e697c5a798..869a3575d7 100644 --- a/modules/dnn/test/test_ie_models.cpp +++ b/modules/dnn/test/test_ie_models.cpp @@ -73,28 +73,7 @@ struct OpenVINOModelTestCaseInfo static const std::map& getOpenVINOTestModels() { static std::map g_models { -#if INF_ENGINE_RELEASE <= 2018050000 - { "age-gender-recognition-retail-0013", { - "deployment_tools/intel_models/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013", - "deployment_tools/intel_models/age-gender-recognition-retail-0013/FP16/age-gender-recognition-retail-0013" - }}, - { "face-person-detection-retail-0002", { - "deployment_tools/intel_models/face-person-detection-retail-0002/FP32/face-person-detection-retail-0002", - "deployment_tools/intel_models/face-person-detection-retail-0002/FP16/face-person-detection-retail-0002" - }}, - { "head-pose-estimation-adas-0001", { - "deployment_tools/intel_models/head-pose-estimation-adas-0001/FP32/head-pose-estimation-adas-0001", - "deployment_tools/intel_models/head-pose-estimation-adas-0001/FP16/head-pose-estimation-adas-0001" - }}, - { "person-detection-retail-0002", { - "deployment_tools/intel_models/person-detection-retail-0002/FP32/person-detection-retail-0002", - 
"deployment_tools/intel_models/person-detection-retail-0002/FP16/person-detection-retail-0002" - }}, - { "vehicle-detection-adas-0002", { - "deployment_tools/intel_models/vehicle-detection-adas-0002/FP32/vehicle-detection-adas-0002", - "deployment_tools/intel_models/vehicle-detection-adas-0002/FP16/vehicle-detection-adas-0002" - }} -#else +#if INF_ENGINE_RELEASE >= 2018050000 // layout is defined by open_model_zoo/model_downloader // Downloaded using these parameters for Open Model Zoo downloader (2019R1): // ./downloader.py -o ${OPENCV_DNN_TEST_DATA_PATH}/omz_intel_models --cache_dir ${OPENCV_DNN_TEST_DATA_PATH}/.omz_cache/ \ @@ -118,7 +97,16 @@ static const std::map& getOpenVINOTestMo { "vehicle-detection-adas-0002", { "Transportation/object_detection/vehicle/mobilenet-reduced-ssd/dldt/vehicle-detection-adas-0002", "Transportation/object_detection/vehicle/mobilenet-reduced-ssd/dldt/vehicle-detection-adas-0002-fp16" - }} + }}, +#endif +#if INF_ENGINE_RELEASE >= 2020010000 + // Downloaded using these parameters for Open Model Zoo downloader (2020.1): + // ./downloader.py -o ${OPENCV_DNN_TEST_DATA_PATH}/omz_intel_models --cache_dir ${OPENCV_DNN_TEST_DATA_PATH}/.omz_cache/ \ + // --name person-detection-retail-0013 + { "person-detection-retail-0013", { // IRv10 + "intel/person-detection-retail-0013/FP32/person-detection-retail-0013", + "intel/person-detection-retail-0013/FP16/person-detection-retail-0013" + }}, #endif }; @@ -305,8 +293,8 @@ TEST_P(DNNTestOpenVINO, models) OpenVINOModelTestCaseInfo modelInfo = it->second; std::string modelPath = isFP16 ? modelInfo.modelPathFP16 : modelInfo.modelPathFP32; - std::string xmlPath = findDataFile(modelPath + ".xml"); - std::string binPath = findDataFile(modelPath + ".bin"); + std::string xmlPath = findDataFile(modelPath + ".xml", false); + std::string binPath = findDataFile(modelPath + ".bin", false); std::map inputsMap; std::map ieOutputsMap, cvOutputsMap; @@ -316,13 +304,19 @@ TEST_P(DNNTestOpenVINO, models) runIE(targetId, xmlPath, binPath, inputsMap, ieOutputsMap); runCV(backendId, targetId, xmlPath, binPath, inputsMap, cvOutputsMap); + double eps = 0; +#if INF_ENGINE_VER_MAJOR_GE(2020010000) + if (targetId == DNN_TARGET_CPU && checkHardwareSupport(CV_CPU_AVX_512F)) + eps = 1e-5; +#endif + EXPECT_EQ(ieOutputsMap.size(), cvOutputsMap.size()); for (auto& srcIt : ieOutputsMap) { auto dstIt = cvOutputsMap.find(srcIt.first); CV_Assert(dstIt != cvOutputsMap.end()); double normInf = cvtest::norm(srcIt.second, dstIt->second, cv::NORM_INF); - EXPECT_EQ(normInf, 0); + EXPECT_LE(normInf, eps) << "output=" << srcIt.first; } } diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index 1fca878ec1..df953ca79d 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -335,6 +335,9 @@ TEST_P(Test_ONNX_layers, Padding) TEST_P(Test_ONNX_layers, Resize) { testONNXModels("resize_nearest"); + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + testONNXModels("resize_bilinear"); } TEST_P(Test_ONNX_layers, MultyInputs) @@ -411,6 +414,18 @@ TEST_P(Test_ONNX_layers, ReduceL2) testONNXModels("reduceL2"); } +TEST_P(Test_ONNX_layers, Split) +{ + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + testONNXModels("split_1"); + testONNXModels("split_2"); + 
testONNXModels("split_3"); + testONNXModels("split_4"); +} + TEST_P(Test_ONNX_layers, Slice) { #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2019010000) diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index 1ef235e8a0..e25243b52d 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -994,8 +994,16 @@ TEST(Test_TensorFlow, two_inputs) normAssert(out, firstInput + secondInput); } -TEST(Test_TensorFlow, Mask_RCNN) +TEST_P(Test_TensorFlow_nets, Mask_RCNN) { + static const double kMaskThreshold = 0.5; + + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + + if (target == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X); + applyTestTag(CV_TEST_TAG_MEMORY_1GB, CV_TEST_TAG_DEBUG_VERYLONG); Mat img = imread(findDataFile("dnn/street.png")); std::string proto = findDataFile("dnn/mask_rcnn_inception_v2_coco_2018_01_28.pbtxt"); @@ -1006,7 +1014,8 @@ TEST(Test_TensorFlow, Mask_RCNN) Mat refMasks = blobFromNPY(path("mask_rcnn_inception_v2_coco_2018_01_28.detection_masks.npy")); Mat blob = blobFromImage(img, 1.0f, Size(800, 800), Scalar(), true, false); - net.setPreferableBackend(DNN_BACKEND_OPENCV); + net.setPreferableBackend(backend); + net.setPreferableTarget(target); net.setInput(blob); @@ -1020,7 +1029,10 @@ TEST(Test_TensorFlow, Mask_RCNN) Mat outDetections = outs[0]; Mat outMasks = outs[1]; - normAssertDetections(refDetections, outDetections, "", /*threshold for zero confidence*/1e-5); + + double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.019 : 2e-5; + double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 
0.018 : default_lInf; + normAssertDetections(refDetections, outDetections, "", /*threshold for zero confidence*/1e-5, scoreDiff, iouDiff); // Output size of masks is NxCxHxW where // N - number of detected boxes @@ -1044,7 +1056,18 @@ TEST(Test_TensorFlow, Mask_RCNN) outMasks(srcRanges).copyTo(masks(dstRanges)); } cv::Range topRefMasks[] = {Range::all(), Range(0, numDetections), Range::all(), Range::all()}; - normAssert(masks, refMasks(&topRefMasks[0])); + refMasks = refMasks(&topRefMasks[0]); + + // make binary masks + cv::threshold(masks.reshape(1, 1), masks, kMaskThreshold, 1, THRESH_BINARY); + cv::threshold(refMasks.reshape(1, 1), refMasks, kMaskThreshold, 1, THRESH_BINARY); + + double inter = cv::countNonZero(masks & refMasks); + double area = cv::countNonZero(masks | refMasks); + EXPECT_GE(inter / area, 0.99); + + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + expectNoFallbacks(net); } } diff --git a/modules/highgui/src/window_cocoa.mm b/modules/highgui/src/window_cocoa.mm index 08b11ae89a..29a0278c98 100644 --- a/modules/highgui/src/window_cocoa.mm +++ b/modules/highgui/src/window_cocoa.mm @@ -112,11 +112,14 @@ static bool wasInitialized = false; BOOL autosize; BOOL firstContent; int status; + int x0, y0; } @property(assign) CvMouseCallback mouseCallback; @property(assign) void *mouseParam; @property(assign) BOOL autosize; @property(assign) BOOL firstContent; +@property(assign) int x0; +@property(assign) int y0; @property(retain) NSMutableDictionary *sliders; @property(readwrite) int status; - (CVView *)contentView; @@ -252,6 +255,16 @@ CV_IMPL void cvShowImage( const char* name, const CvArr* arr) contentSize.height = scaledImageSize.height + [window contentView].sliderHeight; contentSize.width = std::max(scaledImageSize.width, MIN_SLIDER_WIDTH); [window setContentSize:contentSize]; //adjust sliders to fit new window size + if([window firstContent]) + { + int x = [window x0]; + int y = [window y0]; + if(x >= 0 && y >= 0) + { + y = [[window screen] visibleFrame].size.height - y; + [window setFrameTopLeftPoint:NSMakePoint(x, y)]; + } + } } } [window setFirstContent:NO]; @@ -275,7 +288,6 @@ CV_IMPL void cvResizeWindow( const char* name, int width, int height) CV_IMPL void cvMoveWindow( const char* name, int x, int y) { - CV_FUNCNAME("cvMoveWindow"); __BEGIN__; @@ -287,8 +299,14 @@ CV_IMPL void cvMoveWindow( const char* name, int x, int y) //cout << "cvMoveWindow"<< endl; window = cvGetWindow(name); if(window) { - y = [[window screen] frame].size.height - y; - [window setFrameTopLeftPoint:NSMakePoint(x, y)]; + if([window firstContent]) { + [window setX0:x]; + [window setY0:y]; + } + else { + y = [[window screen] visibleFrame].size.height - y; + [window setFrameTopLeftPoint:NSMakePoint(x, y)]; + } } [localpool1 drain]; @@ -557,6 +575,8 @@ CV_IMPL int cvNamedWindow( const char* name, int flags ) [window setFrameTopLeftPoint:initContentRect.origin]; [window setFirstContent:YES]; + [window setX0:-1]; + [window setY0:-1]; [window setContentView:[[CVView alloc] init]]; @@ -819,6 +839,8 @@ static NSSize constrainAspectRatio(NSSize base, NSSize constraint) { @synthesize mouseParam; @synthesize autosize; @synthesize firstContent; +@synthesize x0; +@synthesize y0; @synthesize sliders; @synthesize status; diff --git a/modules/imgproc/test/test_imgwarp.cpp b/modules/imgproc/test/test_imgwarp.cpp index 4b4f17ac94..d63e07969c 100644 --- a/modules/imgproc/test/test_imgwarp.cpp +++ b/modules/imgproc/test/test_imgwarp.cpp @@ -1413,6 +1413,21 @@ TEST(Resize, lanczos4_regression_16192) 
EXPECT_EQ(cvtest::norm(dst, expected, NORM_INF), 0) << dst(Rect(0,0,8,8)); } +TEST(Resize, DISABLED_nearest_regression_15075) // reverted https://github.com/opencv/opencv/pull/16497 +{ + const int C = 5; + const int i1 = 5, j1 = 5; + Size src_size(12, 12); + Size dst_size(11, 11); + + cv::Mat src = cv::Mat::zeros(src_size, CV_8UC(C)), dst; + for (int j = 0; j < C; j++) + src.col(i1).row(j1).data[j] = 1; + + cv::resize(src, dst, dst_size, 0, 0, INTER_NEAREST); + EXPECT_EQ(C, cvtest::norm(dst, NORM_L1)) << src.size; +} + TEST(Imgproc_Warp, multichannel) { static const int inter_types[] = {INTER_NEAREST, INTER_AREA, INTER_CUBIC, diff --git a/modules/python/package/template/config.py.in b/modules/python/package/template/config.py.in index 5fc444f175..7f18a920e0 100644 --- a/modules/python/package/template/config.py.in +++ b/modules/python/package/template/config.py.in @@ -1,3 +1,5 @@ +import os + BINARIES_PATHS = [ @CMAKE_PYTHON_BINARIES_PATH@ ] + BINARIES_PATHS diff --git a/modules/python/python_loader.cmake b/modules/python/python_loader.cmake index 663be5c824..31cd33505a 100644 --- a/modules/python/python_loader.cmake +++ b/modules/python/python_loader.cmake @@ -58,7 +58,13 @@ if(NOT OpenCV_FOUND) # Ignore "standalone" builds of Python bindings else() list(APPEND CMAKE_PYTHON_BINARIES_INSTALL_PATH "os.path.join(${CMAKE_PYTHON_EXTENSION_INSTALL_PATH_BASE}, '${OPENCV_LIB_INSTALL_PATH}')") endif() - string(REPLACE ";" ",\n " CMAKE_PYTHON_BINARIES_PATH "${CMAKE_PYTHON_BINARIES_INSTALL_PATH}") + set(CMAKE_PYTHON_BINARIES_PATH "${CMAKE_PYTHON_BINARIES_INSTALL_PATH}") + if (WIN32 AND HAVE_CUDA) + if (DEFINED CUDA_TOOLKIT_ROOT_DIR) + list(APPEND CMAKE_PYTHON_BINARIES_PATH "os.path.join(os.getenv('CUDA_PATH', '${CUDA_TOOLKIT_ROOT_DIR}'), 'bin')") + endif() + endif() + string(REPLACE ";" ",\n " CMAKE_PYTHON_BINARIES_PATH "${CMAKE_PYTHON_BINARIES_PATH}") configure_file("${PYTHON_SOURCE_DIR}/package/template/config.py.in" "${__python_loader_install_tmp_path}/cv2/config.py" @ONLY) install(FILES "${__python_loader_install_tmp_path}/cv2/config.py" DESTINATION "${OPENCV_PYTHON_INSTALL_PATH}/cv2/" COMPONENT python) endif() diff --git a/samples/cpp/falsecolor.cpp b/samples/cpp/falsecolor.cpp index ed422b39c1..f73ffad4ce 100644 --- a/samples/cpp/falsecolor.cpp +++ b/samples/cpp/falsecolor.cpp @@ -41,10 +41,10 @@ static Mat DrawMyImage(int thickness,int nbShape) { Mat img=Mat::zeros(500,256*thickness+100,CV_8UC1); int offsetx = 50, offsety = 25; - int lineLenght = 50; + int lineLength = 50; for (int i=0;i<256;i++) - line(img,Point(thickness*i+ offsetx, offsety),Point(thickness*i+ offsetx, offsety+ lineLenght),Scalar(i), thickness); + line(img,Point(thickness*i+ offsetx, offsety),Point(thickness*i+ offsetx, offsety+ lineLength),Scalar(i), thickness); RNG r; Point center; int radius; @@ -57,19 +57,19 @@ static Mat DrawMyImage(int thickness,int nbShape) int typeShape = r.uniform(MyCIRCLE, MyELLIPSE+1); switch (typeShape) { case MyCIRCLE: - center = Point(r.uniform(offsetx,img.cols- offsetx), r.uniform(offsety + lineLenght, img.rows - offsety)); + center = Point(r.uniform(offsetx,img.cols- offsetx), r.uniform(offsety + lineLength, img.rows - offsety)); radius = r.uniform(1, min(offsetx, offsety)); circle(img,center,radius,Scalar(i),-1); break; case MyRECTANGLE: - center = Point(r.uniform(offsetx, img.cols - offsetx), r.uniform(offsety + lineLenght, img.rows - offsety)); + center = Point(r.uniform(offsetx, img.cols - offsetx), r.uniform(offsety + lineLength, img.rows - offsety)); width = r.uniform(1, min(offsetx, 
offsety)); height = r.uniform(1, min(offsetx, offsety)); rc = Rect(center-Point(width ,height )/2, center + Point(width , height )/2); rectangle(img,rc, Scalar(i), -1); break; case MyELLIPSE: - center = Point(r.uniform(offsetx, img.cols - offsetx), r.uniform(offsety + lineLenght, img.rows - offsety)); + center = Point(r.uniform(offsetx, img.cols - offsetx), r.uniform(offsety + lineLength, img.rows - offsety)); width = r.uniform(1, min(offsetx, offsety)); height = r.uniform(1, min(offsetx, offsety)); angle = r.uniform(0, 180); diff --git a/samples/dnn/human_parsing.py b/samples/dnn/human_parsing.py index c4ac11bad2..f84d2038e4 100644 --- a/samples/dnn/human_parsing.py +++ b/samples/dnn/human_parsing.py @@ -40,6 +40,7 @@ Follow these steps if you want to convert the original model yourself: ''' import argparse +import os.path import numpy as np import cv2 as cv @@ -48,12 +49,11 @@ backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv. targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD) -def preprocess(image_path): +def preprocess(image): """ Create 4-dimensional blob from image and flip image - :param image_path: path to input image + :param image: input image """ - image = cv.imread(image_path) image_rev = np.flip(image, axis=1) input = cv.dnn.blobFromImages([image, image_rev], mean=(104.00698793, 116.66876762, 122.67891434)) return input @@ -137,15 +137,15 @@ def decode_labels(gray_image): return segm -def parse_human(image_path, model_path, backend=cv.dnn.DNN_BACKEND_OPENCV, target=cv.dnn.DNN_TARGET_CPU): +def parse_human(image, model_path, backend=cv.dnn.DNN_BACKEND_OPENCV, target=cv.dnn.DNN_TARGET_CPU): """ Prepare input for execution, run net and postprocess output to parse human. 
- :param image_path: path to input image + :param image: input image :param model_path: path to JPPNet model :param backend: name of computation backend :param target: name of computation target """ - input = preprocess(image_path) + input = preprocess(image) input_h, input_w = input.shape[2:] output = run_net(input, model_path, backend, target) grayscale_out = postprocess(output, (input_w, input_h)) @@ -157,7 +157,7 @@ if __name__ == '__main__': parser = argparse.ArgumentParser(description='Use this script to run human parsing using JPPNet', formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('--input', '-i', required=True, help='Path to input image.') - parser.add_argument('--model', '-m', required=True, help='Path to pb model.') + parser.add_argument('--model', '-m', default='lip_jppnet_384.pb', help='Path to pb model.') parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int, help="Choose one of computation backends: " "%d: automatically (by default), " @@ -171,7 +171,11 @@ if __name__ == '__main__': '%d: VPU' % targets) args, _ = parser.parse_known_args() - output = parse_human(args.input, args.model, args.backend, args.target) + if not os.path.isfile(args.model): + raise OSError("Model not exist") + + image = cv.imread(args.input) + output = parse_human(image, args.model, args.backend, args.target) winName = 'Deep learning human parsing in OpenCV' cv.namedWindow(winName, cv.WINDOW_AUTOSIZE) cv.imshow(winName, output) diff --git a/samples/dnn/virtual_try_on.py b/samples/dnn/virtual_try_on.py new file mode 100644 index 0000000000..bbc655a276 --- /dev/null +++ b/samples/dnn/virtual_try_on.py @@ -0,0 +1,465 @@ +#!/usr/bin/env python3 +''' +You can download the Geometric Matching Module model from https://www.dropbox.com/s/tyhc73xa051grjp/cp_vton_gmm.onnx?dl=0 +You can download the Try-On Module model from https://www.dropbox.com/s/q2x97ve2h53j66k/cp_vton_tom.onnx?dl=0 +You can download the cloth segmentation model from https://www.dropbox.com/s/qag9vzambhhkvxr/lip_jppnet_384.pb?dl=0 +You can find the OpenPose proto in opencv_extra/testdata/dnn/openpose_pose_coco.prototxt +and get .caffemodel using opencv_extra/testdata/dnn/download_models.py +''' + +import argparse +import os.path +import numpy as np +import cv2 as cv + +from numpy import linalg +from common import findFile +from human_parsing import parse_human + +backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_BACKEND_OPENCV) +targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD) + +parser = argparse.ArgumentParser(description='Use this script to run virtial try-on using CP-VTON', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument('--input_image', '-i', required=True, help='Path to image with person.') +parser.add_argument('--input_cloth', '-c', required=True, help='Path to target cloth image') +parser.add_argument('--gmm_model', '-gmm', default='cp_vton_gmm.onnx', help='Path to Geometric Matching Module .onnx model.') +parser.add_argument('--tom_model', '-tom', default='cp_vton_tom.onnx', help='Path to Try-On Module .onnx model.') +parser.add_argument('--segmentation_model', default='lip_jppnet_384.pb', help='Path to cloth segmentation .pb model.') +parser.add_argument('--openpose_proto', default='openpose_pose_coco.prototxt', help='Path to OpenPose .prototxt model was trained on COCO dataset.') 
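With the human_parsing.py change above, parse_human() now takes a decoded image rather than a file path, which is what lets this new sample reuse it on an already-cropped frame. Roughly (the input path below is a placeholder):

```python
import cv2 as cv
from human_parsing import parse_human

person = cv.imread("person.jpg")  # placeholder input image
# lip_jppnet_384.pb is the default segmentation model name used by both samples
segm = parse_human(person, "lip_jppnet_384.pb")
segm = cv.resize(segm, (192, 256), interpolation=cv.INTER_LINEAR)
```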
+parser.add_argument('--openpose_model', default='openpose_pose_coco.caffemodel', help='Path to OpenPose .caffemodel model was trained on COCO dataset.') +parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int, + help="Choose one of computation backends: " + "%d: automatically (by default), " + "%d: Halide language (http://halide-lang.org/), " + "%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), " + "%d: OpenCV implementation" % backends) +parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int, + help='Choose one of target computation devices: ' + '%d: CPU target (by default), ' + '%d: OpenCL, ' + '%d: OpenCL fp16 (half-float precision), ' + '%d: VPU' % targets) +args, _ = parser.parse_known_args() + + +def get_pose_map(image, proto_path, model_path, backend, target, height=256, width=192): + radius = 5 + inp = cv.dnn.blobFromImage(image, 1.0 / 255, (width, height)) + + net = cv.dnn.readNet(proto_path, model_path) + net.setPreferableBackend(backend) + net.setPreferableTarget(target) + net.setInput(inp) + out = net.forward() + + threshold = 0.1 + _, out_c, out_h, out_w = out.shape + pose_map = np.zeros((height, width, out_c - 1)) + # last label: Background + for i in range(0, out.shape[1] - 1): + heatMap = out[0, i, :, :] + keypoint = np.full((height, width), -1) + _, conf, _, point = cv.minMaxLoc(heatMap) + x = width * point[0] // out_w + y = height * point[1] // out_h + if conf > threshold and x > 0 and y > 0: + keypoint[y - radius:y + radius, x - radius:x + radius] = 1 + pose_map[:, :, i] = keypoint + + pose_map = pose_map.transpose(2, 0, 1) + return pose_map + + +class BilinearFilter(object): + """ + PIL bilinear resize implementation + image = image.resize((image_width // 16, image_height // 16), Image.BILINEAR) + """ + def _precompute_coeffs(self, inSize, outSize): + filterscale = max(1.0, inSize / outSize) + ksize = int(np.ceil(filterscale)) * 2 + 1 + + kk = np.zeros(shape=(outSize * ksize, ), dtype=np.float32) + bounds = np.empty(shape=(outSize * 2, ), dtype=np.int32) + + centers = (np.arange(outSize) + 0.5) * filterscale + 0.5 + bounds[::2] = np.where(centers - filterscale < 0, 0, centers - filterscale) + bounds[1::2] = np.where(centers + filterscale > inSize, inSize, centers + filterscale) - bounds[::2] + xmins = bounds[::2] - centers + 1 + + points = np.array([np.arange(row) + xmins[i] for i, row in enumerate(bounds[1::2])]) / filterscale + for xx in range(0, outSize): + point = points[xx] + bilinear = np.where(point < 1.0, 1.0 - abs(point), 0.0) + ww = np.sum(bilinear) + kk[xx * ksize : xx * ksize + bilinear.size] = np.where(ww == 0.0, bilinear, bilinear / ww) + return bounds, kk, ksize + + def _resample_horizontal(self, out, img, ksize, bounds, kk): + for yy in range(0, out.shape[0]): + for xx in range(0, out.shape[1]): + xmin = bounds[xx * 2 + 0] + xmax = bounds[xx * 2 + 1] + k = kk[xx * ksize : xx * ksize + xmax] + out[yy, xx] = np.round(np.sum(img[yy, xmin : xmin + xmax] * k)) + + def _resample_vertical(self, out, img, ksize, bounds, kk): + for yy in range(0, out.shape[0]): + ymin = bounds[yy * 2 + 0] + ymax = bounds[yy * 2 + 1] + k = kk[yy * ksize: yy * ksize + ymax] + out[yy] = np.round(np.sum(img[ymin : ymin + ymax, 0:out.shape[1]] * k[:, np.newaxis], axis=0)) + + def imaging_resample(self, img, xsize, ysize): + height, width, *args = img.shape + bounds_horiz, kk_horiz, ksize_horiz = self._precompute_coeffs(width, xsize) + bounds_vert, kk_vert, ksize_vert = 
self._precompute_coeffs(height, ysize) + + out_hor = np.empty((img.shape[0], xsize), dtype=np.uint8) + self._resample_horizontal(out_hor, img, ksize_horiz, bounds_horiz, kk_horiz) + out = np.empty((ysize, xsize), dtype=np.uint8) + self._resample_vertical(out, out_hor, ksize_vert, bounds_vert, kk_vert) + return out + + +class CpVton(object): + def __init__(self, gmm_model, tom_model, backend, target): + super(CpVton, self).__init__() + self.gmm_net = cv.dnn.readNet(gmm_model) + self.tom_net = cv.dnn.readNet(tom_model) + self.gmm_net.setPreferableBackend(backend) + self.gmm_net.setPreferableTarget(target) + self.tom_net.setPreferableBackend(backend) + self.tom_net.setPreferableTarget(target) + + def prepare_agnostic(self, segm_image, input_image, pose_map, height=256, width=192): + palette = { + 'Background' : (0, 0, 0), + 'Hat' : (128, 0, 0), + 'Hair' : (255, 0, 0), + 'Glove' : (0, 85, 0), + 'Sunglasses' : (170, 0, 51), + 'UpperClothes' : (255, 85, 0), + 'Dress' : (0, 0, 85), + 'Coat' : (0, 119, 221), + 'Socks' : (85, 85, 0), + 'Pants' : (0, 85, 85), + 'Jumpsuits' : (85, 51, 0), + 'Scarf' : (52, 86, 128), + 'Skirt' : (0, 128, 0), + 'Face' : (0, 0, 255), + 'Left-arm' : (51, 170, 221), + 'Right-arm' : (0, 255, 255), + 'Left-leg' : (85, 255, 170), + 'Right-leg' : (170, 255, 85), + 'Left-shoe' : (255, 255, 0), + 'Right-shoe' : (255, 170, 0) + } + color2label = {val: key for key, val in palette.items()} + head_labels = ['Hat', 'Hair', 'Sunglasses', 'Face', 'Pants', 'Skirt'] + + segm_image = cv.cvtColor(segm_image, cv.COLOR_BGR2RGB) + phead = np.zeros((1, height, width), dtype=np.float32) + pose_shape = np.zeros((height, width), dtype=np.uint8) + for r in range(height): + for c in range(width): + pixel = tuple(segm_image[r, c]) + if tuple(pixel) in color2label: + if color2label[pixel] in head_labels: + phead[0, r, c] = 1 + if color2label[pixel] != 'Background': + pose_shape[r, c] = 255 + + input_image = cv.dnn.blobFromImage(input_image, 1.0 / 127.5, (width, height), mean=(127.5, 127.5, 127.5), swapRB=True) + input_image = input_image.squeeze(0) + + img_head = input_image * phead - (1 - phead) + + downsample = BilinearFilter() + down = downsample.imaging_resample(pose_shape, width // 16, height // 16) + res_shape = cv.resize(down, (width, height), cv.INTER_LINEAR) + + res_shape = cv.dnn.blobFromImage(res_shape, 1.0 / 127.5, mean=(127.5, 127.5, 127.5), swapRB=True) + res_shape = res_shape.squeeze(0) + + agnostic = np.concatenate((res_shape, img_head, pose_map), axis=0) + agnostic = np.expand_dims(agnostic, axis=0) + return agnostic + + def get_warped_cloth(self, cloth_img, agnostic, height=256, width=192): + cloth = cv.dnn.blobFromImage(cloth_img, 1.0 / 127.5, (width, height), mean=(127.5, 127.5, 127.5), swapRB=True) + + self.gmm_net.setInput(agnostic, "input.1") + self.gmm_net.setInput(cloth, "input.18") + theta = self.gmm_net.forward() + + grid = self._generate_grid(theta) + warped_cloth = self._bilinear_sampler(cloth, grid).astype(np.float32) + return warped_cloth + + def get_tryon(self, agnostic, warp_cloth): + inp = np.concatenate([agnostic, warp_cloth], axis=1) + self.tom_net.setInput(inp) + out = self.tom_net.forward() + + p_rendered, m_composite = np.split(out, [3], axis=1) + p_rendered = np.tanh(p_rendered) + m_composite = 1 / (1 + np.exp(-m_composite)) + + p_tryon = warp_cloth * m_composite + p_rendered * (1 - m_composite) + rgb_p_tryon = cv.cvtColor(p_tryon.squeeze(0).transpose(1, 2, 0), cv.COLOR_BGR2RGB) + rgb_p_tryon = (rgb_p_tryon + 1) / 2 + return rgb_p_tryon + + def 
_compute_L_inverse(self, X, Y): + N = X.shape[0] + + Xmat = np.tile(X, (1, N)) + Ymat = np.tile(Y, (1, N)) + P_dist_squared = np.power(Xmat - Xmat.transpose(1, 0), 2) + np.power(Ymat - Ymat.transpose(1, 0), 2) + + P_dist_squared[P_dist_squared == 0] = 1 + K = np.multiply(P_dist_squared, np.log(P_dist_squared)) + + O = np.ones([N, 1], dtype=np.float32) + Z = np.zeros([3, 3], dtype=np.float32) + P = np.concatenate([O, X, Y], axis=1) + first = np.concatenate((K, P), axis=1) + second = np.concatenate((P.transpose(1, 0), Z), axis=1) + L = np.concatenate((first, second), axis=0) + Li = linalg.inv(L) + return Li + + def _prepare_to_transform(self, out_h=256, out_w=192, grid_size=5): + grid = np.zeros([out_h, out_w, 3], dtype=np.float32) + grid_X, grid_Y = np.meshgrid(np.linspace(-1, 1, out_w), np.linspace(-1, 1, out_h)) + grid_X = np.expand_dims(np.expand_dims(grid_X, axis=0), axis=3) + grid_Y = np.expand_dims(np.expand_dims(grid_Y, axis=0), axis=3) + + axis_coords = np.linspace(-1, 1, grid_size) + N = grid_size ** 2 + P_Y, P_X = np.meshgrid(axis_coords, axis_coords) + + P_X = np.reshape(P_X,(-1, 1)) + P_Y = np.reshape(P_Y,(-1, 1)) + + P_X = np.expand_dims(np.expand_dims(np.expand_dims(P_X, axis=2), axis=3), axis=4).transpose(4, 1, 2, 3, 0) + P_Y = np.expand_dims(np.expand_dims(np.expand_dims(P_Y, axis=2), axis=3), axis=4).transpose(4, 1, 2, 3, 0) + return grid_X, grid_Y, N, P_X, P_Y + + def _expand_torch(self, X, shape): + if len(X.shape) != len(shape): + return X.flatten().reshape(shape) + else: + axis = [1 if src == dst else dst for src, dst in zip(X.shape, shape)] + return np.tile(X, axis) + + def _apply_transformation(self, theta, points, N, P_X, P_Y): + if len(theta.shape) == 2: + theta = np.expand_dims(np.expand_dims(theta, axis=2), axis=3) + + batch_size = theta.shape[0] + + P_X_base = np.copy(P_X) + P_Y_base = np.copy(P_Y) + + Li = self._compute_L_inverse(np.reshape(P_X, (N, -1)), np.reshape(P_Y, (N, -1))) + Li = np.expand_dims(Li, axis=0) + + # split theta into point coordinates + Q_X = np.squeeze(theta[:, :N, :, :], axis=3) + Q_Y = np.squeeze(theta[:, N:, :, :], axis=3) + + Q_X += self._expand_torch(P_X_base, Q_X.shape) + Q_Y += self._expand_torch(P_Y_base, Q_Y.shape) + + points_b = points.shape[0] + points_h = points.shape[1] + points_w = points.shape[2] + + P_X = self._expand_torch(P_X, (1, points_h, points_w, 1, N)) + P_Y = self._expand_torch(P_Y, (1, points_h, points_w, 1, N)) + + W_X = self._expand_torch(Li[:,:N,:N], (batch_size, N, N)) @ Q_X + W_Y = self._expand_torch(Li[:,:N,:N], (batch_size, N, N)) @ Q_Y + + W_X = np.expand_dims(np.expand_dims(W_X, axis=3), axis=4).transpose(0, 4, 2, 3, 1) + W_X = np.repeat(W_X, points_h, axis=1) + W_X = np.repeat(W_X, points_w, axis=2) + + W_Y = np.expand_dims(np.expand_dims(W_Y, axis=3), axis=4).transpose(0, 4, 2, 3, 1) + W_Y = np.repeat(W_Y, points_h, axis=1) + W_Y = np.repeat(W_Y, points_w, axis=2) + + A_X = self._expand_torch(Li[:, N:, :N], (batch_size, 3, N)) @ Q_X + A_Y = self._expand_torch(Li[:, N:, :N], (batch_size, 3, N)) @ Q_Y + + A_X = np.expand_dims(np.expand_dims(A_X, axis=3), axis=4).transpose(0, 4, 2, 3, 1) + A_X = np.repeat(A_X, points_h, axis=1) + A_X = np.repeat(A_X, points_w, axis=2) + + A_Y = np.expand_dims(np.expand_dims(A_Y, axis=3), axis=4).transpose(0, 4, 2, 3, 1) + A_Y = np.repeat(A_Y, points_h, axis=1) + A_Y = np.repeat(A_Y, points_w, axis=2) + + points_X_for_summation = np.expand_dims(np.expand_dims(points[:, :, :, 0], axis=3), axis=4) + points_X_for_summation = self._expand_torch(points_X_for_summation, points[:, :, 
:, 0].shape + (1, N)) + + points_Y_for_summation = np.expand_dims(np.expand_dims(points[:, :, :, 1], axis=3), axis=4) + points_Y_for_summation = self._expand_torch(points_Y_for_summation, points[:, :, :, 0].shape + (1, N)) + + if points_b == 1: + delta_X = points_X_for_summation - P_X + delta_Y = points_Y_for_summation - P_Y + else: + delta_X = points_X_for_summation - self._expand_torch(P_X, points_X_for_summation.shape) + delta_Y = points_Y_for_summation - self._expand_torch(P_Y, points_Y_for_summation.shape) + + dist_squared = np.power(delta_X, 2) + np.power(delta_Y, 2) + dist_squared[dist_squared == 0] = 1 + U = np.multiply(dist_squared, np.log(dist_squared)) + + points_X_batch = np.expand_dims(points[:,:,:,0], axis=3) + points_Y_batch = np.expand_dims(points[:,:,:,1], axis=3) + + if points_b == 1: + points_X_batch = self._expand_torch(points_X_batch, (batch_size, ) + points_X_batch.shape[1:]) + points_Y_batch = self._expand_torch(points_Y_batch, (batch_size, ) + points_Y_batch.shape[1:]) + + points_X_prime = A_X[:,:,:,:,0]+ \ + np.multiply(A_X[:,:,:,:,1], points_X_batch) + \ + np.multiply(A_X[:,:,:,:,2], points_Y_batch) + \ + np.sum(np.multiply(W_X, self._expand_torch(U, W_X.shape)), 4) + + points_Y_prime = A_Y[:,:,:,:,0]+ \ + np.multiply(A_Y[:,:,:,:,1], points_X_batch) + \ + np.multiply(A_Y[:,:,:,:,2], points_Y_batch) + \ + np.sum(np.multiply(W_Y, self._expand_torch(U, W_Y.shape)), 4) + + return np.concatenate((points_X_prime, points_Y_prime), 3) + + def _generate_grid(self, theta): + grid_X, grid_Y, N, P_X, P_Y = self._prepare_to_transform() + warped_grid = self._apply_transformation(theta, np.concatenate((grid_X, grid_Y), axis=3), N, P_X, P_Y) + return warped_grid + + def _bilinear_sampler(self, img, grid): + x, y = grid[:,:,:,0], grid[:,:,:,1] + + H = img.shape[2] + W = img.shape[3] + max_y = H - 1 + max_x = W - 1 + + # rescale x and y to [0, W-1/H-1] + x = 0.5 * (x + 1.0) * (max_x - 1) + y = 0.5 * (y + 1.0) * (max_y - 1) + + # grab 4 nearest corner points for each (x_i, y_i) + x0 = np.floor(x).astype(int) + x1 = x0 + 1 + y0 = np.floor(y).astype(int) + y1 = y0 + 1 + + # calculate deltas + wa = (x1 - x) * (y1 - y) + wb = (x1 - x) * (y - y0) + wc = (x - x0) * (y1 - y) + wd = (x - x0) * (y - y0) + + # clip to range [0, H-1/W-1] to not violate img boundaries + x0 = np.clip(x0, 0, max_x) + x1 = np.clip(x1, 0, max_x) + y0 = np.clip(y0, 0, max_y) + y1 = np.clip(y1, 0, max_y) + + # get pixel value at corner coords + img = img.reshape(-1, H, W) + Ia = img[:, y0, x0].swapaxes(0, 1) + Ib = img[:, y1, x0].swapaxes(0, 1) + Ic = img[:, y0, x1].swapaxes(0, 1) + Id = img[:, y1, x1].swapaxes(0, 1) + + wa = np.expand_dims(wa, axis=0) + wb = np.expand_dims(wb, axis=0) + wc = np.expand_dims(wc, axis=0) + wd = np.expand_dims(wd, axis=0) + + # compute output + out = wa*Ia + wb*Ib + wc*Ic + wd*Id + return out + + +class CorrelationLayer(object): + def __init__(self, params, blobs): + super(CorrelationLayer, self).__init__() + + def getMemoryShapes(self, inputs): + fetureAShape = inputs[0] + b, c, h, w = fetureAShape + return [[b, h * w, h, w]] + + def forward(self, inputs): + feature_A, feature_B = inputs + b, c, h, w = feature_A.shape + feature_A = feature_A.transpose(0, 1, 3, 2) + feature_A = np.reshape(feature_A, (b, c, h * w)) + feature_B = np.reshape(feature_B, (b, c, h * w)) + feature_B = feature_B.transpose(0, 2, 1) + feature_mul = feature_B @ feature_A + feature_mul= np.reshape(feature_mul, (b, h, w, h * w)) + feature_mul = feature_mul.transpose(0, 1, 3, 2) + correlation_tensor = 
feature_mul.transpose(0, 2, 1, 3) + correlation_tensor = np.ascontiguousarray(correlation_tensor) + return [correlation_tensor] + + +if __name__ == "__main__": + if not os.path.isfile(args.gmm_model): + raise OSError("GMM model not exist") + if not os.path.isfile(args.tom_model): + raise OSError("TOM model not exist") + if not os.path.isfile(args.segmentation_model): + raise OSError("Segmentation model not exist") + if not os.path.isfile(findFile(args.openpose_proto)): + raise OSError("OpenPose proto not exist") + if not os.path.isfile(findFile(args.openpose_model)): + raise OSError("OpenPose model not exist") + + person_img = cv.imread(args.input_image) + ratio = 256 / 192 + inp_h, inp_w, _ = person_img.shape + current_ratio = inp_h / inp_w + if current_ratio > ratio: + center_h = inp_h // 2 + out_h = inp_w * ratio + start = int(center_h - out_h // 2) + end = int(center_h + out_h // 2) + person_img = person_img[start:end, ...] + else: + center_w = inp_w // 2 + out_w = inp_h / ratio + start = int(center_w - out_w // 2) + end = int(center_w + out_w // 2) + person_img = person_img[:, start:end, :] + + cloth_img = cv.imread(args.input_cloth) + pose = get_pose_map(person_img, findFile(args.openpose_proto), + findFile(args.openpose_model), args.backend, args.target) + segm_image = parse_human(person_img, args.segmentation_model) + segm_image = cv.resize(segm_image, (192, 256), cv.INTER_LINEAR) + + cv.dnn_registerLayer('Correlation', CorrelationLayer) + + model = CpVton(args.gmm_model, args.tom_model, args.backend, args.target) + agnostic = model.prepare_agnostic(segm_image, person_img, pose) + warped_cloth = model.get_warped_cloth(cloth_img, agnostic) + output = model.get_tryon(agnostic, warped_cloth) + + cv.dnn_unregisterLayer('Correlation') + + winName = 'Virtual Try-On' + cv.namedWindow(winName, cv.WINDOW_AUTOSIZE) + cv.imshow(winName, output) + cv.waitKey() diff --git a/samples/python/drawing.py b/samples/python/drawing.py new file mode 100644 index 0000000000..e5db400dc0 --- /dev/null +++ b/samples/python/drawing.py @@ -0,0 +1,191 @@ +#!/usr/bin/env python +''' + This program demonstrates OpenCV drawing and text output functions by drawing different shapes and text strings + Usage : + python3 drawing.py + Press any button to exit + ''' + +# Python 2/3 compatibility +from __future__ import print_function + +import numpy as np +import cv2 as cv + +# Drawing Lines +def lines(): + for i in range(NUMBER*2): + pt1, pt2 = [], [] + pt1.append(np.random.randint(x1, x2)) + pt1.append(np.random.randint(y1, y2)) + pt2.append(np.random.randint(x1, x2)) + pt2.append(np.random.randint(y1, y2)) + color = "%06x" % np.random.randint(0, 0xFFFFFF) + color = tuple(int(color[i:i+2], 16) for i in (0, 2 ,4)) + arrowed = np.random.randint(0, 6) + if (arrowed<3): + cv.line(image, tuple(pt1), tuple(pt2), color, np.random.randint(1, 10), lineType) + else: + cv.arrowedLine(image, tuple(pt1), tuple(pt2), color, np.random.randint(1, 10), lineType) + cv.imshow(wndname, image) + if cv.waitKey(DELAY)>=0: + return + +# Drawing Rectangle +def rectangle(): + for i in range(NUMBER*2): + pt1, pt2 = [], [] + pt1.append(np.random.randint(x1, x2)) + pt1.append(np.random.randint(y1, y2)) + pt2.append(np.random.randint(x1, x2)) + pt2.append(np.random.randint(y1, y2)) + color = "%06x" % np.random.randint(0, 0xFFFFFF) + color = tuple(int(color[i:i+2], 16) for i in (0, 2 ,4)) + thickness = np.random.randint(-3, 10) + marker = np.random.randint(0, 10) + marker_size = np.random.randint(30, 80) + + if (marker > 5): + cv.rectangle(image, 
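# --- Illustrative sketch (not part of the patch) --------------------------------
# The registered 'Correlation' layer above stores, for every spatial position of
# feature_A, the dot products of its channel vector with the channel vectors at all
# h*w positions of feature_B, giving a (b, h*w, h, w) volume. A reference computation
# with einsum (the flattening order of the h*w axis here is illustrative and may
# differ from the layer's exact layout):
import numpy as np

b, c, h, w = 1, 4, 3, 3
feature_A = np.random.rand(b, c, h, w).astype(np.float32)
feature_B = np.random.rand(b, c, h, w).astype(np.float32)
corr = np.einsum('bcij,bckl->bklij', feature_A, feature_B)   # dot products over channels
corr = corr.reshape(b, h * w, h, w)
print(corr.shape)   # (1, 9, 3, 3) -- matches getMemoryShapes
# --------------------------------------------------------------------------------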
tuple(pt1), tuple(pt2), color, max(thickness, -1), lineType) + else: + cv.drawMarker(image, tuple(pt1), color, marker, marker_size) + cv.imshow(wndname, image) + if cv.waitKey(DELAY)>=0: + return + +# Drawing ellipse +def ellipse(): + for i in range(NUMBER*2): + center = [] + center.append(np.random.randint(x1, x2)) + center.append(np.random.randint(x1, x2)) + axes = [] + axes.append(np.random.randint(0, 200)) + axes.append(np.random.randint(0, 200)) + angle = np.random.randint(0, 180) + color = "%06x" % np.random.randint(0, 0xFFFFFF) + color = tuple(int(color[i:i+2], 16) for i in (0, 2 ,4)) + thickness = np.random.randint(-1, 9) + cv.ellipse(image, tuple(center), tuple(axes), angle, angle-100, angle + 200, color, thickness, lineType) + cv.imshow(wndname, image) + if cv.waitKey(DELAY)>=0: + return + +# Drawing Polygonal Curves +def polygonal(): + for i in range(NUMBER): + pt = [(0, 0)]*6 + pt = np.resize(pt, (2, 3, 2)) + pt[0][0][0] = np.random.randint(x1, x2) + pt[0][0][1] = np.random.randint(y1, y2) + pt[0][1][0] = np.random.randint(x1, x2) + pt[0][1][1] = np.random.randint(y1, y2) + pt[0][2][0] = np.random.randint(x1, x2) + pt[0][2][1] = np.random.randint(y1, y2) + pt[1][0][0] = np.random.randint(x1, x2) + pt[1][0][1] = np.random.randint(y1, y2) + pt[1][1][0] = np.random.randint(x1, x2) + pt[1][1][1] = np.random.randint(y1, y2) + pt[1][2][0] = np.random.randint(x1, x2) + pt[1][2][1] = np.random.randint(y1, y2) + color = "%06x" % np.random.randint(0, 0xFFFFFF) + color = tuple(int(color[i:i+2], 16) for i in (0, 2 ,4)) + alist = [] + for k in pt[0]: + alist.append(k) + for k in pt[1]: + alist.append(k) + ppt = np.array(alist) + cv.polylines(image, [ppt], True, color, thickness = np.random.randint(1, 10), lineType = lineType) + cv.imshow(wndname, image) + if cv.waitKey(DELAY) >= 0: + return + +# fills an area bounded by several polygonal contours +def fill(): + for i in range(NUMBER): + pt = [(0, 0)]*6 + pt = np.resize(pt, (2, 3, 2)) + pt[0][0][0] = np.random.randint(x1, x2) + pt[0][0][1] = np.random.randint(y1, y2) + pt[0][1][0] = np.random.randint(x1, x2) + pt[0][1][1] = np.random.randint(y1, y2) + pt[0][2][0] = np.random.randint(x1, x2) + pt[0][2][1] = np.random.randint(y1, y2) + pt[1][0][0] = np.random.randint(x1, x2) + pt[1][0][1] = np.random.randint(y1, y2) + pt[1][1][0] = np.random.randint(x1, x2) + pt[1][1][1] = np.random.randint(y1, y2) + pt[1][2][0] = np.random.randint(x1, x2) + pt[1][2][1] = np.random.randint(y1, y2) + color = "%06x" % np.random.randint(0, 0xFFFFFF) + color = tuple(int(color[i:i+2], 16) for i in (0, 2 ,4)) + alist = [] + for k in pt[0]: + alist.append(k) + for k in pt[1]: + alist.append(k) + ppt = np.array(alist) + cv.fillPoly(image, [ppt], color, lineType) + cv.imshow(wndname, image) + if cv.waitKey(DELAY) >= 0: + return + +# Drawing Circles +def circles(): + for i in range(NUMBER): + center = [] + center.append(np.random.randint(x1, x2)) + center.append(np.random.randint(x1, x2)) + color = "%06x" % np.random.randint(0, 0xFFFFFF) + color = tuple(int(color[i:i+2], 16) for i in (0, 2 ,4)) + cv.circle(image, tuple(center), np.random.randint(0, 300), color, np.random.randint(-1, 9), lineType) + cv.imshow(wndname, image) + if cv.waitKey(DELAY) >= 0: + return + +# Draws a text string +def string(): + for i in range(NUMBER): + org = [] + org.append(np.random.randint(x1, x2)) + org.append(np.random.randint(x1, x2)) + color = "%06x" % np.random.randint(0, 0xFFFFFF) + color = tuple(int(color[i:i+2], 16) for i in (0, 2 ,4)) + cv.putText(image, "Testing text rendering", 
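# --- Illustrative sketch (not part of the patch) --------------------------------
# polygonal() and fill() above concatenate the two random triangles into one (6, 2)
# array, so polylines/fillPoly treat them as a single six-vertex contour. Passing the
# contours as separate int32 arrays keeps them as two closed curves:
import numpy as np
import cv2 as cv

canvas = np.zeros((700, 1000, 3), np.uint8)
tri1 = np.array([[100, 50], [200, 300], [700, 200]], np.int32)
tri2 = np.array([[500, 100], [560, 300], [400, 200]], np.int32)
cv.polylines(canvas, [tri1, tri2], True, (0, 255, 0), 2, cv.LINE_AA)
cv.fillPoly(canvas, [tri1, tri2], (0, 0, 255), cv.LINE_AA)
# --------------------------------------------------------------------------------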
tuple(org), np.random.randint(0, 8), np.random.randint(0, 100)*0.05+0.1, color, np.random.randint(1, 10), lineType) + cv.imshow(wndname, image) + if cv.waitKey(DELAY) >= 0: + return + + +def string1(): + textsize = cv.getTextSize("OpenCV forever!", cv.FONT_HERSHEY_COMPLEX, 3, 5) + org = (int((width - textsize[0][0])/2), int((height - textsize[0][1])/2)) + for i in range(0, 255, 2): + image2 = np.array(image) - i + cv.putText(image2, "OpenCV forever!", org, cv.FONT_HERSHEY_COMPLEX, 3, (i, i, 255), 5, lineType) + cv.imshow(wndname, image2) + if cv.waitKey(DELAY) >= 0: + return + +if __name__ == '__main__': + print(__doc__) + wndname = "Drawing Demo" + NUMBER = 100 + DELAY = 5 + width, height = 1000, 700 + lineType = cv.LINE_AA # change it to LINE_8 to see non-antialiased graphics + x1, x2, y1, y2 = -width/2, width*3/2, -height/2, height*3/2 + image = np.zeros((height, width, 3), dtype = np.uint8) + cv.imshow(wndname, image) + cv.waitKey(DELAY) + lines() + rectangle() + ellipse() + polygonal() + fill() + circles() + string() + string1() + cv.waitKey(0) + cv.destroyAllWindows() \ No newline at end of file diff --git a/samples/python/grabcut.py b/samples/python/grabcut.py index f63c0cb387..e2a035a91c 100644 --- a/samples/python/grabcut.py +++ b/samples/python/grabcut.py @@ -11,10 +11,10 @@ USAGE: README FIRST: Two windows will show up, one for input and one for output. - At first, in input window, draw a rectangle around the object using -mouse right button. Then press 'n' to segment the object (once or a few times) + At first, in input window, draw a rectangle around the object using the +right mouse button. Then press 'n' to segment the object (once or a few times) For any finer touch-ups, you can press any of the keys below and draw lines on -the areas you want. Then again press 'n' for updating the output. +the areas you want. Then again press 'n' to update the output. Key '0' - To select areas of sure background Key '1' - To select areas of sure foreground @@ -44,8 +44,8 @@ class App(): DRAW_BG = {'color' : BLACK, 'val' : 0} DRAW_FG = {'color' : WHITE, 'val' : 1} - DRAW_PR_FG = {'color' : GREEN, 'val' : 3} DRAW_PR_BG = {'color' : RED, 'val' : 2} + DRAW_PR_FG = {'color' : GREEN, 'val' : 3} # setting up flags rect = (0,0,1,1) @@ -160,14 +160,12 @@ class App(): print(""" For finer touchups, mark foreground and background after pressing keys 0-3 and again press 'n' \n""") try: + bgdmodel = np.zeros((1, 65), np.float64) + fgdmodel = np.zeros((1, 65), np.float64) if (self.rect_or_mask == 0): # grabcut with rect - bgdmodel = np.zeros((1, 65), np.float64) - fgdmodel = np.zeros((1, 65), np.float64) cv.grabCut(self.img2, self.mask, self.rect, bgdmodel, fgdmodel, 1, cv.GC_INIT_WITH_RECT) self.rect_or_mask = 1 - elif self.rect_or_mask == 1: # grabcut with mask - bgdmodel = np.zeros((1, 65), np.float64) - fgdmodel = np.zeros((1, 65), np.float64) + elif (self.rect_or_mask == 1): # grabcut with mask cv.grabCut(self.img2, self.mask, self.rect, bgdmodel, fgdmodel, 1, cv.GC_INIT_WITH_MASK) except: import traceback diff --git a/samples/python/laplace.py b/samples/python/laplace.py new file mode 100644 index 0000000000..f485e5741c --- /dev/null +++ b/samples/python/laplace.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python + +''' + This program demonstrates Laplace point/edge detection using + OpenCV function Laplacian() + It captures from the camera of your choice: 0, 1, ... default 0 + Usage: + python laplace.py + If no arguments given default arguments will be used. 
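# --- Illustrative sketch (not part of the patch) --------------------------------
# The grabcut.py hunk above hoists the two model buffers out of the rect/mask
# branches: cv.grabCut only requires them to be 1x65 float64 arrays that it updates
# in place, so one allocation serves both GC_INIT_WITH_RECT and GC_INIT_WITH_MASK.
# Minimal rect-initialised call on a synthetic stand-in image:
import numpy as np
import cv2 as cv

img = np.random.randint(0, 255, (480, 640, 3), np.uint8)
mask = np.zeros(img.shape[:2], np.uint8)
bgdmodel = np.zeros((1, 65), np.float64)
fgdmodel = np.zeros((1, 65), np.float64)
rect = (50, 50, 300, 400)
cv.grabCut(img, mask, rect, bgdmodel, fgdmodel, 1, cv.GC_INIT_WITH_RECT)
fg = np.where((mask == cv.GC_FGD) | (mask == cv.GC_PR_FGD), 255, 0).astype(np.uint8)
# --------------------------------------------------------------------------------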
+
+    Keyboard Shortcuts:
+    Press Esc to exit the program.
+    '''
+
+# Python 2/3 compatibility
+from __future__ import print_function
+
+import numpy as np
+import cv2 as cv
+import sys
+
+def main():
+    # Declare the variables we are going to use
+    ddepth = cv.CV_16S
+    smoothType = "MedianBlur"
+    sigma = 3
+    if len(sys.argv) == 4:
+        ddepth = int(sys.argv[1])
+        smoothType = sys.argv[2]
+        sigma = int(sys.argv[3])
+    # Taking input from the camera
+    cap = cv.VideoCapture(0)
+    # Create Window and Trackbar
+    cv.namedWindow("Laplace of Image", cv.WINDOW_AUTOSIZE)
+    cv.createTrackbar("Kernel Size Bar", "Laplace of Image", sigma, 15, lambda x: x)
+    # Printing frame width, height and FPS
+    print("=="*40)
+    print("Frame Width: ", cap.get(cv.CAP_PROP_FRAME_WIDTH), "Frame Height: ", cap.get(cv.CAP_PROP_FRAME_HEIGHT), "FPS: ", cap.get(cv.CAP_PROP_FPS))
+    while True:
+        # Reading input from the camera
+        ret, frame = cap.read()
+        if not ret:
+            print("Can't open camera/video stream")
+            break
+        # Taking input/position from the trackbar
+        sigma = cv.getTrackbarPos("Kernel Size Bar", "Laplace of Image")
+        # Setting kernel size
+        ksize = (sigma*5)|1
+        # Removing noise by blurring with a filter
+        if smoothType == "GAUSSIAN":
+            smoothed = cv.GaussianBlur(frame, (ksize, ksize), sigma, sigma)
+        elif smoothType == "BLUR":
+            smoothed = cv.blur(frame, (ksize, ksize))
+        else:  # "MedianBlur" (default)
+            smoothed = cv.medianBlur(frame, ksize)
+
+        # Apply Laplace function
+        laplace = cv.Laplacian(smoothed, ddepth, ksize=5)
+        # Converting back to uint8
+        result = cv.convertScaleAbs(laplace, alpha=(sigma+1)*0.25)
+        # Display Output
+        cv.imshow("Laplace of Image", result)
+        k = cv.waitKey(30)
+        if k == 27:
+            return
+
+if __name__ == "__main__":
+    print(__doc__)
+    main()
+    cv.destroyAllWindows()
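# --- Illustrative sketch (not part of the patch) --------------------------------
# Single-frame version of the loop above: blur, take the Laplacian at 16-bit signed
# depth so negative edge responses survive, then rescale to 8-bit for display. The
# input is a stand-in random frame instead of a camera capture.
import numpy as np
import cv2 as cv

frame = np.random.randint(0, 255, (480, 640, 3), np.uint8)
smoothed = cv.medianBlur(frame, 5)
laplace = cv.Laplacian(smoothed, cv.CV_16S, ksize=5)
result = cv.convertScaleAbs(laplace, alpha=0.25)   # alpha is passed by name, not as dst
# --------------------------------------------------------------------------------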