diff --git a/apps/interactive-calibration/calibCommon.hpp b/apps/interactive-calibration/calibCommon.hpp
index 6acf610897..f422ff0531 100644
--- a/apps/interactive-calibration/calibCommon.hpp
+++ b/apps/interactive-calibration/calibCommon.hpp
@@ -80,7 +80,7 @@ namespace calib
         cv::Size boardSize;
         int charucoDictName;
         int calibrationStep;
-        float charucoSquareLenght, charucoMarkerSize;
+        float charucoSquareLength, charucoMarkerSize;
         float captureDelay;
         float squareSize;
         float templDst;
diff --git a/apps/interactive-calibration/defaultConfig.xml b/apps/interactive-calibration/defaultConfig.xml
index d14ba865d3..cad201fba1 100644
--- a/apps/interactive-calibration/defaultConfig.xml
+++ b/apps/interactive-calibration/defaultConfig.xml
@@ -1,7 +1,7 @@
 <?xml version="1.0"?>
 <opencv_storage>
 <charuco_dict>0</charuco_dict>
-<charuco_square_lenght>200</charuco_square_lenght>
+<charuco_square_length>200</charuco_square_length>
 <charuco_marker_size>100</charuco_marker_size>
 <calibration_step>1</calibration_step>
 <max_frames_num>30</max_frames_num>
diff --git a/apps/interactive-calibration/frameProcessor.cpp b/apps/interactive-calibration/frameProcessor.cpp
index aecc2c0770..90904a42cd 100644
--- a/apps/interactive-calibration/frameProcessor.cpp
+++ b/apps/interactive-calibration/frameProcessor.cpp
@@ -273,7 +273,7 @@ CalibProcessor::CalibProcessor(cv::Ptr<calibrationData> data, captureParameters
 #ifdef HAVE_OPENCV_ARUCO
     mArucoDictionary = cv::aruco::getPredefinedDictionary(
             cv::aruco::PREDEFINED_DICTIONARY_NAME(capParams.charucoDictName));
-    mCharucoBoard = cv::aruco::CharucoBoard::create(mBoardSize.width, mBoardSize.height, capParams.charucoSquareLenght,
+    mCharucoBoard = cv::aruco::CharucoBoard::create(mBoardSize.width, mBoardSize.height, capParams.charucoSquareLength,
                                                     capParams.charucoMarkerSize, mArucoDictionary);
 #endif
     break;
diff --git a/apps/interactive-calibration/main.cpp b/apps/interactive-calibration/main.cpp
index f3d1e5d66b..6468c88ce7 100644
--- a/apps/interactive-calibration/main.cpp
+++ b/apps/interactive-calibration/main.cpp
@@ -181,7 +181,7 @@ int main(int argc, char** argv)
                     cv::aruco::getPredefinedDictionary(cv::aruco::PREDEFINED_DICTIONARY_NAME(capParams.charucoDictName));
             cv::Ptr<cv::aruco::CharucoBoard> charucoboard =
                         cv::aruco::CharucoBoard::create(capParams.boardSize.width, capParams.boardSize.height,
-                                                        capParams.charucoSquareLenght, capParams.charucoMarkerSize, dictionary);
+                                                        capParams.charucoSquareLength, capParams.charucoMarkerSize, dictionary);
             globalData->totalAvgErr =
                     cv::aruco::calibrateCameraCharuco(globalData->allCharucoCorners, globalData->allCharucoIds,
                                                       charucoboard, globalData->imageSize,
diff --git a/apps/interactive-calibration/parametersController.cpp b/apps/interactive-calibration/parametersController.cpp
index 8b44ba0409..c76b915c63 100644
--- a/apps/interactive-calibration/parametersController.cpp
+++ b/apps/interactive-calibration/parametersController.cpp
@@ -37,7 +37,10 @@ bool calib::parametersController::loadFromFile(const std::string &inputFileName)
     }

     readFromNode(reader["charuco_dict"], mCapParams.charucoDictName);
-    readFromNode(reader["charuco_square_lenght"], mCapParams.charucoSquareLenght);
+    if (readFromNode(reader["charuco_square_lenght"], mCapParams.charucoSquareLength)) {
+        std::cout << "DEPRECATION: Parameter 'charuco_square_lenght' has been deprecated (typo). Use 'charuco_square_length' instead." << std::endl;
+    }
+    readFromNode(reader["charuco_square_length"], mCapParams.charucoSquareLength);
     readFromNode(reader["charuco_marker_size"], mCapParams.charucoMarkerSize);
     readFromNode(reader["camera_resolution"], mCapParams.cameraResolution);
     readFromNode(reader["calibration_step"], mCapParams.calibrationStep);
@@ -51,7 +54,7 @@ bool calib::parametersController::loadFromFile(const std::string &inputFileName)
     bool retValue =
             checkAssertion(mCapParams.charucoDictName >= 0, "Dict name must be >= 0") &&
             checkAssertion(mCapParams.charucoMarkerSize > 0, "Marker size must be positive") &&
-            checkAssertion(mCapParams.charucoSquareLenght > 0, "Square size must be positive") &&
+            checkAssertion(mCapParams.charucoSquareLength > 0, "Square size must be positive") &&
             checkAssertion(mCapParams.minFramesNum > 1, "Minimal number of frames for calibration < 1") &&
             checkAssertion(mCapParams.calibrationStep > 0, "Calibration step must be positive") &&
             checkAssertion(mCapParams.maxFramesNum > mCapParams.minFramesNum, "maxFramesNum < minFramesNum") &&
@@ -119,7 +122,7 @@ bool calib::parametersController::loadFromParser(cv::CommandLineParser &parser)
         mCapParams.board = chAruco;
         mCapParams.boardSize = cv::Size(6, 8);
         mCapParams.charucoDictName = 0;
-        mCapParams.charucoSquareLenght = 200;
+        mCapParams.charucoSquareLength = 200;
         mCapParams.charucoMarkerSize = 100;
     }
     else {
diff --git a/doc/tutorials/calib3d/interactive_calibration/interactive_calibration.markdown b/doc/tutorials/calib3d/interactive_calibration/interactive_calibration.markdown
index 0c4d67ec85..8280262cdd 100644
--- a/doc/tutorials/calib3d/interactive_calibration/interactive_calibration.markdown
+++ b/doc/tutorials/calib3d/interactive_calibration/interactive_calibration.markdown
@@ -64,7 +64,7 @@ By default values of advanced parameters are stored in defaultConfig.xml
 <?xml version="1.0"?>
 <opencv_storage>
 <charuco_dict>0</charuco_dict>
-<charuco_square_lenght>200</charuco_square_lenght>
+<charuco_square_length>200</charuco_square_length>
 <charuco_marker_size>100</charuco_marker_size>
 <calibration_step>1</calibration_step>
 <max_frames_num>30</max_frames_num>
@@ -78,7 +78,7 @@ By default values of advanced parameters are stored in defaultConfig.xml
 @endcode

 - *charuco_dict*: name of special dictionary, which has been used for generation of chAruco pattern
-- *charuco_square_lenght*: size of square on chAruco board (in pixels)
+- *charuco_square_length*: size of square on chAruco board (in pixels)
 - *charuco_marker_size*: size of Aruco markers on chAruco board (in pixels)
 - *calibration_step*: interval in frames between launches of @ref cv::calibrateCamera
 - *max_frames_num*: if number of frames for calibration is greater then this value frames filter starts working.
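The renamed key is read through the same cv::FileStorage mechanism as the other advanced parameters, and the old spelling is still accepted with a deprecation warning. A quick way to check which spelling an existing config file uses is a small FileStorage probe like the sketch below (the file name is a placeholder, not part of the tool):

```python
import cv2 as cv

# Placeholder path: point this at your own advanced-parameters file.
fs = cv.FileStorage("myConfig.xml", cv.FILE_STORAGE_READ)

new_key = fs.getNode("charuco_square_length")
old_key = fs.getNode("charuco_square_lenght")  # deprecated spelling, still read for compatibility

if not new_key.empty():
    print("charuco_square_length =", new_key.real())
elif not old_key.empty():
    print("deprecated 'charuco_square_lenght' =", old_key.real())
else:
    print("square length not set; the application default (200) is used")

fs.release()
```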
@@ -91,7 +91,7 @@ QR faster than SVD, but potentially less precise - *frame_filter_conv_param*: parameter which used in linear convolution of bicriterial frames filter - *camera_resolution*: resolution of camera which is used for calibration -**Note:** *charuco_dict*, *charuco_square_lenght* and *charuco_marker_size* are used for chAruco pattern generation +**Note:** *charuco_dict*, *charuco_square_length* and *charuco_marker_size* are used for chAruco pattern generation (see Aruco module description for details: [Aruco tutorials](https://github.com/opencv/opencv_contrib/tree/master/modules/aruco/tutorials)) Default chAruco pattern: diff --git a/modules/calib3d/src/fundam.cpp b/modules/calib3d/src/fundam.cpp index c5f93bcde5..5363e949fd 100644 --- a/modules/calib3d/src/fundam.cpp +++ b/modules/calib3d/src/fundam.cpp @@ -490,12 +490,47 @@ static int run7Point( const Mat& _m1, const Mat& _m2, Mat& _fmatrix ) double* fmatrix = _fmatrix.ptr(); int i, k, n; + Point2d m1c(0, 0), m2c(0, 0); + double t, scale1 = 0, scale2 = 0; + const int count = 7; + + // compute centers and average distances for each of the two point sets + for( i = 0; i < count; i++ ) + { + m1c += Point2d(m1[i]); + m2c += Point2d(m2[i]); + } + + // calculate the normalizing transformations for each of the point sets: + // after the transformation each set will have the mass center at the coordinate origin + // and the average distance from the origin will be ~sqrt(2). + t = 1./count; + m1c *= t; + m2c *= t; + + for( i = 0; i < count; i++ ) + { + scale1 += norm(Point2d(m1[i].x - m1c.x, m1[i].y - m1c.y)); + scale2 += norm(Point2d(m2[i].x - m2c.x, m2[i].y - m2c.y)); + } + + scale1 *= t; + scale2 *= t; + + if( scale1 < FLT_EPSILON || scale2 < FLT_EPSILON ) + return 0; + + scale1 = std::sqrt(2.)/scale1; + scale2 = std::sqrt(2.)/scale2; + // form a linear system: i-th row of A(=a) represents // the equation: (m2[i], 1)'*F*(m1[i], 1) = 0 for( i = 0; i < 7; i++ ) { - double x0 = m1[i].x, y0 = m1[i].y; - double x1 = m2[i].x, y1 = m2[i].y; + double x0 = (m1[i].x - m1c.x)*scale1; + double y0 = (m1[i].y - m1c.y)*scale1; + double x1 = (m2[i].x - m2c.x)*scale2; + double y1 = (m2[i].y - m2c.y)*scale2; a[i*9+0] = x1*x0; a[i*9+1] = x1*y0; @@ -559,6 +594,10 @@ static int run7Point( const Mat& _m1, const Mat& _m2, Mat& _fmatrix ) if( n < 1 || n > 3 ) return n; + // transformation matrices + Matx33d T1( scale1, 0, -scale1*m1c.x, 0, scale1, -scale1*m1c.y, 0, 0, 1 ); + Matx33d T2( scale2, 0, -scale2*m2c.x, 0, scale2, -scale2*m2c.y, 0, 0, 1 ); + for( k = 0; k < n; k++, fmatrix += 9 ) { // for each root form the fundamental matrix @@ -577,6 +616,14 @@ static int run7Point( const Mat& _m1, const Mat& _m2, Mat& _fmatrix ) for( i = 0; i < 8; i++ ) fmatrix[i] = f1[i]*lambda + f2[i]*mu; + + // de-normalize + Mat F(3, 3, CV_64F, fmatrix); + F = T2.t() * F * T1; + + // make F(3,3) = 1 + if(fabs(F.at(8)) > FLT_EPSILON ) + F *= 1. 
/ F.at(8); } return n; diff --git a/modules/core/src/persistence_yml.cpp b/modules/core/src/persistence_yml.cpp index 7742e82770..3f3742b8d1 100644 --- a/modules/core/src/persistence_yml.cpp +++ b/modules/core/src/persistence_yml.cpp @@ -452,19 +452,19 @@ public: if ( d == '<') //support of full type heading from YAML 1.2 { const char* yamlTypeHeading = "' && (size_t)(typeEndPtr - ptr) > headingLenght ) + if ( d == '>' && (size_t)(typeEndPtr - ptr) > headingLength ) { - if ( memcmp(ptr, yamlTypeHeading, headingLenght) == 0 ) + if ( memcmp(ptr, yamlTypeHeading, headingLength) == 0 ) { *typeEndPtr = ' '; - ptr += headingLenght - 1; + ptr += headingLength - 1; is_user = true; //value_type |= FileNode::USER; } diff --git a/modules/dnn/src/caffe/caffe_importer.cpp b/modules/dnn/src/caffe/caffe_importer.cpp index fdd39468a1..4a5a1f1c0a 100644 --- a/modules/dnn/src/caffe/caffe_importer.cpp +++ b/modules/dnn/src/caffe/caffe_importer.cpp @@ -75,6 +75,17 @@ static cv::String toString(const T &v) return ss.str(); } +static inline +MatShape parseBlobShape(const caffe::BlobShape& _input_shape) +{ + MatShape shape; + for (int i = 0; i < _input_shape.dim_size(); i++) + { + shape.push_back((int)_input_shape.dim(i)); + } + return shape; +} + class CaffeImporter { caffe::NetParameter net; @@ -235,10 +246,7 @@ public: } else if (pbBlob.has_shape()) { - const caffe::BlobShape &_shape = pbBlob.shape(); - - for (int i = 0; i < _shape.dim_size(); i++) - shape.push_back((int)_shape.dim(i)); + shape = parseBlobShape(pbBlob.shape()); } else shape.resize(1, 1); // Is a scalar. @@ -334,12 +342,49 @@ public: //setup input layer names std::vector netInputs(net.input_size()); + std::vector inp_shapes; { - for (int inNum = 0; inNum < net.input_size(); inNum++) + int net_input_size = net.input_size(); + for (int inNum = 0; inNum < net_input_size; inNum++) { addedBlobs.push_back(BlobNote(net.input(inNum), 0, inNum)); netInputs[inNum] = net.input(inNum); } + + if (net.input_dim_size() > 0) // deprecated in Caffe proto + { + int net_input_dim_size = net.input_dim_size(); + CV_Check(net_input_dim_size, net_input_dim_size % 4 == 0, ""); + CV_CheckEQ(net_input_dim_size, net_input_size * 4, ""); + for (int inp_id = 0; inp_id < net_input_size; inp_id++) + { + int dim = inp_id * 4; + MatShape shape(4); + shape[0] = net.input_dim(dim); + shape[1] = net.input_dim(dim+1); + shape[2] = net.input_dim(dim+2); + shape[3] = net.input_dim(dim+3); + inp_shapes.push_back(shape); + } + } + else if (net.input_shape_size() > 0) // deprecated in Caffe proto + { + int net_input_shape_size = net.input_shape_size(); + CV_CheckEQ(net_input_shape_size, net_input_size, ""); + for (int inp_id = 0; inp_id < net_input_shape_size; inp_id++) + { + MatShape shape = parseBlobShape(net.input_shape(inp_id)); + inp_shapes.push_back(shape); + } + } + else + { + for (int inp_id = 0; inp_id < net_input_size; inp_id++) + { + MatShape shape; // empty + inp_shapes.push_back(shape); + } + } } for (int li = 0; li < layersSize; li++) @@ -364,6 +409,17 @@ public: addedBlobs.back().outNum = netInputs.size(); netInputs.push_back(addedBlobs.back().name); } + if (layer.has_input_param()) + { + const caffe::InputParameter &inputParameter = layer.input_param(); + int input_shape_size = inputParameter.shape_size(); + CV_CheckEQ(input_shape_size, layer.top_size(), ""); + for (int inp_id = 0; inp_id < input_shape_size; inp_id++) + { + MatShape shape = parseBlobShape(inputParameter.shape(inp_id)); + inp_shapes.push_back(shape); + } + } continue; } else if (type == "BatchNorm") @@ 
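The run7Point() change above normalizes both 7-point sets before forming the linear system: each set is shifted so its centroid sits at the origin and scaled so the average distance from the origin is about sqrt(2); the candidate fundamental matrices are then mapped back with F = T2' * F * T1 and rescaled so that F(3,3) = 1. A NumPy sketch of the same idea (an illustration, not the OpenCV implementation):

```python
import numpy as np

def normalize_points(pts):
    # pts: (N, 2) array. Move the centroid to the origin and scale so the
    # mean distance from the origin becomes sqrt(2).
    c = pts.mean(axis=0)
    mean_dist = np.linalg.norm(pts - c, axis=1).mean()
    s = np.sqrt(2.0) / mean_dist
    T = np.array([[s, 0.0, -s * c[0]],
                  [0.0, s, -s * c[1]],
                  [0.0, 0.0, 1.0]])
    return (pts - c) * s, T

def denormalize_F(F_norm, T1, T2):
    # Undo the normalization applied to both point sets and fix the scale
    # so that F[2, 2] == 1 (when it is not numerically zero).
    F = T2.T @ F_norm @ T1
    if abs(F[2, 2]) > np.finfo(np.float32).eps:
        F = F / F[2, 2]
    return F
```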
-424,35 +480,15 @@ public: } dstNet.setInputsNames(netInputs); - std::vector inp_shapes; - if (net.input_shape_size() > 0 || (layersSize > 0 && net.layer(0).has_input_param() && - net.layer(0).input_param().shape_size() > 0)) { - - int size = (net.input_shape_size() > 0) ? net.input_shape_size() : - net.layer(0).input_param().shape_size(); - for (int inp_id = 0; inp_id < size; inp_id++) + if (inp_shapes.size() > 0) + { + CV_CheckEQ(inp_shapes.size(), netInputs.size(), ""); + for (int inp_id = 0; inp_id < inp_shapes.size(); inp_id++) { - const caffe::BlobShape &_input_shape = (net.input_shape_size() > 0) ? - net.input_shape(inp_id) : - net.layer(0).input_param().shape(inp_id); - MatShape shape; - for (int i = 0; i < _input_shape.dim_size(); i++) { - shape.push_back((int)_input_shape.dim(i)); - } - inp_shapes.push_back(shape); + if (!inp_shapes[inp_id].empty()) + dstNet.setInput(Mat(inp_shapes[inp_id], CV_32F), netInputs[inp_id]); } } - else if (net.input_dim_size() > 0) { - MatShape shape; - for (int dim = 0; dim < net.input_dim_size(); dim++) { - shape.push_back(net.input_dim(dim)); - } - inp_shapes.push_back(shape); - } - - for (int inp_id = 0; inp_id < inp_shapes.size(); inp_id++) { - dstNet.setInput(Mat(inp_shapes[inp_id], CV_32F), netInputs[inp_id]); - } addedBlobs.clear(); } diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index 7a87d46dc2..0577e2c834 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -1418,13 +1418,15 @@ struct Net::Impl clear(); + this->blobsToKeep = blobsToKeep_; + allocateLayers(blobsToKeep_); MapIdToLayerData::iterator it = layers.find(0); CV_Assert(it != layers.end()); it->second.skip = netInputLayer->skip; - initBackend(); + initBackend(blobsToKeep_); if (!netWasAllocated) { @@ -1437,7 +1439,6 @@ struct Net::Impl } netWasAllocated = true; - this->blobsToKeep = blobsToKeep_; if (DNN_NETWORK_DUMP > 0) { @@ -1564,7 +1565,7 @@ struct Net::Impl ldOut.consumers.push_back(LayerPin(inLayerId, outNum)); } - void initBackend() + void initBackend(const std::vector& blobsToKeep_) { CV_TRACE_FUNCTION(); if (preferableBackend == DNN_BACKEND_OPENCV) @@ -1574,7 +1575,7 @@ struct Net::Impl else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) { #ifdef HAVE_INF_ENGINE - initInfEngineBackend(); + initInfEngineBackend(blobsToKeep_); #else CV_Assert(false && "This OpenCV version is built without Inference Engine API support"); #endif @@ -1582,7 +1583,7 @@ struct Net::Impl else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { #ifdef HAVE_DNN_NGRAPH - initNgraphBackend(); + initNgraphBackend(blobsToKeep_); #else CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of Inference Engine + nGraph"); #endif @@ -1688,7 +1689,7 @@ struct Net::Impl } } - void initInfEngineBackend() + void initInfEngineBackend(const std::vector& blobsToKeep_) { CV_TRACE_FUNCTION(); CV_Assert_N(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, haveInfEngine()); @@ -1878,6 +1879,15 @@ struct Net::Impl CV_Assert(!ieNode.empty()); ieNode->net = net; + for (const auto& pin : blobsToKeep_) + { + if (pin.lid == ld.id) + { + ieNode->net->addOutput(ieNode->layer.getName()); + break; + } + } + // Convert weights in FP16 for specific targets. 
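The blobsToKeep_ plumbing added to initBackend()/initInfEngineBackend()/initNgraphBackend() above registers every layer the caller asked to keep as an extra network output, so intermediate blobs remain retrievable when the Inference Engine backends are active. On the API side this corresponds to calling Net::forward() with explicit output names; a rough Python sketch (model files and layer names here are hypothetical):

```python
import numpy as np
import cv2 as cv

# Hypothetical IR model files.
net = cv.dnn.readNet("model.xml", "model.bin")
net.setPreferableBackend(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE)

blob = np.random.rand(1, 3, 224, 224).astype(np.float32)  # example input shape
net.setInput(blob)

# Requesting an intermediate layer by name puts it into blobsToKeep;
# with this change the IE/nGraph graphs expose it as an extra output
# instead of only returning the final layer.
outs = net.forward(["conv1", "prob"])  # hypothetical layer names
```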
if ((preferableTarget == DNN_TARGET_OPENCL_FP16 || preferableTarget == DNN_TARGET_MYRIAD || @@ -1984,7 +1994,7 @@ struct Net::Impl } } - void initNgraphBackend() + void initNgraphBackend(const std::vector& blobsToKeep_) { CV_TRACE_FUNCTION(); CV_Assert_N(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, haveInfEngine()); @@ -2173,6 +2183,14 @@ struct Net::Impl // TF EAST_text_detection ieNode->net->setUnconnectedNodes(ieNode); } + for (const auto& pin : blobsToKeep_) + { + if (pin.lid == ld.id) + { + ieNode->net->addOutput(ieNode->node->get_friendly_name()); + break; + } + } ieNode->net->setNodePtr(&ieNode->node); net->addBlobs(ld.inputBlobsWrappers); diff --git a/modules/dnn/src/ie_ngraph.cpp b/modules/dnn/src/ie_ngraph.cpp index 2a00880c42..d7df547412 100644 --- a/modules/dnn/src/ie_ngraph.cpp +++ b/modules/dnn/src/ie_ngraph.cpp @@ -231,11 +231,10 @@ void InfEngineNgraphNet::init(Target targetId) } } } - } else { - for (const auto& name : requestedOutputs) - { - cnn.addOutput(name); - } + } + for (const auto& name : requestedOutputs) + { + cnn.addOutput(name); } for (const auto& it : cnn.getInputsInfo()) diff --git a/modules/dnn/src/layers/crop_and_resize_layer.cpp b/modules/dnn/src/layers/crop_and_resize_layer.cpp index ab242e1b2e..a4443ed3a2 100644 --- a/modules/dnn/src/layers/crop_and_resize_layer.cpp +++ b/modules/dnn/src/layers/crop_and_resize_layer.cpp @@ -5,6 +5,7 @@ // Copyright (C) 2018, Intel Corporation, all rights reserved. // Third party copyrights are property of their respective owners. #include "../precomp.hpp" +#include "../ie_ngraph.hpp" #include "layers_common.hpp" #ifdef HAVE_CUDA @@ -25,6 +26,14 @@ public: outHeight = params.get("height"); } + virtual bool supportBackend(int backendId) CV_OVERRIDE + { + return backendId == DNN_BACKEND_OPENCV + || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH + || backendId == DNN_BACKEND_CUDA + ; + } + bool getMemoryShapes(const std::vector &inputs, const int requiredOutputs, std::vector &outputs, @@ -41,11 +50,6 @@ public: return false; } - virtual bool supportBackend(int backendId) CV_OVERRIDE - { - return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA; - } - void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE { CV_TRACE_FUNCTION(); @@ -121,6 +125,41 @@ public: } } +#ifdef HAVE_DNN_NGRAPH + virtual Ptr initNgraph(const std::vector >& inputs, + const std::vector >& nodes) CV_OVERRIDE + { + // Slice second input: from 1x1xNx7 to 1x1xNx5 + auto input = nodes[0].dynamicCast()->node; + auto rois = nodes[1].dynamicCast()->node; + + std::vector dims = rois->get_shape(), offsets(4, 0); + offsets[3] = 2; + dims[3] = 7; + + auto lower_bounds = std::make_shared(ngraph::element::i64, + ngraph::Shape{offsets.size()}, offsets.data()); + auto upper_bounds = std::make_shared(ngraph::element::i64, + ngraph::Shape{dims.size()}, dims.data()); + auto strides = std::make_shared(ngraph::element::i64, + ngraph::Shape{dims.size()}, std::vector((int64_t)dims.size(), 1)); + auto slice = std::make_shared(rois, + lower_bounds, upper_bounds, strides, std::vector{}, std::vector{}); + + // Reshape rois from 4D to 2D + std::vector shapeData = {dims[2], 5}; + auto shape = std::make_shared(ngraph::element::i64, ngraph::Shape{2}, shapeData.data()); + auto reshape = std::make_shared(slice, shape, true); + + auto roiPooling = + std::make_shared(input, reshape, + ngraph::Shape{(size_t)outHeight, (size_t)outWidth}, + 1.0f, "bilinear"); + + return Ptr(new 
InfEngineNgraphNode(roiPooling)); + } +#endif // HAVE_DNN_NGRAPH + #ifdef HAVE_CUDA Ptr initCUDA( void *context_, diff --git a/modules/dnn/src/layers/resize_layer.cpp b/modules/dnn/src/layers/resize_layer.cpp index 0c5f76baab..3a06bb0788 100644 --- a/modules/dnn/src/layers/resize_layer.cpp +++ b/modules/dnn/src/layers/resize_layer.cpp @@ -41,7 +41,7 @@ public: CV_Assert(params.has("zoom_factor_x") && params.has("zoom_factor_y")); } interpolation = params.get("interpolation"); - CV_Assert(interpolation == "nearest" || interpolation == "bilinear"); + CV_Assert(interpolation == "nearest" || interpolation == "opencv_linear" || interpolation == "bilinear"); alignCorners = params.get("align_corners", false); } @@ -115,14 +115,15 @@ public: Mat& inp = inputs[0]; Mat& out = outputs[0]; - if (interpolation == "nearest") + if (interpolation == "nearest" || interpolation == "opencv_linear") { + InterpolationFlags mode = interpolation == "nearest" ? INTER_NEAREST : INTER_LINEAR; for (size_t n = 0; n < inputs[0].size[0]; ++n) { for (size_t ch = 0; ch < inputs[0].size[1]; ++ch) { resize(getPlane(inp, n, ch), getPlane(out, n, ch), - Size(outWidth, outHeight), 0, 0, INTER_NEAREST); + Size(outWidth, outHeight), 0, 0, mode); } } } diff --git a/modules/dnn/src/layers/scale_layer.cpp b/modules/dnn/src/layers/scale_layer.cpp index 684b971cbb..c60a4174b7 100644 --- a/modules/dnn/src/layers/scale_layer.cpp +++ b/modules/dnn/src/layers/scale_layer.cpp @@ -61,7 +61,8 @@ public: return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || backendId == DNN_BACKEND_HALIDE || - ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && axis == 1); + (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && axis == 1) || + (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && axis > 0); } void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE @@ -263,22 +264,26 @@ public: auto ieInpNode = nodes[0].dynamicCast()->node; std::vector shape(ieInpNode->get_shape().size(), 1); - shape[1] = numChannels; - auto weight = hasWeights ? - std::make_shared(ngraph::element::f32, - ngraph::Shape(shape), blobs[0].data) : - std::make_shared(ngraph::element::f32, - ngraph::Shape(shape), std::vector(numChannels, 1).data()); + int cAxis = clamp(axis, shape.size()); + shape[cAxis] = numChannels; - auto bias = hasBias ? - std::make_shared(ngraph::element::f32, - ngraph::Shape(shape), blobs.back().data) : - std::make_shared(ngraph::element::f32, - ngraph::Shape(shape), std::vector(numChannels, 0).data()); - - auto scale_node = std::make_shared(ieInpNode, weight, ngraph::op::AutoBroadcastType::NUMPY); - auto scale_shift = std::make_shared(scale_node, bias, ngraph::op::AutoBroadcastType::NUMPY); - return Ptr(new InfEngineNgraphNode(scale_shift)); + auto node = ieInpNode; + if (hasWeights) + { + auto weight = std::make_shared(ngraph::element::f32, + ngraph::Shape(shape), blobs[0].data); + node = std::make_shared(node, weight, ngraph::op::AutoBroadcastType::NUMPY); + } + if (hasBias || !hasWeights) + { + auto bias = hasBias ? 
+ std::make_shared(ngraph::element::f32, + ngraph::Shape(shape), blobs.back().data) : + std::make_shared(ngraph::element::f32, + ngraph::Shape(shape), std::vector(numChannels, 0).data()); + node = std::make_shared(node, bias, ngraph::op::AutoBroadcastType::NUMPY); + } + return Ptr(new InfEngineNgraphNode(node)); } #endif // HAVE_DNN_NGRAPH diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 832bbcacbf..c68846299f 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -485,16 +485,23 @@ void ONNXImporter::populateNet(Net dstNet) } else if (layer_type == "Split") { - DictValue splits = layerParams.get("split"); - const int numSplits = splits.size(); - CV_Assert(numSplits > 1); - - std::vector slicePoints(numSplits - 1, splits.get(0)); - for (int i = 1; i < splits.size() - 1; ++i) + if (layerParams.has("split")) { - slicePoints[i] = slicePoints[i - 1] + splits.get(i - 1); + DictValue splits = layerParams.get("split"); + const int numSplits = splits.size(); + CV_Assert(numSplits > 1); + + std::vector slicePoints(numSplits - 1, splits.get(0)); + for (int i = 1; i < splits.size() - 1; ++i) + { + slicePoints[i] = slicePoints[i - 1] + splits.get(i - 1); + } + layerParams.set("slice_point", DictValue::arrayInt(&slicePoints[0], slicePoints.size())); + } + else + { + layerParams.set("num_split", node_proto.output_size()); } - layerParams.set("slice_point", DictValue::arrayInt(&slicePoints[0], slicePoints.size())); layerParams.type = "Slice"; } else if (layer_type == "Add" || layer_type == "Sum") @@ -973,6 +980,15 @@ void ONNXImporter::populateNet(Net dstNet) replaceLayerParam(layerParams, "width_scale", "zoom_factor_x"); } replaceLayerParam(layerParams, "mode", "interpolation"); + + if (layerParams.get("interpolation") == "linear" && framework_name == "pytorch") { + layerParams.type = "Resize"; + Mat scales = getBlob(node_proto, constBlobs, 1); + CV_Assert(scales.total() == 4); + layerParams.set("interpolation", "opencv_linear"); + layerParams.set("zoom_factor_y", scales.at(2)); + layerParams.set("zoom_factor_x", scales.at(3)); + } } else if (layer_type == "LogSoftmax") { diff --git a/modules/dnn/test/test_ie_models.cpp b/modules/dnn/test/test_ie_models.cpp index e697c5a798..869a3575d7 100644 --- a/modules/dnn/test/test_ie_models.cpp +++ b/modules/dnn/test/test_ie_models.cpp @@ -73,28 +73,7 @@ struct OpenVINOModelTestCaseInfo static const std::map& getOpenVINOTestModels() { static std::map g_models { -#if INF_ENGINE_RELEASE <= 2018050000 - { "age-gender-recognition-retail-0013", { - "deployment_tools/intel_models/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013", - "deployment_tools/intel_models/age-gender-recognition-retail-0013/FP16/age-gender-recognition-retail-0013" - }}, - { "face-person-detection-retail-0002", { - "deployment_tools/intel_models/face-person-detection-retail-0002/FP32/face-person-detection-retail-0002", - "deployment_tools/intel_models/face-person-detection-retail-0002/FP16/face-person-detection-retail-0002" - }}, - { "head-pose-estimation-adas-0001", { - "deployment_tools/intel_models/head-pose-estimation-adas-0001/FP32/head-pose-estimation-adas-0001", - "deployment_tools/intel_models/head-pose-estimation-adas-0001/FP16/head-pose-estimation-adas-0001" - }}, - { "person-detection-retail-0002", { - "deployment_tools/intel_models/person-detection-retail-0002/FP32/person-detection-retail-0002", - 
"deployment_tools/intel_models/person-detection-retail-0002/FP16/person-detection-retail-0002" - }}, - { "vehicle-detection-adas-0002", { - "deployment_tools/intel_models/vehicle-detection-adas-0002/FP32/vehicle-detection-adas-0002", - "deployment_tools/intel_models/vehicle-detection-adas-0002/FP16/vehicle-detection-adas-0002" - }} -#else +#if INF_ENGINE_RELEASE >= 2018050000 // layout is defined by open_model_zoo/model_downloader // Downloaded using these parameters for Open Model Zoo downloader (2019R1): // ./downloader.py -o ${OPENCV_DNN_TEST_DATA_PATH}/omz_intel_models --cache_dir ${OPENCV_DNN_TEST_DATA_PATH}/.omz_cache/ \ @@ -118,7 +97,16 @@ static const std::map& getOpenVINOTestMo { "vehicle-detection-adas-0002", { "Transportation/object_detection/vehicle/mobilenet-reduced-ssd/dldt/vehicle-detection-adas-0002", "Transportation/object_detection/vehicle/mobilenet-reduced-ssd/dldt/vehicle-detection-adas-0002-fp16" - }} + }}, +#endif +#if INF_ENGINE_RELEASE >= 2020010000 + // Downloaded using these parameters for Open Model Zoo downloader (2020.1): + // ./downloader.py -o ${OPENCV_DNN_TEST_DATA_PATH}/omz_intel_models --cache_dir ${OPENCV_DNN_TEST_DATA_PATH}/.omz_cache/ \ + // --name person-detection-retail-0013 + { "person-detection-retail-0013", { // IRv10 + "intel/person-detection-retail-0013/FP32/person-detection-retail-0013", + "intel/person-detection-retail-0013/FP16/person-detection-retail-0013" + }}, #endif }; @@ -305,8 +293,8 @@ TEST_P(DNNTestOpenVINO, models) OpenVINOModelTestCaseInfo modelInfo = it->second; std::string modelPath = isFP16 ? modelInfo.modelPathFP16 : modelInfo.modelPathFP32; - std::string xmlPath = findDataFile(modelPath + ".xml"); - std::string binPath = findDataFile(modelPath + ".bin"); + std::string xmlPath = findDataFile(modelPath + ".xml", false); + std::string binPath = findDataFile(modelPath + ".bin", false); std::map inputsMap; std::map ieOutputsMap, cvOutputsMap; @@ -316,13 +304,19 @@ TEST_P(DNNTestOpenVINO, models) runIE(targetId, xmlPath, binPath, inputsMap, ieOutputsMap); runCV(backendId, targetId, xmlPath, binPath, inputsMap, cvOutputsMap); + double eps = 0; +#if INF_ENGINE_VER_MAJOR_GE(2020010000) + if (targetId == DNN_TARGET_CPU && checkHardwareSupport(CV_CPU_AVX_512F)) + eps = 1e-5; +#endif + EXPECT_EQ(ieOutputsMap.size(), cvOutputsMap.size()); for (auto& srcIt : ieOutputsMap) { auto dstIt = cvOutputsMap.find(srcIt.first); CV_Assert(dstIt != cvOutputsMap.end()); double normInf = cvtest::norm(srcIt.second, dstIt->second, cv::NORM_INF); - EXPECT_EQ(normInf, 0); + EXPECT_LE(normInf, eps) << "output=" << srcIt.first; } } diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index 1fca878ec1..df953ca79d 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -335,6 +335,9 @@ TEST_P(Test_ONNX_layers, Padding) TEST_P(Test_ONNX_layers, Resize) { testONNXModels("resize_nearest"); + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + testONNXModels("resize_bilinear"); } TEST_P(Test_ONNX_layers, MultyInputs) @@ -411,6 +414,18 @@ TEST_P(Test_ONNX_layers, ReduceL2) testONNXModels("reduceL2"); } +TEST_P(Test_ONNX_layers, Split) +{ + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + testONNXModels("split_1"); + testONNXModels("split_2"); + 
testONNXModels("split_3"); + testONNXModels("split_4"); +} + TEST_P(Test_ONNX_layers, Slice) { #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2019010000) diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index 1ef235e8a0..e25243b52d 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -994,8 +994,16 @@ TEST(Test_TensorFlow, two_inputs) normAssert(out, firstInput + secondInput); } -TEST(Test_TensorFlow, Mask_RCNN) +TEST_P(Test_TensorFlow_nets, Mask_RCNN) { + static const double kMaskThreshold = 0.5; + + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + + if (target == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X); + applyTestTag(CV_TEST_TAG_MEMORY_1GB, CV_TEST_TAG_DEBUG_VERYLONG); Mat img = imread(findDataFile("dnn/street.png")); std::string proto = findDataFile("dnn/mask_rcnn_inception_v2_coco_2018_01_28.pbtxt"); @@ -1006,7 +1014,8 @@ TEST(Test_TensorFlow, Mask_RCNN) Mat refMasks = blobFromNPY(path("mask_rcnn_inception_v2_coco_2018_01_28.detection_masks.npy")); Mat blob = blobFromImage(img, 1.0f, Size(800, 800), Scalar(), true, false); - net.setPreferableBackend(DNN_BACKEND_OPENCV); + net.setPreferableBackend(backend); + net.setPreferableTarget(target); net.setInput(blob); @@ -1020,7 +1029,10 @@ TEST(Test_TensorFlow, Mask_RCNN) Mat outDetections = outs[0]; Mat outMasks = outs[1]; - normAssertDetections(refDetections, outDetections, "", /*threshold for zero confidence*/1e-5); + + double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.019 : 2e-5; + double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 
0.018 : default_lInf; + normAssertDetections(refDetections, outDetections, "", /*threshold for zero confidence*/1e-5, scoreDiff, iouDiff); // Output size of masks is NxCxHxW where // N - number of detected boxes @@ -1044,7 +1056,18 @@ TEST(Test_TensorFlow, Mask_RCNN) outMasks(srcRanges).copyTo(masks(dstRanges)); } cv::Range topRefMasks[] = {Range::all(), Range(0, numDetections), Range::all(), Range::all()}; - normAssert(masks, refMasks(&topRefMasks[0])); + refMasks = refMasks(&topRefMasks[0]); + + // make binary masks + cv::threshold(masks.reshape(1, 1), masks, kMaskThreshold, 1, THRESH_BINARY); + cv::threshold(refMasks.reshape(1, 1), refMasks, kMaskThreshold, 1, THRESH_BINARY); + + double inter = cv::countNonZero(masks & refMasks); + double area = cv::countNonZero(masks | refMasks); + EXPECT_GE(inter / area, 0.99); + + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + expectNoFallbacks(net); } } diff --git a/modules/highgui/src/window_cocoa.mm b/modules/highgui/src/window_cocoa.mm index 08b11ae89a..29a0278c98 100644 --- a/modules/highgui/src/window_cocoa.mm +++ b/modules/highgui/src/window_cocoa.mm @@ -112,11 +112,14 @@ static bool wasInitialized = false; BOOL autosize; BOOL firstContent; int status; + int x0, y0; } @property(assign) CvMouseCallback mouseCallback; @property(assign) void *mouseParam; @property(assign) BOOL autosize; @property(assign) BOOL firstContent; +@property(assign) int x0; +@property(assign) int y0; @property(retain) NSMutableDictionary *sliders; @property(readwrite) int status; - (CVView *)contentView; @@ -252,6 +255,16 @@ CV_IMPL void cvShowImage( const char* name, const CvArr* arr) contentSize.height = scaledImageSize.height + [window contentView].sliderHeight; contentSize.width = std::max(scaledImageSize.width, MIN_SLIDER_WIDTH); [window setContentSize:contentSize]; //adjust sliders to fit new window size + if([window firstContent]) + { + int x = [window x0]; + int y = [window y0]; + if(x >= 0 && y >= 0) + { + y = [[window screen] visibleFrame].size.height - y; + [window setFrameTopLeftPoint:NSMakePoint(x, y)]; + } + } } } [window setFirstContent:NO]; @@ -275,7 +288,6 @@ CV_IMPL void cvResizeWindow( const char* name, int width, int height) CV_IMPL void cvMoveWindow( const char* name, int x, int y) { - CV_FUNCNAME("cvMoveWindow"); __BEGIN__; @@ -287,8 +299,14 @@ CV_IMPL void cvMoveWindow( const char* name, int x, int y) //cout << "cvMoveWindow"<< endl; window = cvGetWindow(name); if(window) { - y = [[window screen] frame].size.height - y; - [window setFrameTopLeftPoint:NSMakePoint(x, y)]; + if([window firstContent]) { + [window setX0:x]; + [window setY0:y]; + } + else { + y = [[window screen] visibleFrame].size.height - y; + [window setFrameTopLeftPoint:NSMakePoint(x, y)]; + } } [localpool1 drain]; @@ -557,6 +575,8 @@ CV_IMPL int cvNamedWindow( const char* name, int flags ) [window setFrameTopLeftPoint:initContentRect.origin]; [window setFirstContent:YES]; + [window setX0:-1]; + [window setY0:-1]; [window setContentView:[[CVView alloc] init]]; @@ -819,6 +839,8 @@ static NSSize constrainAspectRatio(NSSize base, NSSize constraint) { @synthesize mouseParam; @synthesize autosize; @synthesize firstContent; +@synthesize x0; +@synthesize y0; @synthesize sliders; @synthesize status; diff --git a/modules/imgproc/test/test_imgwarp.cpp b/modules/imgproc/test/test_imgwarp.cpp index 4b4f17ac94..d63e07969c 100644 --- a/modules/imgproc/test/test_imgwarp.cpp +++ b/modules/imgproc/test/test_imgwarp.cpp @@ -1413,6 +1413,21 @@ TEST(Resize, lanczos4_regression_16192) 
EXPECT_EQ(cvtest::norm(dst, expected, NORM_INF), 0) << dst(Rect(0,0,8,8)); } +TEST(Resize, DISABLED_nearest_regression_15075) // reverted https://github.com/opencv/opencv/pull/16497 +{ + const int C = 5; + const int i1 = 5, j1 = 5; + Size src_size(12, 12); + Size dst_size(11, 11); + + cv::Mat src = cv::Mat::zeros(src_size, CV_8UC(C)), dst; + for (int j = 0; j < C; j++) + src.col(i1).row(j1).data[j] = 1; + + cv::resize(src, dst, dst_size, 0, 0, INTER_NEAREST); + EXPECT_EQ(C, cvtest::norm(dst, NORM_L1)) << src.size; +} + TEST(Imgproc_Warp, multichannel) { static const int inter_types[] = {INTER_NEAREST, INTER_AREA, INTER_CUBIC, diff --git a/modules/python/package/template/config.py.in b/modules/python/package/template/config.py.in index 5fc444f175..7f18a920e0 100644 --- a/modules/python/package/template/config.py.in +++ b/modules/python/package/template/config.py.in @@ -1,3 +1,5 @@ +import os + BINARIES_PATHS = [ @CMAKE_PYTHON_BINARIES_PATH@ ] + BINARIES_PATHS diff --git a/modules/python/python_loader.cmake b/modules/python/python_loader.cmake index 663be5c824..31cd33505a 100644 --- a/modules/python/python_loader.cmake +++ b/modules/python/python_loader.cmake @@ -58,7 +58,13 @@ if(NOT OpenCV_FOUND) # Ignore "standalone" builds of Python bindings else() list(APPEND CMAKE_PYTHON_BINARIES_INSTALL_PATH "os.path.join(${CMAKE_PYTHON_EXTENSION_INSTALL_PATH_BASE}, '${OPENCV_LIB_INSTALL_PATH}')") endif() - string(REPLACE ";" ",\n " CMAKE_PYTHON_BINARIES_PATH "${CMAKE_PYTHON_BINARIES_INSTALL_PATH}") + set(CMAKE_PYTHON_BINARIES_PATH "${CMAKE_PYTHON_BINARIES_INSTALL_PATH}") + if (WIN32 AND HAVE_CUDA) + if (DEFINED CUDA_TOOLKIT_ROOT_DIR) + list(APPEND CMAKE_PYTHON_BINARIES_PATH "os.path.join(os.getenv('CUDA_PATH', '${CUDA_TOOLKIT_ROOT_DIR}'), 'bin')") + endif() + endif() + string(REPLACE ";" ",\n " CMAKE_PYTHON_BINARIES_PATH "${CMAKE_PYTHON_BINARIES_PATH}") configure_file("${PYTHON_SOURCE_DIR}/package/template/config.py.in" "${__python_loader_install_tmp_path}/cv2/config.py" @ONLY) install(FILES "${__python_loader_install_tmp_path}/cv2/config.py" DESTINATION "${OPENCV_PYTHON_INSTALL_PATH}/cv2/" COMPONENT python) endif() diff --git a/samples/cpp/falsecolor.cpp b/samples/cpp/falsecolor.cpp index ed422b39c1..f73ffad4ce 100644 --- a/samples/cpp/falsecolor.cpp +++ b/samples/cpp/falsecolor.cpp @@ -41,10 +41,10 @@ static Mat DrawMyImage(int thickness,int nbShape) { Mat img=Mat::zeros(500,256*thickness+100,CV_8UC1); int offsetx = 50, offsety = 25; - int lineLenght = 50; + int lineLength = 50; for (int i=0;i<256;i++) - line(img,Point(thickness*i+ offsetx, offsety),Point(thickness*i+ offsetx, offsety+ lineLenght),Scalar(i), thickness); + line(img,Point(thickness*i+ offsetx, offsety),Point(thickness*i+ offsetx, offsety+ lineLength),Scalar(i), thickness); RNG r; Point center; int radius; @@ -57,19 +57,19 @@ static Mat DrawMyImage(int thickness,int nbShape) int typeShape = r.uniform(MyCIRCLE, MyELLIPSE+1); switch (typeShape) { case MyCIRCLE: - center = Point(r.uniform(offsetx,img.cols- offsetx), r.uniform(offsety + lineLenght, img.rows - offsety)); + center = Point(r.uniform(offsetx,img.cols- offsetx), r.uniform(offsety + lineLength, img.rows - offsety)); radius = r.uniform(1, min(offsetx, offsety)); circle(img,center,radius,Scalar(i),-1); break; case MyRECTANGLE: - center = Point(r.uniform(offsetx, img.cols - offsetx), r.uniform(offsety + lineLenght, img.rows - offsety)); + center = Point(r.uniform(offsetx, img.cols - offsetx), r.uniform(offsety + lineLength, img.rows - offsety)); width = r.uniform(1, min(offsetx, 
offsety)); height = r.uniform(1, min(offsetx, offsety)); rc = Rect(center-Point(width ,height )/2, center + Point(width , height )/2); rectangle(img,rc, Scalar(i), -1); break; case MyELLIPSE: - center = Point(r.uniform(offsetx, img.cols - offsetx), r.uniform(offsety + lineLenght, img.rows - offsety)); + center = Point(r.uniform(offsetx, img.cols - offsetx), r.uniform(offsety + lineLength, img.rows - offsety)); width = r.uniform(1, min(offsetx, offsety)); height = r.uniform(1, min(offsetx, offsety)); angle = r.uniform(0, 180); diff --git a/samples/dnn/human_parsing.py b/samples/dnn/human_parsing.py index c4ac11bad2..f84d2038e4 100644 --- a/samples/dnn/human_parsing.py +++ b/samples/dnn/human_parsing.py @@ -40,6 +40,7 @@ Follow these steps if you want to convert the original model yourself: ''' import argparse +import os.path import numpy as np import cv2 as cv @@ -48,12 +49,11 @@ backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv. targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD) -def preprocess(image_path): +def preprocess(image): """ Create 4-dimensional blob from image and flip image - :param image_path: path to input image + :param image: input image """ - image = cv.imread(image_path) image_rev = np.flip(image, axis=1) input = cv.dnn.blobFromImages([image, image_rev], mean=(104.00698793, 116.66876762, 122.67891434)) return input @@ -137,15 +137,15 @@ def decode_labels(gray_image): return segm -def parse_human(image_path, model_path, backend=cv.dnn.DNN_BACKEND_OPENCV, target=cv.dnn.DNN_TARGET_CPU): +def parse_human(image, model_path, backend=cv.dnn.DNN_BACKEND_OPENCV, target=cv.dnn.DNN_TARGET_CPU): """ Prepare input for execution, run net and postprocess output to parse human. 
- :param image_path: path to input image + :param image: input image :param model_path: path to JPPNet model :param backend: name of computation backend :param target: name of computation target """ - input = preprocess(image_path) + input = preprocess(image) input_h, input_w = input.shape[2:] output = run_net(input, model_path, backend, target) grayscale_out = postprocess(output, (input_w, input_h)) @@ -157,7 +157,7 @@ if __name__ == '__main__': parser = argparse.ArgumentParser(description='Use this script to run human parsing using JPPNet', formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('--input', '-i', required=True, help='Path to input image.') - parser.add_argument('--model', '-m', required=True, help='Path to pb model.') + parser.add_argument('--model', '-m', default='lip_jppnet_384.pb', help='Path to pb model.') parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int, help="Choose one of computation backends: " "%d: automatically (by default), " @@ -171,7 +171,11 @@ if __name__ == '__main__': '%d: VPU' % targets) args, _ = parser.parse_known_args() - output = parse_human(args.input, args.model, args.backend, args.target) + if not os.path.isfile(args.model): + raise OSError("Model not exist") + + image = cv.imread(args.input) + output = parse_human(image, args.model, args.backend, args.target) winName = 'Deep learning human parsing in OpenCV' cv.namedWindow(winName, cv.WINDOW_AUTOSIZE) cv.imshow(winName, output) diff --git a/samples/dnn/virtual_try_on.py b/samples/dnn/virtual_try_on.py new file mode 100644 index 0000000000..bbc655a276 --- /dev/null +++ b/samples/dnn/virtual_try_on.py @@ -0,0 +1,465 @@ +#!/usr/bin/env python3 +''' +You can download the Geometric Matching Module model from https://www.dropbox.com/s/tyhc73xa051grjp/cp_vton_gmm.onnx?dl=0 +You can download the Try-On Module model from https://www.dropbox.com/s/q2x97ve2h53j66k/cp_vton_tom.onnx?dl=0 +You can download the cloth segmentation model from https://www.dropbox.com/s/qag9vzambhhkvxr/lip_jppnet_384.pb?dl=0 +You can find the OpenPose proto in opencv_extra/testdata/dnn/openpose_pose_coco.prototxt +and get .caffemodel using opencv_extra/testdata/dnn/download_models.py +''' + +import argparse +import os.path +import numpy as np +import cv2 as cv + +from numpy import linalg +from common import findFile +from human_parsing import parse_human + +backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_BACKEND_OPENCV) +targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD) + +parser = argparse.ArgumentParser(description='Use this script to run virtial try-on using CP-VTON', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument('--input_image', '-i', required=True, help='Path to image with person.') +parser.add_argument('--input_cloth', '-c', required=True, help='Path to target cloth image') +parser.add_argument('--gmm_model', '-gmm', default='cp_vton_gmm.onnx', help='Path to Geometric Matching Module .onnx model.') +parser.add_argument('--tom_model', '-tom', default='cp_vton_tom.onnx', help='Path to Try-On Module .onnx model.') +parser.add_argument('--segmentation_model', default='lip_jppnet_384.pb', help='Path to cloth segmentation .pb model.') +parser.add_argument('--openpose_proto', default='openpose_pose_coco.prototxt', help='Path to OpenPose .prototxt model was trained on COCO dataset.') 
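With the human_parsing.py change above, parse_human() now takes a decoded image rather than a file path, which is what lets this new sample reuse it on an already-cropped frame. Roughly (the input path below is a placeholder):

```python
import cv2 as cv
from human_parsing import parse_human

person = cv.imread("person.jpg")  # placeholder input image
# lip_jppnet_384.pb is the default segmentation model name used by both samples
segm = parse_human(person, "lip_jppnet_384.pb")
segm = cv.resize(segm, (192, 256), interpolation=cv.INTER_LINEAR)
```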
+parser.add_argument('--openpose_model', default='openpose_pose_coco.caffemodel', help='Path to OpenPose .caffemodel model was trained on COCO dataset.') +parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int, + help="Choose one of computation backends: " + "%d: automatically (by default), " + "%d: Halide language (http://halide-lang.org/), " + "%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), " + "%d: OpenCV implementation" % backends) +parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int, + help='Choose one of target computation devices: ' + '%d: CPU target (by default), ' + '%d: OpenCL, ' + '%d: OpenCL fp16 (half-float precision), ' + '%d: VPU' % targets) +args, _ = parser.parse_known_args() + + +def get_pose_map(image, proto_path, model_path, backend, target, height=256, width=192): + radius = 5 + inp = cv.dnn.blobFromImage(image, 1.0 / 255, (width, height)) + + net = cv.dnn.readNet(proto_path, model_path) + net.setPreferableBackend(backend) + net.setPreferableTarget(target) + net.setInput(inp) + out = net.forward() + + threshold = 0.1 + _, out_c, out_h, out_w = out.shape + pose_map = np.zeros((height, width, out_c - 1)) + # last label: Background + for i in range(0, out.shape[1] - 1): + heatMap = out[0, i, :, :] + keypoint = np.full((height, width), -1) + _, conf, _, point = cv.minMaxLoc(heatMap) + x = width * point[0] // out_w + y = height * point[1] // out_h + if conf > threshold and x > 0 and y > 0: + keypoint[y - radius:y + radius, x - radius:x + radius] = 1 + pose_map[:, :, i] = keypoint + + pose_map = pose_map.transpose(2, 0, 1) + return pose_map + + +class BilinearFilter(object): + """ + PIL bilinear resize implementation + image = image.resize((image_width // 16, image_height // 16), Image.BILINEAR) + """ + def _precompute_coeffs(self, inSize, outSize): + filterscale = max(1.0, inSize / outSize) + ksize = int(np.ceil(filterscale)) * 2 + 1 + + kk = np.zeros(shape=(outSize * ksize, ), dtype=np.float32) + bounds = np.empty(shape=(outSize * 2, ), dtype=np.int32) + + centers = (np.arange(outSize) + 0.5) * filterscale + 0.5 + bounds[::2] = np.where(centers - filterscale < 0, 0, centers - filterscale) + bounds[1::2] = np.where(centers + filterscale > inSize, inSize, centers + filterscale) - bounds[::2] + xmins = bounds[::2] - centers + 1 + + points = np.array([np.arange(row) + xmins[i] for i, row in enumerate(bounds[1::2])]) / filterscale + for xx in range(0, outSize): + point = points[xx] + bilinear = np.where(point < 1.0, 1.0 - abs(point), 0.0) + ww = np.sum(bilinear) + kk[xx * ksize : xx * ksize + bilinear.size] = np.where(ww == 0.0, bilinear, bilinear / ww) + return bounds, kk, ksize + + def _resample_horizontal(self, out, img, ksize, bounds, kk): + for yy in range(0, out.shape[0]): + for xx in range(0, out.shape[1]): + xmin = bounds[xx * 2 + 0] + xmax = bounds[xx * 2 + 1] + k = kk[xx * ksize : xx * ksize + xmax] + out[yy, xx] = np.round(np.sum(img[yy, xmin : xmin + xmax] * k)) + + def _resample_vertical(self, out, img, ksize, bounds, kk): + for yy in range(0, out.shape[0]): + ymin = bounds[yy * 2 + 0] + ymax = bounds[yy * 2 + 1] + k = kk[yy * ksize: yy * ksize + ymax] + out[yy] = np.round(np.sum(img[ymin : ymin + ymax, 0:out.shape[1]] * k[:, np.newaxis], axis=0)) + + def imaging_resample(self, img, xsize, ysize): + height, width, *args = img.shape + bounds_horiz, kk_horiz, ksize_horiz = self._precompute_coeffs(width, xsize) + bounds_vert, kk_vert, ksize_vert = 
self._precompute_coeffs(height, ysize) + + out_hor = np.empty((img.shape[0], xsize), dtype=np.uint8) + self._resample_horizontal(out_hor, img, ksize_horiz, bounds_horiz, kk_horiz) + out = np.empty((ysize, xsize), dtype=np.uint8) + self._resample_vertical(out, out_hor, ksize_vert, bounds_vert, kk_vert) + return out + + +class CpVton(object): + def __init__(self, gmm_model, tom_model, backend, target): + super(CpVton, self).__init__() + self.gmm_net = cv.dnn.readNet(gmm_model) + self.tom_net = cv.dnn.readNet(tom_model) + self.gmm_net.setPreferableBackend(backend) + self.gmm_net.setPreferableTarget(target) + self.tom_net.setPreferableBackend(backend) + self.tom_net.setPreferableTarget(target) + + def prepare_agnostic(self, segm_image, input_image, pose_map, height=256, width=192): + palette = { + 'Background' : (0, 0, 0), + 'Hat' : (128, 0, 0), + 'Hair' : (255, 0, 0), + 'Glove' : (0, 85, 0), + 'Sunglasses' : (170, 0, 51), + 'UpperClothes' : (255, 85, 0), + 'Dress' : (0, 0, 85), + 'Coat' : (0, 119, 221), + 'Socks' : (85, 85, 0), + 'Pants' : (0, 85, 85), + 'Jumpsuits' : (85, 51, 0), + 'Scarf' : (52, 86, 128), + 'Skirt' : (0, 128, 0), + 'Face' : (0, 0, 255), + 'Left-arm' : (51, 170, 221), + 'Right-arm' : (0, 255, 255), + 'Left-leg' : (85, 255, 170), + 'Right-leg' : (170, 255, 85), + 'Left-shoe' : (255, 255, 0), + 'Right-shoe' : (255, 170, 0) + } + color2label = {val: key for key, val in palette.items()} + head_labels = ['Hat', 'Hair', 'Sunglasses', 'Face', 'Pants', 'Skirt'] + + segm_image = cv.cvtColor(segm_image, cv.COLOR_BGR2RGB) + phead = np.zeros((1, height, width), dtype=np.float32) + pose_shape = np.zeros((height, width), dtype=np.uint8) + for r in range(height): + for c in range(width): + pixel = tuple(segm_image[r, c]) + if tuple(pixel) in color2label: + if color2label[pixel] in head_labels: + phead[0, r, c] = 1 + if color2label[pixel] != 'Background': + pose_shape[r, c] = 255 + + input_image = cv.dnn.blobFromImage(input_image, 1.0 / 127.5, (width, height), mean=(127.5, 127.5, 127.5), swapRB=True) + input_image = input_image.squeeze(0) + + img_head = input_image * phead - (1 - phead) + + downsample = BilinearFilter() + down = downsample.imaging_resample(pose_shape, width // 16, height // 16) + res_shape = cv.resize(down, (width, height), cv.INTER_LINEAR) + + res_shape = cv.dnn.blobFromImage(res_shape, 1.0 / 127.5, mean=(127.5, 127.5, 127.5), swapRB=True) + res_shape = res_shape.squeeze(0) + + agnostic = np.concatenate((res_shape, img_head, pose_map), axis=0) + agnostic = np.expand_dims(agnostic, axis=0) + return agnostic + + def get_warped_cloth(self, cloth_img, agnostic, height=256, width=192): + cloth = cv.dnn.blobFromImage(cloth_img, 1.0 / 127.5, (width, height), mean=(127.5, 127.5, 127.5), swapRB=True) + + self.gmm_net.setInput(agnostic, "input.1") + self.gmm_net.setInput(cloth, "input.18") + theta = self.gmm_net.forward() + + grid = self._generate_grid(theta) + warped_cloth = self._bilinear_sampler(cloth, grid).astype(np.float32) + return warped_cloth + + def get_tryon(self, agnostic, warp_cloth): + inp = np.concatenate([agnostic, warp_cloth], axis=1) + self.tom_net.setInput(inp) + out = self.tom_net.forward() + + p_rendered, m_composite = np.split(out, [3], axis=1) + p_rendered = np.tanh(p_rendered) + m_composite = 1 / (1 + np.exp(-m_composite)) + + p_tryon = warp_cloth * m_composite + p_rendered * (1 - m_composite) + rgb_p_tryon = cv.cvtColor(p_tryon.squeeze(0).transpose(1, 2, 0), cv.COLOR_BGR2RGB) + rgb_p_tryon = (rgb_p_tryon + 1) / 2 + return rgb_p_tryon + + def 
_compute_L_inverse(self, X, Y): + N = X.shape[0] + + Xmat = np.tile(X, (1, N)) + Ymat = np.tile(Y, (1, N)) + P_dist_squared = np.power(Xmat - Xmat.transpose(1, 0), 2) + np.power(Ymat - Ymat.transpose(1, 0), 2) + + P_dist_squared[P_dist_squared == 0] = 1 + K = np.multiply(P_dist_squared, np.log(P_dist_squared)) + + O = np.ones([N, 1], dtype=np.float32) + Z = np.zeros([3, 3], dtype=np.float32) + P = np.concatenate([O, X, Y], axis=1) + first = np.concatenate((K, P), axis=1) + second = np.concatenate((P.transpose(1, 0), Z), axis=1) + L = np.concatenate((first, second), axis=0) + Li = linalg.inv(L) + return Li + + def _prepare_to_transform(self, out_h=256, out_w=192, grid_size=5): + grid = np.zeros([out_h, out_w, 3], dtype=np.float32) + grid_X, grid_Y = np.meshgrid(np.linspace(-1, 1, out_w), np.linspace(-1, 1, out_h)) + grid_X = np.expand_dims(np.expand_dims(grid_X, axis=0), axis=3) + grid_Y = np.expand_dims(np.expand_dims(grid_Y, axis=0), axis=3) + + axis_coords = np.linspace(-1, 1, grid_size) + N = grid_size ** 2 + P_Y, P_X = np.meshgrid(axis_coords, axis_coords) + + P_X = np.reshape(P_X,(-1, 1)) + P_Y = np.reshape(P_Y,(-1, 1)) + + P_X = np.expand_dims(np.expand_dims(np.expand_dims(P_X, axis=2), axis=3), axis=4).transpose(4, 1, 2, 3, 0) + P_Y = np.expand_dims(np.expand_dims(np.expand_dims(P_Y, axis=2), axis=3), axis=4).transpose(4, 1, 2, 3, 0) + return grid_X, grid_Y, N, P_X, P_Y + + def _expand_torch(self, X, shape): + if len(X.shape) != len(shape): + return X.flatten().reshape(shape) + else: + axis = [1 if src == dst else dst for src, dst in zip(X.shape, shape)] + return np.tile(X, axis) + + def _apply_transformation(self, theta, points, N, P_X, P_Y): + if len(theta.shape) == 2: + theta = np.expand_dims(np.expand_dims(theta, axis=2), axis=3) + + batch_size = theta.shape[0] + + P_X_base = np.copy(P_X) + P_Y_base = np.copy(P_Y) + + Li = self._compute_L_inverse(np.reshape(P_X, (N, -1)), np.reshape(P_Y, (N, -1))) + Li = np.expand_dims(Li, axis=0) + + # split theta into point coordinates + Q_X = np.squeeze(theta[:, :N, :, :], axis=3) + Q_Y = np.squeeze(theta[:, N:, :, :], axis=3) + + Q_X += self._expand_torch(P_X_base, Q_X.shape) + Q_Y += self._expand_torch(P_Y_base, Q_Y.shape) + + points_b = points.shape[0] + points_h = points.shape[1] + points_w = points.shape[2] + + P_X = self._expand_torch(P_X, (1, points_h, points_w, 1, N)) + P_Y = self._expand_torch(P_Y, (1, points_h, points_w, 1, N)) + + W_X = self._expand_torch(Li[:,:N,:N], (batch_size, N, N)) @ Q_X + W_Y = self._expand_torch(Li[:,:N,:N], (batch_size, N, N)) @ Q_Y + + W_X = np.expand_dims(np.expand_dims(W_X, axis=3), axis=4).transpose(0, 4, 2, 3, 1) + W_X = np.repeat(W_X, points_h, axis=1) + W_X = np.repeat(W_X, points_w, axis=2) + + W_Y = np.expand_dims(np.expand_dims(W_Y, axis=3), axis=4).transpose(0, 4, 2, 3, 1) + W_Y = np.repeat(W_Y, points_h, axis=1) + W_Y = np.repeat(W_Y, points_w, axis=2) + + A_X = self._expand_torch(Li[:, N:, :N], (batch_size, 3, N)) @ Q_X + A_Y = self._expand_torch(Li[:, N:, :N], (batch_size, 3, N)) @ Q_Y + + A_X = np.expand_dims(np.expand_dims(A_X, axis=3), axis=4).transpose(0, 4, 2, 3, 1) + A_X = np.repeat(A_X, points_h, axis=1) + A_X = np.repeat(A_X, points_w, axis=2) + + A_Y = np.expand_dims(np.expand_dims(A_Y, axis=3), axis=4).transpose(0, 4, 2, 3, 1) + A_Y = np.repeat(A_Y, points_h, axis=1) + A_Y = np.repeat(A_Y, points_w, axis=2) + + points_X_for_summation = np.expand_dims(np.expand_dims(points[:, :, :, 0], axis=3), axis=4) + points_X_for_summation = self._expand_torch(points_X_for_summation, points[:, :, 
:, 0].shape + (1, N)) + + points_Y_for_summation = np.expand_dims(np.expand_dims(points[:, :, :, 1], axis=3), axis=4) + points_Y_for_summation = self._expand_torch(points_Y_for_summation, points[:, :, :, 0].shape + (1, N)) + + if points_b == 1: + delta_X = points_X_for_summation - P_X + delta_Y = points_Y_for_summation - P_Y + else: + delta_X = points_X_for_summation - self._expand_torch(P_X, points_X_for_summation.shape) + delta_Y = points_Y_for_summation - self._expand_torch(P_Y, points_Y_for_summation.shape) + + dist_squared = np.power(delta_X, 2) + np.power(delta_Y, 2) + dist_squared[dist_squared == 0] = 1 + U = np.multiply(dist_squared, np.log(dist_squared)) + + points_X_batch = np.expand_dims(points[:,:,:,0], axis=3) + points_Y_batch = np.expand_dims(points[:,:,:,1], axis=3) + + if points_b == 1: + points_X_batch = self._expand_torch(points_X_batch, (batch_size, ) + points_X_batch.shape[1:]) + points_Y_batch = self._expand_torch(points_Y_batch, (batch_size, ) + points_Y_batch.shape[1:]) + + points_X_prime = A_X[:,:,:,:,0]+ \ + np.multiply(A_X[:,:,:,:,1], points_X_batch) + \ + np.multiply(A_X[:,:,:,:,2], points_Y_batch) + \ + np.sum(np.multiply(W_X, self._expand_torch(U, W_X.shape)), 4) + + points_Y_prime = A_Y[:,:,:,:,0]+ \ + np.multiply(A_Y[:,:,:,:,1], points_X_batch) + \ + np.multiply(A_Y[:,:,:,:,2], points_Y_batch) + \ + np.sum(np.multiply(W_Y, self._expand_torch(U, W_Y.shape)), 4) + + return np.concatenate((points_X_prime, points_Y_prime), 3) + + def _generate_grid(self, theta): + grid_X, grid_Y, N, P_X, P_Y = self._prepare_to_transform() + warped_grid = self._apply_transformation(theta, np.concatenate((grid_X, grid_Y), axis=3), N, P_X, P_Y) + return warped_grid + + def _bilinear_sampler(self, img, grid): + x, y = grid[:,:,:,0], grid[:,:,:,1] + + H = img.shape[2] + W = img.shape[3] + max_y = H - 1 + max_x = W - 1 + + # rescale x and y to [0, W-1/H-1] + x = 0.5 * (x + 1.0) * (max_x - 1) + y = 0.5 * (y + 1.0) * (max_y - 1) + + # grab 4 nearest corner points for each (x_i, y_i) + x0 = np.floor(x).astype(int) + x1 = x0 + 1 + y0 = np.floor(y).astype(int) + y1 = y0 + 1 + + # calculate deltas + wa = (x1 - x) * (y1 - y) + wb = (x1 - x) * (y - y0) + wc = (x - x0) * (y1 - y) + wd = (x - x0) * (y - y0) + + # clip to range [0, H-1/W-1] to not violate img boundaries + x0 = np.clip(x0, 0, max_x) + x1 = np.clip(x1, 0, max_x) + y0 = np.clip(y0, 0, max_y) + y1 = np.clip(y1, 0, max_y) + + # get pixel value at corner coords + img = img.reshape(-1, H, W) + Ia = img[:, y0, x0].swapaxes(0, 1) + Ib = img[:, y1, x0].swapaxes(0, 1) + Ic = img[:, y0, x1].swapaxes(0, 1) + Id = img[:, y1, x1].swapaxes(0, 1) + + wa = np.expand_dims(wa, axis=0) + wb = np.expand_dims(wb, axis=0) + wc = np.expand_dims(wc, axis=0) + wd = np.expand_dims(wd, axis=0) + + # compute output + out = wa*Ia + wb*Ib + wc*Ic + wd*Id + return out + + +class CorrelationLayer(object): + def __init__(self, params, blobs): + super(CorrelationLayer, self).__init__() + + def getMemoryShapes(self, inputs): + fetureAShape = inputs[0] + b, c, h, w = fetureAShape + return [[b, h * w, h, w]] + + def forward(self, inputs): + feature_A, feature_B = inputs + b, c, h, w = feature_A.shape + feature_A = feature_A.transpose(0, 1, 3, 2) + feature_A = np.reshape(feature_A, (b, c, h * w)) + feature_B = np.reshape(feature_B, (b, c, h * w)) + feature_B = feature_B.transpose(0, 2, 1) + feature_mul = feature_B @ feature_A + feature_mul= np.reshape(feature_mul, (b, h, w, h * w)) + feature_mul = feature_mul.transpose(0, 1, 3, 2) + correlation_tensor = 
feature_mul.transpose(0, 2, 1, 3) + correlation_tensor = np.ascontiguousarray(correlation_tensor) + return [correlation_tensor] + + +if __name__ == "__main__": + if not os.path.isfile(args.gmm_model): + raise OSError("GMM model not exist") + if not os.path.isfile(args.tom_model): + raise OSError("TOM model not exist") + if not os.path.isfile(args.segmentation_model): + raise OSError("Segmentation model not exist") + if not os.path.isfile(findFile(args.openpose_proto)): + raise OSError("OpenPose proto not exist") + if not os.path.isfile(findFile(args.openpose_model)): + raise OSError("OpenPose model not exist") + + person_img = cv.imread(args.input_image) + ratio = 256 / 192 + inp_h, inp_w, _ = person_img.shape + current_ratio = inp_h / inp_w + if current_ratio > ratio: + center_h = inp_h // 2 + out_h = inp_w * ratio + start = int(center_h - out_h // 2) + end = int(center_h + out_h // 2) + person_img = person_img[start:end, ...] + else: + center_w = inp_w // 2 + out_w = inp_h / ratio + start = int(center_w - out_w // 2) + end = int(center_w + out_w // 2) + person_img = person_img[:, start:end, :] + + cloth_img = cv.imread(args.input_cloth) + pose = get_pose_map(person_img, findFile(args.openpose_proto), + findFile(args.openpose_model), args.backend, args.target) + segm_image = parse_human(person_img, args.segmentation_model) + segm_image = cv.resize(segm_image, (192, 256), cv.INTER_LINEAR) + + cv.dnn_registerLayer('Correlation', CorrelationLayer) + + model = CpVton(args.gmm_model, args.tom_model, args.backend, args.target) + agnostic = model.prepare_agnostic(segm_image, person_img, pose) + warped_cloth = model.get_warped_cloth(cloth_img, agnostic) + output = model.get_tryon(agnostic, warped_cloth) + + cv.dnn_unregisterLayer('Correlation') + + winName = 'Virtual Try-On' + cv.namedWindow(winName, cv.WINDOW_AUTOSIZE) + cv.imshow(winName, output) + cv.waitKey() diff --git a/samples/python/drawing.py b/samples/python/drawing.py new file mode 100644 index 0000000000..e5db400dc0 --- /dev/null +++ b/samples/python/drawing.py @@ -0,0 +1,191 @@ +#!/usr/bin/env python +''' + This program demonstrates OpenCV drawing and text output functions by drawing different shapes and text strings + Usage : + python3 drawing.py + Press any button to exit + ''' + +# Python 2/3 compatibility +from __future__ import print_function + +import numpy as np +import cv2 as cv + +# Drawing Lines +def lines(): + for i in range(NUMBER*2): + pt1, pt2 = [], [] + pt1.append(np.random.randint(x1, x2)) + pt1.append(np.random.randint(y1, y2)) + pt2.append(np.random.randint(x1, x2)) + pt2.append(np.random.randint(y1, y2)) + color = "%06x" % np.random.randint(0, 0xFFFFFF) + color = tuple(int(color[i:i+2], 16) for i in (0, 2 ,4)) + arrowed = np.random.randint(0, 6) + if (arrowed<3): + cv.line(image, tuple(pt1), tuple(pt2), color, np.random.randint(1, 10), lineType) + else: + cv.arrowedLine(image, tuple(pt1), tuple(pt2), color, np.random.randint(1, 10), lineType) + cv.imshow(wndname, image) + if cv.waitKey(DELAY)>=0: + return + +# Drawing Rectangle +def rectangle(): + for i in range(NUMBER*2): + pt1, pt2 = [], [] + pt1.append(np.random.randint(x1, x2)) + pt1.append(np.random.randint(y1, y2)) + pt2.append(np.random.randint(x1, x2)) + pt2.append(np.random.randint(y1, y2)) + color = "%06x" % np.random.randint(0, 0xFFFFFF) + color = tuple(int(color[i:i+2], 16) for i in (0, 2 ,4)) + thickness = np.random.randint(-3, 10) + marker = np.random.randint(0, 10) + marker_size = np.random.randint(30, 80) + + if (marker > 5): + cv.rectangle(image, 
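# --- Illustrative sketch (not part of the patch) --------------------------------
# The registered 'Correlation' layer above stores, for every spatial position of
# feature_A, the dot products of its channel vector with the channel vectors at all
# h*w positions of feature_B, giving a (b, h*w, h, w) volume. A reference computation
# with einsum (the flattening order of the h*w axis here is illustrative and may
# differ from the layer's exact layout):
import numpy as np

b, c, h, w = 1, 4, 3, 3
feature_A = np.random.rand(b, c, h, w).astype(np.float32)
feature_B = np.random.rand(b, c, h, w).astype(np.float32)
corr = np.einsum('bcij,bckl->bklij', feature_A, feature_B)   # dot products over channels
corr = corr.reshape(b, h * w, h, w)
print(corr.shape)   # (1, 9, 3, 3) -- matches getMemoryShapes
# --------------------------------------------------------------------------------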
tuple(pt1), tuple(pt2), color, max(thickness, -1), lineType) + else: + cv.drawMarker(image, tuple(pt1), color, marker, marker_size) + cv.imshow(wndname, image) + if cv.waitKey(DELAY)>=0: + return + +# Drawing ellipse +def ellipse(): + for i in range(NUMBER*2): + center = [] + center.append(np.random.randint(x1, x2)) + center.append(np.random.randint(x1, x2)) + axes = [] + axes.append(np.random.randint(0, 200)) + axes.append(np.random.randint(0, 200)) + angle = np.random.randint(0, 180) + color = "%06x" % np.random.randint(0, 0xFFFFFF) + color = tuple(int(color[i:i+2], 16) for i in (0, 2 ,4)) + thickness = np.random.randint(-1, 9) + cv.ellipse(image, tuple(center), tuple(axes), angle, angle-100, angle + 200, color, thickness, lineType) + cv.imshow(wndname, image) + if cv.waitKey(DELAY)>=0: + return + +# Drawing Polygonal Curves +def polygonal(): + for i in range(NUMBER): + pt = [(0, 0)]*6 + pt = np.resize(pt, (2, 3, 2)) + pt[0][0][0] = np.random.randint(x1, x2) + pt[0][0][1] = np.random.randint(y1, y2) + pt[0][1][0] = np.random.randint(x1, x2) + pt[0][1][1] = np.random.randint(y1, y2) + pt[0][2][0] = np.random.randint(x1, x2) + pt[0][2][1] = np.random.randint(y1, y2) + pt[1][0][0] = np.random.randint(x1, x2) + pt[1][0][1] = np.random.randint(y1, y2) + pt[1][1][0] = np.random.randint(x1, x2) + pt[1][1][1] = np.random.randint(y1, y2) + pt[1][2][0] = np.random.randint(x1, x2) + pt[1][2][1] = np.random.randint(y1, y2) + color = "%06x" % np.random.randint(0, 0xFFFFFF) + color = tuple(int(color[i:i+2], 16) for i in (0, 2 ,4)) + alist = [] + for k in pt[0]: + alist.append(k) + for k in pt[1]: + alist.append(k) + ppt = np.array(alist) + cv.polylines(image, [ppt], True, color, thickness = np.random.randint(1, 10), lineType = lineType) + cv.imshow(wndname, image) + if cv.waitKey(DELAY) >= 0: + return + +# fills an area bounded by several polygonal contours +def fill(): + for i in range(NUMBER): + pt = [(0, 0)]*6 + pt = np.resize(pt, (2, 3, 2)) + pt[0][0][0] = np.random.randint(x1, x2) + pt[0][0][1] = np.random.randint(y1, y2) + pt[0][1][0] = np.random.randint(x1, x2) + pt[0][1][1] = np.random.randint(y1, y2) + pt[0][2][0] = np.random.randint(x1, x2) + pt[0][2][1] = np.random.randint(y1, y2) + pt[1][0][0] = np.random.randint(x1, x2) + pt[1][0][1] = np.random.randint(y1, y2) + pt[1][1][0] = np.random.randint(x1, x2) + pt[1][1][1] = np.random.randint(y1, y2) + pt[1][2][0] = np.random.randint(x1, x2) + pt[1][2][1] = np.random.randint(y1, y2) + color = "%06x" % np.random.randint(0, 0xFFFFFF) + color = tuple(int(color[i:i+2], 16) for i in (0, 2 ,4)) + alist = [] + for k in pt[0]: + alist.append(k) + for k in pt[1]: + alist.append(k) + ppt = np.array(alist) + cv.fillPoly(image, [ppt], color, lineType) + cv.imshow(wndname, image) + if cv.waitKey(DELAY) >= 0: + return + +# Drawing Circles +def circles(): + for i in range(NUMBER): + center = [] + center.append(np.random.randint(x1, x2)) + center.append(np.random.randint(x1, x2)) + color = "%06x" % np.random.randint(0, 0xFFFFFF) + color = tuple(int(color[i:i+2], 16) for i in (0, 2 ,4)) + cv.circle(image, tuple(center), np.random.randint(0, 300), color, np.random.randint(-1, 9), lineType) + cv.imshow(wndname, image) + if cv.waitKey(DELAY) >= 0: + return + +# Draws a text string +def string(): + for i in range(NUMBER): + org = [] + org.append(np.random.randint(x1, x2)) + org.append(np.random.randint(x1, x2)) + color = "%06x" % np.random.randint(0, 0xFFFFFF) + color = tuple(int(color[i:i+2], 16) for i in (0, 2 ,4)) + cv.putText(image, "Testing text rendering", 
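# --- Illustrative sketch (not part of the patch) --------------------------------
# polygonal() and fill() above concatenate the two random triangles into one (6, 2)
# array, so polylines/fillPoly treat them as a single six-vertex contour. Passing the
# contours as separate int32 arrays keeps them as two closed curves:
import numpy as np
import cv2 as cv

canvas = np.zeros((700, 1000, 3), np.uint8)
tri1 = np.array([[100, 50], [200, 300], [700, 200]], np.int32)
tri2 = np.array([[500, 100], [560, 300], [400, 200]], np.int32)
cv.polylines(canvas, [tri1, tri2], True, (0, 255, 0), 2, cv.LINE_AA)
cv.fillPoly(canvas, [tri1, tri2], (0, 0, 255), cv.LINE_AA)
# --------------------------------------------------------------------------------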
tuple(org), np.random.randint(0, 8), np.random.randint(0, 100)*0.05+0.1, color, np.random.randint(1, 10), lineType) + cv.imshow(wndname, image) + if cv.waitKey(DELAY) >= 0: + return + + +def string1(): + textsize = cv.getTextSize("OpenCV forever!", cv.FONT_HERSHEY_COMPLEX, 3, 5) + org = (int((width - textsize[0][0])/2), int((height - textsize[0][1])/2)) + for i in range(0, 255, 2): + image2 = np.array(image) - i + cv.putText(image2, "OpenCV forever!", org, cv.FONT_HERSHEY_COMPLEX, 3, (i, i, 255), 5, lineType) + cv.imshow(wndname, image2) + if cv.waitKey(DELAY) >= 0: + return + +if __name__ == '__main__': + print(__doc__) + wndname = "Drawing Demo" + NUMBER = 100 + DELAY = 5 + width, height = 1000, 700 + lineType = cv.LINE_AA # change it to LINE_8 to see non-antialiased graphics + x1, x2, y1, y2 = -width/2, width*3/2, -height/2, height*3/2 + image = np.zeros((height, width, 3), dtype = np.uint8) + cv.imshow(wndname, image) + cv.waitKey(DELAY) + lines() + rectangle() + ellipse() + polygonal() + fill() + circles() + string() + string1() + cv.waitKey(0) + cv.destroyAllWindows() \ No newline at end of file diff --git a/samples/python/grabcut.py b/samples/python/grabcut.py index f63c0cb387..e2a035a91c 100644 --- a/samples/python/grabcut.py +++ b/samples/python/grabcut.py @@ -11,10 +11,10 @@ USAGE: README FIRST: Two windows will show up, one for input and one for output. - At first, in input window, draw a rectangle around the object using -mouse right button. Then press 'n' to segment the object (once or a few times) + At first, in input window, draw a rectangle around the object using the +right mouse button. Then press 'n' to segment the object (once or a few times) For any finer touch-ups, you can press any of the keys below and draw lines on -the areas you want. Then again press 'n' for updating the output. +the areas you want. Then again press 'n' to update the output. Key '0' - To select areas of sure background Key '1' - To select areas of sure foreground @@ -44,8 +44,8 @@ class App(): DRAW_BG = {'color' : BLACK, 'val' : 0} DRAW_FG = {'color' : WHITE, 'val' : 1} - DRAW_PR_FG = {'color' : GREEN, 'val' : 3} DRAW_PR_BG = {'color' : RED, 'val' : 2} + DRAW_PR_FG = {'color' : GREEN, 'val' : 3} # setting up flags rect = (0,0,1,1) @@ -160,14 +160,12 @@ class App(): print(""" For finer touchups, mark foreground and background after pressing keys 0-3 and again press 'n' \n""") try: + bgdmodel = np.zeros((1, 65), np.float64) + fgdmodel = np.zeros((1, 65), np.float64) if (self.rect_or_mask == 0): # grabcut with rect - bgdmodel = np.zeros((1, 65), np.float64) - fgdmodel = np.zeros((1, 65), np.float64) cv.grabCut(self.img2, self.mask, self.rect, bgdmodel, fgdmodel, 1, cv.GC_INIT_WITH_RECT) self.rect_or_mask = 1 - elif self.rect_or_mask == 1: # grabcut with mask - bgdmodel = np.zeros((1, 65), np.float64) - fgdmodel = np.zeros((1, 65), np.float64) + elif (self.rect_or_mask == 1): # grabcut with mask cv.grabCut(self.img2, self.mask, self.rect, bgdmodel, fgdmodel, 1, cv.GC_INIT_WITH_MASK) except: import traceback diff --git a/samples/python/laplace.py b/samples/python/laplace.py new file mode 100644 index 0000000000..f485e5741c --- /dev/null +++ b/samples/python/laplace.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python + +''' + This program demonstrates Laplace point/edge detection using + OpenCV function Laplacian() + It captures from the camera of your choice: 0, 1, ... default 0 + Usage: + python laplace.py + If no arguments given default arguments will be used. 
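# --- Illustrative sketch (not part of the patch) --------------------------------
# The grabcut.py hunk above hoists the two model buffers out of the rect/mask
# branches: cv.grabCut only requires them to be 1x65 float64 arrays that it updates
# in place, so one allocation serves both GC_INIT_WITH_RECT and GC_INIT_WITH_MASK.
# Minimal rect-initialised call on a synthetic stand-in image:
import numpy as np
import cv2 as cv

img = np.random.randint(0, 255, (480, 640, 3), np.uint8)
mask = np.zeros(img.shape[:2], np.uint8)
bgdmodel = np.zeros((1, 65), np.float64)
fgdmodel = np.zeros((1, 65), np.float64)
rect = (50, 50, 300, 400)
cv.grabCut(img, mask, rect, bgdmodel, fgdmodel, 1, cv.GC_INIT_WITH_RECT)
fg = np.where((mask == cv.GC_FGD) | (mask == cv.GC_PR_FGD), 255, 0).astype(np.uint8)
# --------------------------------------------------------------------------------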
+
+    Keyboard Shortcuts:
+    Press Esc to exit the program.
+    '''
+
+# Python 2/3 compatibility
+from __future__ import print_function
+
+import numpy as np
+import cv2 as cv
+import sys
+
+def main():
+    # Declare the variables we are going to use
+    ddepth = cv.CV_16S
+    smoothType = "MedianBlur"
+    sigma = 3
+    if len(sys.argv) == 4:
+        ddepth = int(sys.argv[1])
+        smoothType = sys.argv[2]
+        sigma = int(sys.argv[3])
+    # Taking input from the camera
+    cap = cv.VideoCapture(0)
+    # Create Window and Trackbar
+    cv.namedWindow("Laplace of Image", cv.WINDOW_AUTOSIZE)
+    cv.createTrackbar("Kernel Size Bar", "Laplace of Image", sigma, 15, lambda x: x)
+    # Printing frame width, height and FPS
+    print("=="*40)
+    print("Frame Width: ", cap.get(cv.CAP_PROP_FRAME_WIDTH), "Frame Height: ", cap.get(cv.CAP_PROP_FRAME_HEIGHT), "FPS: ", cap.get(cv.CAP_PROP_FPS))
+    while True:
+        # Reading input from the camera
+        ret, frame = cap.read()
+        if not ret:
+            print("Can't open camera/video stream")
+            break
+        # Taking input/position from the trackbar
+        sigma = cv.getTrackbarPos("Kernel Size Bar", "Laplace of Image")
+        # Setting kernel size
+        ksize = (sigma*5)|1
+        # Removing noise by blurring with a filter
+        if smoothType == "GAUSSIAN":
+            smoothed = cv.GaussianBlur(frame, (ksize, ksize), sigma, sigma)
+        elif smoothType == "BLUR":
+            smoothed = cv.blur(frame, (ksize, ksize))
+        else:  # "MedianBlur" (default)
+            smoothed = cv.medianBlur(frame, ksize)
+
+        # Apply Laplace function
+        laplace = cv.Laplacian(smoothed, ddepth, ksize=5)
+        # Converting back to uint8
+        result = cv.convertScaleAbs(laplace, alpha=(sigma+1)*0.25)
+        # Display Output
+        cv.imshow("Laplace of Image", result)
+        k = cv.waitKey(30)
+        if k == 27:
+            return
+
+if __name__ == "__main__":
+    print(__doc__)
+    main()
+    cv.destroyAllWindows()
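# --- Illustrative sketch (not part of the patch) --------------------------------
# Single-frame version of the loop above: blur, take the Laplacian at 16-bit signed
# depth so negative edge responses survive, then rescale to 8-bit for display. The
# input is a stand-in random frame instead of a camera capture.
import numpy as np
import cv2 as cv

frame = np.random.randint(0, 255, (480, 640, 3), np.uint8)
smoothed = cv.medianBlur(frame, 5)
laplace = cv.Laplacian(smoothed, cv.CV_16S, ksize=5)
result = cv.convertScaleAbs(laplace, alpha=0.25)   # alpha is passed by name, not as dst
# --------------------------------------------------------------------------------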