diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp
index 91da8835a2..51a28d94b5 100644
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@@ -824,6 +824,13 @@ public:
         for (int i = 0; i < inputs.size(); ++i)
             CV_Assert(inputs[i].u != outputs[0].u);
 
+        int inpH = inputs[0].size[2];
+        int inpW = inputs[0].size[3];
+        int out_h = (inpH + 2 * pad.height - (dilation.height * (kernel.height - 1) + 1)) / stride.height + 1;
+        int out_w = (inpW + 2 * pad.width - (dilation.width * (kernel.width - 1) + 1)) / stride.width + 1;
+        if (out_h != outputs[0].size[2] || out_w != outputs[0].size[3])
+            return false;
+
         int group = inputs[0].size[1] / umat_blobs[0].size[1];
 
         if (convolutionOp.empty())
diff --git a/modules/dnn/src/layers/detection_output_layer.cpp b/modules/dnn/src/layers/detection_output_layer.cpp
index 87b5d706d4..70d7dfb0b1 100644
--- a/modules/dnn/src/layers/detection_output_layer.cpp
+++ b/modules/dnn/src/layers/detection_output_layer.cpp
@@ -249,7 +249,8 @@ public:
             kernel.set(6, (int)num_loc_classes);
             kernel.set(7, (int)background_label_id);
             kernel.set(8, (int)clip);
-            kernel.set(9, ocl::KernelArg::PtrWriteOnly(outmat));
+            kernel.set(9, (int)_locPredTransposed);
+            kernel.set(10, ocl::KernelArg::PtrWriteOnly(outmat));
 
             if (!kernel.run(1, &nthreads, NULL, false))
                 return false;
diff --git a/modules/dnn/src/layers/prior_box_layer.cpp b/modules/dnn/src/layers/prior_box_layer.cpp
index 5db55c302e..45f48e7c32 100644
--- a/modules/dnn/src/layers/prior_box_layer.cpp
+++ b/modules/dnn/src/layers/prior_box_layer.cpp
@@ -317,7 +317,17 @@ public:
             variance.copyTo(umat_variance);
 
             int real_numPriors = _numPriors >> (_offsetsX.size() - 1);
-            umat_scales = UMat(1, &real_numPriors, CV_32F, 1.0f);
+            if (_scales.empty())  // nothing stored yet: fill both _scales and umat_scales with 1.0
+            {
+                _scales.resize(real_numPriors, 1.0f);
+                umat_scales = UMat(1, &real_numPriors, CV_32F, 1.0f);
+            }
+            else  // values already present in _scales: copy them into umat_scales unchanged
+            {
+                CV_Assert((int)_scales.size() == real_numPriors);  // explicit cast: no signed/unsigned comparison
+                Mat scales(1, (int)_scales.size(), CV_32FC1, &_scales[0]);  // wraps _scales' storage in a 1-row Mat header
+                scales.copyTo(umat_scales);
+            }
         }
 
         size_t nthreads = _layerHeight * _layerWidth;
diff --git a/modules/dnn/src/opencl/detection_output.cl b/modules/dnn/src/opencl/detection_output.cl
index f5932cc82a..cdd236390b 100644
--- a/modules/dnn/src/opencl/detection_output.cl
+++ b/modules/dnn/src/opencl/detection_output.cl
@@ -51,6 +51,7 @@ __kernel void DecodeBBoxesCORNER(const int nthreads,
                                  const int num_loc_classes,
                                  const int background_label_id,
                                  const int clip_bbox,
+                                 const int locPredTransposed,
                                  __global Dtype* bbox_data)
 {
     for (int index = get_global_id(0); index < nthreads; index += get_global_size(0))
@@ -75,10 +76,18 @@ __kernel void DecodeBBoxesCORNER(const int nthreads,
             bbox_vec = loc_vec * prior_variance;
         }
 
-        bbox_xmin = bbox_vec.x;
-        bbox_ymin = bbox_vec.y;
-        bbox_xmax = bbox_vec.z;
-        bbox_ymax = bbox_vec.w;
+        if (locPredTransposed)
+        {
+            bbox_ymin = bbox_vec.x;
+            bbox_xmin = bbox_vec.y;
+            bbox_ymax = bbox_vec.z;
+            bbox_xmax = bbox_vec.w;
+        } else {
+            bbox_xmin = bbox_vec.x;
+            bbox_ymin = bbox_vec.y;
+            bbox_xmax = bbox_vec.z;
+            bbox_ymax = bbox_vec.w;
+        }
 
         Dtype4 prior_vec = vload4(0, prior_data + p);
         Dtype val;
@@ -114,6 +123,7 @@ __kernel void DecodeBBoxesCENTER_SIZE(const int nthreads,
                                       const int num_loc_classes,
                                       const int background_label_id,
                                       const int clip_bbox,
+                                      const int locPredTransposed,
                                       __global Dtype* bbox_data)
 {
     for (int index = get_global_id(0); index < nthreads; index += get_global_size(0))
@@ -138,10 +148,18 @@ __kernel void DecodeBBoxesCENTER_SIZE(const int nthreads,
             bbox_vec = loc_vec * prior_variance;
         }
 
-        bbox_xmin = bbox_vec.x;
-        bbox_ymin = bbox_vec.y;
-        bbox_xmax = bbox_vec.z;
-        bbox_ymax = bbox_vec.w;
+        if (locPredTransposed)
+        {
+            bbox_ymin = bbox_vec.x;
+            bbox_xmin = bbox_vec.y;
+            bbox_ymax = bbox_vec.z;
+            bbox_xmax = bbox_vec.w;
+        } else {
+            bbox_xmin = bbox_vec.x;
+            bbox_ymin = bbox_vec.y;
+            bbox_xmax = bbox_vec.z;
+            bbox_ymax = bbox_vec.w;
+        }
 
         Dtype4 prior_vec = vload4(0, prior_data + p);
         Dtype prior_width = prior_vec.z - prior_vec.x;
diff --git a/modules/dnn/test/test_backends.cpp b/modules/dnn/test/test_backends.cpp
index 6ed926215e..89ed5d0cbb 100644
--- a/modules/dnn/test/test_backends.cpp
+++ b/modules/dnn/test/test_backends.cpp
@@ -174,9 +174,7 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_Caffe)
 
 TEST_P(DNNTestNetwork, MobileNet_SSD_TensorFlow)
 {
-    if (backend == DNN_BACKEND_DEFAULT && target == DNN_TARGET_OPENCL ||
-        backend == DNN_BACKEND_HALIDE)
-        throw SkipTestException("");
+    if (backend == DNN_BACKEND_HALIDE) throw SkipTestException("");
     Mat sample = imread(findDataFile("dnn/street.png", false));
     Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false);
     processNet("dnn/ssd_mobilenet_v1_coco.pb", "dnn/ssd_mobilenet_v1_coco.pbtxt",
diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp
index 1210d12e93..e6504f179e 100644
--- a/modules/dnn/test/test_tf_importer.cpp
+++ b/modules/dnn/test/test_tf_importer.cpp
@@ -285,7 +285,7 @@ TEST(Test_TensorFlow, Inception_v2_SSD)
     normAssert(detections, ref);
 }
 
-OCL_TEST(Test_TensorFlow, DISABLED_MobileNet_SSD)
+OCL_TEST(Test_TensorFlow, MobileNet_SSD)
 {
     std::string netPath = findDataFile("dnn/ssd_mobilenet_v1_coco.pb", false);
     std::string netConfig = findDataFile("dnn/ssd_mobilenet_v1_coco.pbtxt", false);
@@ -317,8 +317,8 @@ OCL_TEST(Test_TensorFlow, DISABLED_MobileNet_SSD)
     std::vector<Mat> output;
     net.forward(output, outNames);
 
-    normAssert(target[0].reshape(1, 1), output[0].reshape(1, 1));
-    normAssert(target[1].reshape(1, 1), output[1].reshape(1, 1), "", 1e-5, 2e-4);
+    normAssert(target[0].reshape(1, 1), output[0].reshape(1, 1), "", 1e-5, 1.5e-4);
+    normAssert(target[1].reshape(1, 1), output[1].reshape(1, 1), "", 1e-5, 3e-4);
     normAssert(target[2].reshape(1, 1), output[2].reshape(1, 1), "", 4e-5, 1e-2);
 }