diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index 91da8835a2..51a28d94b5 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -824,6 +824,13 @@ public: for (int i = 0; i < inputs.size(); ++i) CV_Assert(inputs[i].u != outputs[0].u); + int inpH = inputs[0].size[2]; + int inpW = inputs[0].size[3]; + int out_h = (inpH + 2 * pad.height - (dilation.height * (kernel.height - 1) + 1)) / stride.height + 1; + int out_w = (inpW + 2 * pad.width - (dilation.width * (kernel.width - 1) + 1)) / stride.width + 1; + if (out_h != outputs[0].size[2] || out_w != outputs[0].size[3]) + return false; + int group = inputs[0].size[1] / umat_blobs[0].size[1]; if (convolutionOp.empty()) diff --git a/modules/dnn/src/layers/detection_output_layer.cpp b/modules/dnn/src/layers/detection_output_layer.cpp index 87b5d706d4..70d7dfb0b1 100644 --- a/modules/dnn/src/layers/detection_output_layer.cpp +++ b/modules/dnn/src/layers/detection_output_layer.cpp @@ -249,7 +249,8 @@ public: kernel.set(6, (int)num_loc_classes); kernel.set(7, (int)background_label_id); kernel.set(8, (int)clip); - kernel.set(9, ocl::KernelArg::PtrWriteOnly(outmat)); + kernel.set(9, (int)_locPredTransposed); + kernel.set(10, ocl::KernelArg::PtrWriteOnly(outmat)); if (!kernel.run(1, &nthreads, NULL, false)) return false; diff --git a/modules/dnn/src/layers/prior_box_layer.cpp b/modules/dnn/src/layers/prior_box_layer.cpp index 5db55c302e..45f48e7c32 100644 --- a/modules/dnn/src/layers/prior_box_layer.cpp +++ b/modules/dnn/src/layers/prior_box_layer.cpp @@ -317,7 +317,17 @@ public: variance.copyTo(umat_variance); int real_numPriors = _numPriors >> (_offsetsX.size() - 1); - umat_scales = UMat(1, &real_numPriors, CV_32F, 1.0f); + if (_scales.empty()) + { + _scales.resize(real_numPriors, 1.0f); + umat_scales = UMat(1, &real_numPriors, CV_32F, 1.0f); + } + else + { + CV_Assert(_scales.size() == real_numPriors); + Mat 
scales(1, _scales.size(), CV_32FC1, &_scales[0]); + scales.copyTo(umat_scales); + } } size_t nthreads = _layerHeight * _layerWidth; diff --git a/modules/dnn/src/opencl/detection_output.cl b/modules/dnn/src/opencl/detection_output.cl index f5932cc82a..cdd236390b 100644 --- a/modules/dnn/src/opencl/detection_output.cl +++ b/modules/dnn/src/opencl/detection_output.cl @@ -51,6 +51,7 @@ __kernel void DecodeBBoxesCORNER(const int nthreads, const int num_loc_classes, const int background_label_id, const int clip_bbox, + const int locPredTransposed, __global Dtype* bbox_data) { for (int index = get_global_id(0); index < nthreads; index += get_global_size(0)) @@ -75,10 +76,18 @@ __kernel void DecodeBBoxesCORNER(const int nthreads, bbox_vec = loc_vec * prior_variance; } - bbox_xmin = bbox_vec.x; - bbox_ymin = bbox_vec.y; - bbox_xmax = bbox_vec.z; - bbox_ymax = bbox_vec.w; + if (locPredTransposed) + { + bbox_ymin = bbox_vec.x; + bbox_xmin = bbox_vec.y; + bbox_ymax = bbox_vec.z; + bbox_xmax = bbox_vec.w; + } else { + bbox_xmin = bbox_vec.x; + bbox_ymin = bbox_vec.y; + bbox_xmax = bbox_vec.z; + bbox_ymax = bbox_vec.w; + } Dtype4 prior_vec = vload4(0, prior_data + p); Dtype val; @@ -114,6 +123,7 @@ __kernel void DecodeBBoxesCENTER_SIZE(const int nthreads, const int num_loc_classes, const int background_label_id, const int clip_bbox, + const int locPredTransposed, __global Dtype* bbox_data) { for (int index = get_global_id(0); index < nthreads; index += get_global_size(0)) @@ -138,10 +148,18 @@ __kernel void DecodeBBoxesCENTER_SIZE(const int nthreads, bbox_vec = loc_vec * prior_variance; } - bbox_xmin = bbox_vec.x; - bbox_ymin = bbox_vec.y; - bbox_xmax = bbox_vec.z; - bbox_ymax = bbox_vec.w; + if (locPredTransposed) + { + bbox_ymin = bbox_vec.x; + bbox_xmin = bbox_vec.y; + bbox_ymax = bbox_vec.z; + bbox_xmax = bbox_vec.w; + } else { + bbox_xmin = bbox_vec.x; + bbox_ymin = bbox_vec.y; + bbox_xmax = bbox_vec.z; + bbox_ymax = bbox_vec.w; + } Dtype4 prior_vec = vload4(0, prior_data + 
p); Dtype prior_width = prior_vec.z - prior_vec.x; diff --git a/modules/dnn/test/test_backends.cpp b/modules/dnn/test/test_backends.cpp index 6ed926215e..89ed5d0cbb 100644 --- a/modules/dnn/test/test_backends.cpp +++ b/modules/dnn/test/test_backends.cpp @@ -174,9 +174,7 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_Caffe) TEST_P(DNNTestNetwork, MobileNet_SSD_TensorFlow) { - if (backend == DNN_BACKEND_DEFAULT && target == DNN_TARGET_OPENCL || - backend == DNN_BACKEND_HALIDE) - throw SkipTestException(""); + if (backend == DNN_BACKEND_HALIDE) throw SkipTestException(""); Mat sample = imread(findDataFile("dnn/street.png", false)); Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false); processNet("dnn/ssd_mobilenet_v1_coco.pb", "dnn/ssd_mobilenet_v1_coco.pbtxt", diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index 1210d12e93..e6504f179e 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -285,7 +285,7 @@ TEST(Test_TensorFlow, Inception_v2_SSD) normAssert(detections, ref); } -OCL_TEST(Test_TensorFlow, DISABLED_MobileNet_SSD) +OCL_TEST(Test_TensorFlow, MobileNet_SSD) { std::string netPath = findDataFile("dnn/ssd_mobilenet_v1_coco.pb", false); std::string netConfig = findDataFile("dnn/ssd_mobilenet_v1_coco.pbtxt", false); @@ -317,8 +317,8 @@ OCL_TEST(Test_TensorFlow, DISABLED_MobileNet_SSD) std::vector<Mat> output; net.forward(output, outNames); - normAssert(target[0].reshape(1, 1), output[0].reshape(1, 1)); - normAssert(target[1].reshape(1, 1), output[1].reshape(1, 1), "", 1e-5, 2e-4); + normAssert(target[0].reshape(1, 1), output[0].reshape(1, 1), "", 1e-5, 1.5e-4); + normAssert(target[1].reshape(1, 1), output[1].reshape(1, 1), "", 1e-5, 3e-4); normAssert(target[2].reshape(1, 1), output[2].reshape(1, 1), "", 4e-5, 1e-2); }