From f3cebb3e1bac73e8bdddd3de96049a46122a0214 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Wed, 12 Aug 2020 17:32:16 +0300 Subject: [PATCH] Merge pull request #18077 from l-bat:reduce_sum * Supported ReduceSum op * Skip test --- modules/dnn/src/layers/pooling_layer.cpp | 26 +++++++++++--- modules/dnn/src/tensorflow/tf_importer.cpp | 41 +++++++++++++++++++--- modules/dnn/test/test_darknet_importer.cpp | 2 ++ modules/dnn/test/test_tf_importer.cpp | 12 +++++++ 4 files changed, 72 insertions(+), 9 deletions(-) diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 3f2a0f7d03..fd08fdbeb3 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -98,6 +98,8 @@ public: type = AVE; else if (pool == "stochastic") type = STOCHASTIC; + else if (pool == "sum") + type = SUM; else CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\""); @@ -195,7 +197,7 @@ public: return type == MAX || type == AVE; } else - return type != STOCHASTIC; + return type != STOCHASTIC && type != SUM; } #endif if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) @@ -288,7 +290,7 @@ public: maxPooling(inputs[0], outputs[0], mask); break; } - case AVE: + case AVE: case SUM: CV_Assert_N(inputs.size() == 1, outputs.size() == 1); avePooling(inputs[0], outputs[0]); break; @@ -366,7 +368,7 @@ public: virtual Ptr initNgraph(const std::vector >& inputs, const std::vector >& nodes) CV_OVERRIDE { - CV_Assert_N((inputs.size() == 1 && (type == MAX || type == AVE)) || inputs.size() == 2, nodes.size() == inputs.size()); + CV_Assert_N((inputs.size() == 1 && (type == MAX || type == AVE || type == SUM)) || inputs.size() == 2, nodes.size() == inputs.size()); auto& ieInpNode = nodes[0].dynamicCast()->node; ngraph::op::PadType pad_type = ngraph::op::PadType::EXPLICIT; @@ -381,6 +383,19 @@ virtual Ptr initNgraph(const std::vector >& inp exclude_pad, rounding_type, pad_type); return Ptr(new InfEngineNgraphNode(ave_pool)); } + else if (type == SUM) { + ngraph::Shape inpShape = ieInpNode->get_shape(); + CV_Assert(inpShape.size() == 2 + kernel_size.size()); + std::vector axes; + for (size_t i = 0; i < kernel_size.size(); i++) + { + if (inpShape[2 + i] == kernel_size[i]) + axes.push_back(2 + i); + } + auto reduction_axes = std::make_shared(ngraph::element::i64, ngraph::Shape{axes.size()}, axes); + auto reduce_sum = std::make_shared(ieInpNode, reduction_axes, true); + return Ptr(new InfEngineNgraphNode(reduce_sum)); + } else if (type == MAX) { auto max_pool = std::make_shared(ieInpNode, ngraph::Strides(strides), ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size), @@ -739,7 +754,7 @@ virtual Ptr initNgraph(const std::vector >& inp } } } - else if (poolingType == AVE) + else if (poolingType == AVE || poolingType == SUM) { for( ; x0 < x1; ++x0) { @@ -750,7 +765,7 @@ virtual Ptr initNgraph(const std::vector >& inp xend = min(xend, inp_width); float inv_kernel_area = avePoolPaddedArea ? xdelta * ydelta * ddelta : ((dend - dstart) * (yend - ystart) * (xend - xstart)); - inv_kernel_area = 1.0 / inv_kernel_area; + inv_kernel_area = poolingType == AVE ? 1.0 / inv_kernel_area : 1.0; #if CV_SIMD128 if( isPool2D && xstart > 0 && x0 + 7 < x1 && (x0 + 7) * stride_w - pad_l + kernel_w < inp_width ) { @@ -1095,6 +1110,7 @@ private: MAX, AVE, STOCHASTIC, + SUM, ROI, // RoI pooling, https://arxiv.org/pdf/1504.08083.pdf PSROI // Position-sensitive RoI pooling, https://arxiv.org/pdf/1605.06409.pdf }; diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index c005c99b58..9083a4d4f9 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -2067,7 +2067,7 @@ void TFImporter::populateNet(Net dstNet) connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1); } - else if (type == "Mean") + else if (type == "Mean" || type == "Sum") { // Computes the mean of elements across dimensions of a tensor. // If keepdims is false (default) reduces input_tensor along the dimensions given in axis, @@ -2116,7 +2116,7 @@ void TFImporter::populateNet(Net dstNet) LayerParams avgLp; std::string avgName = name + "/avg"; CV_Assert(layer_id.find(avgName) == layer_id.end()); - avgLp.set("pool", "ave"); + avgLp.set("pool", type == "Mean" ? "ave" : "sum"); // pooling kernel H x 1 avgLp.set("global_pooling_h", true); avgLp.set("kernel_w", 1); @@ -2153,11 +2153,44 @@ void TFImporter::populateNet(Net dstNet) layer_id[name] = id; connect(layer_id, dstNet, Pin(avgName), id, 0); connect(layer_id, dstNet, Pin(layerShapeName), id, 1); + } else if (indices.total() == 1) { + int axis = toNCHW(indices.at(0)); + if (axis == 2 || axis == 3) + { + layerParams.set("pool", type == "Mean" ? "ave" : "sum"); + layerParams.set(axis == 2 ? "kernel_w" : "kernel_h", 1); + layerParams.set(axis == 2 ? "global_pooling_h" : "global_pooling_w", true); + int id = dstNet.addLayer(name, "Pooling", layerParams); + layer_id[name] = id; + connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); + + if (!keepDims) + { + // To keep correct order after squeeze dims we first need to change layout from NCHW to NHWC + LayerParams permLP; + int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC. + permLP.set("order", DictValue::arrayInt(order, 4)); + std::string permName = name + "/nchw"; + CV_Assert(layer_id.find(permName) == layer_id.end()); + int permId = dstNet.addLayer(permName, "Permute", permLP); + layer_id[permName] = permId; + connect(layer_id, dstNet, Pin(name), permId, 0); + + LayerParams squeezeLp; + std::string squeezeName = name + "/squeeze"; + CV_Assert(layer_id.find(squeezeName) == layer_id.end()); + squeezeLp.set("axis", indices.at(0)); + squeezeLp.set("end_axis", indices.at(0) + 1); + int squeezeId = dstNet.addLayer(squeezeName, "Flatten", squeezeLp); + layer_id[squeezeName] = squeezeId; + connect(layer_id, dstNet, Pin(permName), squeezeId, 0); + } + } } else { if (indices.total() != 2 || indices.at(0) != 1 || indices.at(1) != 2) - CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean operation."); + CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean or reduce_sum operation."); - layerParams.set("pool", "ave"); + layerParams.set("pool", type == "Mean" ? "ave" : "sum"); layerParams.set("global_pooling", true); int id = dstNet.addLayer(name, "Pooling", layerParams); layer_id[name] = id; diff --git a/modules/dnn/test/test_darknet_importer.cpp b/modules/dnn/test/test_darknet_importer.cpp index 45edf405ac..4986e8e399 100644 --- a/modules/dnn/test/test_darknet_importer.cpp +++ b/modules/dnn/test/test_darknet_importer.cpp @@ -755,6 +755,8 @@ TEST_P(Test_Darknet_layers, connected) TEST_P(Test_Darknet_layers, relu) { + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD); testDarknetLayer("relu"); } diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index d95c46b5d3..68b720a375 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -128,6 +128,13 @@ TEST_P(Test_TensorFlow_layers, reduce_mean) runTensorFlowNet("global_pool_by_axis"); } +TEST_P(Test_TensorFlow_layers, reduce_sum) +{ + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + runTensorFlowNet("sum_pool_by_axis"); +} + TEST_P(Test_TensorFlow_layers, conv_single_conv) { runTensorFlowNet("single_conv"); @@ -340,6 +347,11 @@ TEST_P(Test_TensorFlow_layers, pooling_reduce_mean) runTensorFlowNet("reduce_mean"); // an average pooling over all spatial dimensions. } +TEST_P(Test_TensorFlow_layers, pooling_reduce_sum) +{ + runTensorFlowNet("reduce_sum"); // a SUM pooling over all spatial dimensions. +} + TEST_P(Test_TensorFlow_layers, max_pool_grad) { if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)