From 14da5ec311891859489a63a04faa83081d073ac8 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Sun, 15 Mar 2020 22:33:05 +0300 Subject: [PATCH 1/5] LSTM scalar --- modules/dnn/src/layers/recurrent_layers.cpp | 10 ++ .../dnn/src/onnx/onnx_graph_simplifier.cpp | 25 +++ modules/dnn/src/onnx/onnx_importer.cpp | 165 ++++++++++++++++-- modules/dnn/src/tensorflow/tf_importer.cpp | 7 + modules/dnn/test/test_onnx_importer.cpp | 11 ++ 5 files changed, 204 insertions(+), 14 deletions(-) diff --git a/modules/dnn/src/layers/recurrent_layers.cpp b/modules/dnn/src/layers/recurrent_layers.cpp index 3f9a229516..a3962db127 100644 --- a/modules/dnn/src/layers/recurrent_layers.cpp +++ b/modules/dnn/src/layers/recurrent_layers.cpp @@ -215,6 +215,8 @@ public: internals.push_back(shape(_numSamples, 1)); // dummyOnes internals.push_back(shape(_numSamples, 4*_numOut)); // gates + + std::cout << "LSTM out: " << outputs[0] << '\n'; return false; } @@ -301,6 +303,8 @@ public: tsEnd = numTimeStamps; tsInc = 1; } + std::cout << "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" << '\n'; + std::cout << tsStart << " " << tsEnd << '\n'; for (int ts = tsStart; ts != tsEnd; ts += tsInc) { Range curRowRange(ts*numSamples, (ts + 1)*numSamples); @@ -314,6 +318,7 @@ public: Mat gateF = gates.colRange(1*numOut, 2*numOut); Mat gateO = gates.colRange(2*numOut, 3*numOut); Mat gateG = gates.colRange(3*numOut, 4*numOut); + std::cout << "i " << gateI << '\n'; if (forgetBias) add(gateF, forgetBias, gateF); @@ -329,6 +334,7 @@ public: { Mat gatesIFO = gates.colRange(0, 3*numOut); sigmoid(gatesIFO, gatesIFO); + std::cout << "ifo " << gatesIFO << '\n'; } tanh(gateG, gateG); @@ -345,12 +351,15 @@ public: } if (usePeephole) { + std::cout << "if (usePeephole)" << '\n'; gemm(cInternal, blobs[5], 1, gateO, 1, gateO); sigmoid(gateO, gateO); } //compute h_t tanh(cInternal, hInternal); + std::cout << "o " << gateO << '\n'; + std::cout << "tanh(o) " << hInternal << '\n'; multiply(gateO, hInternal, hInternal); //save results in output blobs @@ -358,6 +367,7 @@ public: if (produceCellOutput) cInternal.copyTo(cOutTs.rowRange(curRowRange)); } + std::cout << "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" << '\n'; } }; diff --git a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp index fe96927840..6693a75ff4 100644 --- a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp +++ b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp @@ -290,6 +290,30 @@ public: } }; +// // To remove Squeeze after LSTM for non-bidirectional LSTM +// class LSTMSqueeze : public Subgraph +// { +// public: +// LSTMSqueeze() +// { +// int input = addNodeToMatch(""); +// +// std::vector lstmInps(7); +// lstmInps[0] = input; +// +// for (int i = 1; i < 4; ++i) +// lstmInps[i] = addNodeToMatch("Unsqueeze"); +// lstmInps[4] = addNodeToMatch(""); +// for (int i = 5; i < 7; ++i) +// lstmInps[i] = addNodeToMatch("ConstantOfShape"); +// +// int lstm = addNodeToMatch("LSTM", lstmInps); +// addNodeToMatch("Squeeze", lstm); +// +// setFusedNode("LSTM", lstmInps); +// } +// }; + void simplifySubgraphs(opencv_onnx::GraphProto& net) { std::vector > subgraphs; @@ -299,6 +323,7 @@ void simplifySubgraphs(opencv_onnx::GraphProto& net) subgraphs.push_back(makePtr()); subgraphs.push_back(makePtr()); subgraphs.push_back(makePtr()); + // subgraphs.push_back(makePtr()); simplifySubgraphs(Ptr(new ONNXGraphWrapper(net)), subgraphs); } diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 7913fa729d..bcf3d28eed 100644 --- 
a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -322,7 +322,7 @@ void ONNXImporter::populateNet(Net dstNet) std::string layer_type = node_proto.op_type(); layerParams.type = layer_type; - + std::cout << layerParams.name << " " << layer_type << '\n'; if (layer_type == "MaxPool") { @@ -457,6 +457,19 @@ void ONNXImporter::populateNet(Net dstNet) constBlobs.insert(std::make_pair(layerParams.name, sliced[0])); continue; } + + layerParams.set("begin", DictValue::arrayInt(&begin[0], begin.size())); + layerParams.set("end", DictValue::arrayInt(&end[0], end.size())); + + CV_Assert(node_proto.input_size() == 1); + if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) + { + std::vector inputs(1, getBlob(node_proto, constBlobs, 0)), sliced; + runLayer(layerParams, inputs, sliced); + CV_Assert(sliced.size() == 1); + constBlobs.insert(std::make_pair(layerParams.name, sliced[0])); + continue; + } } else if (layer_type == "Split") { @@ -579,6 +592,117 @@ void ONNXImporter::populateNet(Net dstNet) constBlobs.insert(std::make_pair(layerParams.name, layerParams.blobs[0])); continue; } + else if (layer_type == "ConstantFill" || layer_type == "ConstantOfShape") + { + CV_Assert_N(node_proto.input_size()); + MatShape inpShape = getBlob(node_proto, constBlobs, 0); + float value = layerParams.get("value", 0); + Mat fill(inpShape.size(), &inpShape[0], CV_32F, Scalar(value)); + constBlobs.insert(std::make_pair(layerParams.name, fill)); + continue; + } + else if (layer_type == "LSTM") + { + std::cout << "~~~~~~" << '\n'; + std::cout << layerParams << '\n'; + for (int i = 1; i < node_proto.input_size(); ++i) { + std::cout << "i: " << node_proto.input(i) << " " << constBlobs[node_proto.input(i)].size << '\n'; + } + + CV_Assert(node_proto.input_size() == 7); + Mat Wx = getBlob(node_proto, constBlobs, 1); + Mat Wh = getBlob(node_proto, constBlobs, 2); + Mat b = getBlob(node_proto, constBlobs, 3); + + + std::cout << Wx.size << '\n'; + std::cout << Wh.size << '\n'; + + int Wx_shape[] = {Wx.size[1], Wx.size[2]}; + int Wh_shape[] = {Wh.size[1], Wh.size[2]}; + std::cout << "b.size " << b.size << '\n'; + int b_shape[] = {2, b.size[1] / 2}; + + Wx = Wx.reshape(1, 2, &Wx_shape[0]); + b = b.reshape(1, 2, &b_shape[0]); + + std::cout << "b ----------------" << '\n'; + + std::cout << b << '\n'; + reduce(b, b, 0, REDUCE_SUM); + std::cout << b << '\n'; + + // https://pytorch.org/docs/stable/nn.html#lstm + // IFGO->IFOG + // swap each 3rd and 4th rows + // Wx = Wx.t(); + + float* weightData = (float*)Wx.data; + std::swap(weightData[1], weightData[2]); + + float* biasData = (float*)b.data; + std::swap(biasData[1], biasData[2]); + + // std::swap(weightData[2], weightData[3]); + // + // weightData = (float*)Wh.data; + // std::swap(weightData[1], weightData[2]); + // std::swap(weightData[2], weightData[3]); + + + // const int outSize = Wx.cols / 4; + // for (int i = 0; i < Wx.rows; ++i) + // for (int j = 0; j < outSize; ++j) + // { + // // std::swap(weightData[i * W.cols + 1 * outSize + j], + // // weightData[i * W.cols + 2 * outSize + j]); + // std::swap(weightData[i * Wx.cols + 2 * outSize + j], + // weightData[i * Wx.cols + 3 * outSize + j]); + // } + + // float* weightData = Wx.ptr(); + // for (int j = 0; j < 5; ++j) + // { + // std::cout << "swap " << (10 + j) << " " << (15 + j) << '\n'; + // for (int i = 0; i < 12; ++i) + // std::swap(weightData[(10 + j) * 12 + i], + // weightData[(15 + j) * 12 + i]); + // } + + layerParams.blobs.resize(3); + layerParams.blobs[0] = Wh.reshape(1, 2, 
&Wh_shape[0]); + layerParams.blobs[1] = Wx; + layerParams.blobs[2] = b; + + std::cout << "Wx" << '\n'; + std::cout << layerParams.blobs[1] << '\n'; + + std::cout << "Wh" << '\n'; + std::cout << layerParams.blobs[0] << '\n'; + + // layerParams.set("reverse", true); + + + // layerParams.set("use_peephole", true); + // layerParams.blobs.resize(6); + // for (int i = 0; i < 3; ++i) + // { + // Mat w = Mat::eye(layerParams.blobs[0].cols, layerParams.blobs[0].cols, CV_32F); + // layerParams.blobs[3 + i] = w; + // } + + // std::cout << layerParams.blobs[1] << '\n'; + + // int lstmId = dstNet.addLayer(layerParams.name, layerParams.type, layerParams); + // + // layerParams = LayerParams(); + // + // // Add reshape + // int shape[] = {1, 10, 11, 5}; + // layerParams.name = node_proto.output(0) + "/reshape"; + // layerParams.type = "Reshape"; + // layerParams.set("dim", DictValue::arrayInt(&shape[0], 4)); + } else if (layer_type == "ImageScaler") { const float scale = layerParams.has("scale") ? layerParams.get("scale") : 1.0f; @@ -881,14 +1005,14 @@ void ONNXImporter::populateNet(Net dstNet) else if (layer_type == "Squeeze") { CV_Assert_N(node_proto.input_size() == 1, layerParams.has("axes")); - DictValue axes_dict = layerParams.get("axes"); - if (axes_dict.size() != 1) - CV_Error(Error::StsNotImplemented, "Multidimensional squeeze"); - - int axis = axes_dict.getIntValue(0); - layerParams.set("axis", axis - 1); - layerParams.set("end_axis", axis); - layerParams.type = "Flatten"; + // DictValue axes_dict = layerParams.get("axes"); + // if (axes_dict.size() != 1) + // CV_Error(Error::StsNotImplemented, "Multidimensional squeeze"); + // + // int axis = axes_dict.getIntValue(0); + // layerParams.set("axis", axis - 1); + // layerParams.set("end_axis", axis); + layerParams.type = "Identity"; } else if (layer_type == "Flatten") { @@ -1032,17 +1156,30 @@ void ONNXImporter::populateNet(Net dstNet) else if (layer_type == "Gather") { CV_Assert(node_proto.input_size() == 2); - CV_Assert(layerParams.has("axis")); Mat input = getBlob(node_proto, constBlobs, 0); Mat indexMat = getBlob(node_proto, constBlobs, 1); CV_Assert_N(indexMat.type() == CV_32S, indexMat.total() == 1); int index = indexMat.at(0); - int axis = layerParams.get("axis"); - std::vector ranges(input.dims, Range::all()); - ranges[axis] = Range(index, index + 1); + Mat out; + if (layerParams.has("axis")) + { + int axis = layerParams.get("axis"); - Mat out = input(ranges); + std::vector ranges(input.dims, Range::all()); + ranges[axis] = Range(index, index + 1); + + out = input(ranges); + } + else + { + CV_Assert(index < input.total()); + const int dims = input.dims; + input = input.reshape(1, 1); + input.dims = 2; + out = input.reshape(1, 1).colRange(index, index + 1); + out.dims = dims; + } constBlobs.insert(std::make_pair(layerParams.name, out)); continue; } diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index fe7e47f7a0..60ba6d39c5 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -1826,10 +1826,12 @@ void TFImporter::populateNet(Net dstNet) const int outSize = W.cols / 4; // IGFO->IFOG + std::cout << "(TF) W " << W.size << '\n'; float* weightData = (float*)W.data; for (int i = 0; i < W.rows; ++i) for (int j = 0; j < outSize; ++j) { + // std::cout << "swap " << i * W.cols + 1 * outSize << " " << i * W.cols + 2 * outSize << '\n'; std::swap(weightData[i * W.cols + 1 * outSize + j], weightData[i * W.cols + 2 * outSize + j]); 
std::swap(weightData[i * W.cols + 2 * outSize + j], @@ -1838,6 +1840,11 @@ void TFImporter::populateNet(Net dstNet) Wx = W.rowRange(0, W.rows - outSize).t(); Wh = W.rowRange(W.rows - outSize, W.rows).t(); + std::cout << "(TF) Wx " << Wx.size << '\n'; + std::cout << "(TF) Wh " << Wh.size << '\n'; + std::cout << "(TF) b " << b.size << '\n'; + + layerParams.blobs.resize(3); layerParams.blobs[0] = Wh; layerParams.blobs[1] = Wx; diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index 40110d2542..c5b243b8ab 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -79,6 +79,12 @@ public: netSoftmax.setInput(ref); ref = netSoftmax.forward(); } + std::cout << "ref: " << ref.size << '\n'; + std::cout << "out: " << out.size << '\n'; + std::cout << ref.reshape(1, 1) << '\n'; + std::cout << '\n'; + std::cout << out.reshape(1, 1) << '\n'; + normAssert(ref, out, "", l1 ? l1 : default_l1, lInf ? lInf : default_lInf); if (checkNoFallbacks) expectNoFallbacksFromIE(net); @@ -451,6 +457,11 @@ TEST_P(Test_ONNX_layers, Split_EltwiseMax) testONNXModels("split_max"); } +TEST_P(Test_ONNX_layers, LSTM) +{ + testONNXModels("lstm"); +} + INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_ONNX_layers, dnnBackendsAndTargets()); class Test_ONNX_nets : public Test_ONNX_layers From 8d69dbdf49f52c3610187753430de293dce823d0 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Sun, 15 Mar 2020 23:21:58 +0300 Subject: [PATCH 2/5] LSTM from ONNX works --- modules/dnn/src/layers/recurrent_layers.cpp | 10 - .../dnn/src/onnx/onnx_graph_simplifier.cpp | 25 --- modules/dnn/src/onnx/onnx_importer.cpp | 186 +++++++----------- modules/dnn/src/tensorflow/tf_importer.cpp | 7 - modules/dnn/test/test_onnx_importer.cpp | 6 - 5 files changed, 66 insertions(+), 168 deletions(-) diff --git a/modules/dnn/src/layers/recurrent_layers.cpp b/modules/dnn/src/layers/recurrent_layers.cpp index a3962db127..3f9a229516 100644 --- a/modules/dnn/src/layers/recurrent_layers.cpp +++ b/modules/dnn/src/layers/recurrent_layers.cpp @@ -215,8 +215,6 @@ public: internals.push_back(shape(_numSamples, 1)); // dummyOnes internals.push_back(shape(_numSamples, 4*_numOut)); // gates - - std::cout << "LSTM out: " << outputs[0] << '\n'; return false; } @@ -303,8 +301,6 @@ public: tsEnd = numTimeStamps; tsInc = 1; } - std::cout << "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" << '\n'; - std::cout << tsStart << " " << tsEnd << '\n'; for (int ts = tsStart; ts != tsEnd; ts += tsInc) { Range curRowRange(ts*numSamples, (ts + 1)*numSamples); @@ -318,7 +314,6 @@ public: Mat gateF = gates.colRange(1*numOut, 2*numOut); Mat gateO = gates.colRange(2*numOut, 3*numOut); Mat gateG = gates.colRange(3*numOut, 4*numOut); - std::cout << "i " << gateI << '\n'; if (forgetBias) add(gateF, forgetBias, gateF); @@ -334,7 +329,6 @@ public: { Mat gatesIFO = gates.colRange(0, 3*numOut); sigmoid(gatesIFO, gatesIFO); - std::cout << "ifo " << gatesIFO << '\n'; } tanh(gateG, gateG); @@ -351,15 +345,12 @@ public: } if (usePeephole) { - std::cout << "if (usePeephole)" << '\n'; gemm(cInternal, blobs[5], 1, gateO, 1, gateO); sigmoid(gateO, gateO); } //compute h_t tanh(cInternal, hInternal); - std::cout << "o " << gateO << '\n'; - std::cout << "tanh(o) " << hInternal << '\n'; multiply(gateO, hInternal, hInternal); //save results in output blobs @@ -367,7 +358,6 @@ public: if (produceCellOutput) cInternal.copyTo(cOutTs.rowRange(curRowRange)); } - std::cout << "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" << '\n'; } }; diff --git 
a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp index 6693a75ff4..fe96927840 100644 --- a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp +++ b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp @@ -290,30 +290,6 @@ public: } }; -// // To remove Squeeze after LSTM for non-bidirectional LSTM -// class LSTMSqueeze : public Subgraph -// { -// public: -// LSTMSqueeze() -// { -// int input = addNodeToMatch(""); -// -// std::vector lstmInps(7); -// lstmInps[0] = input; -// -// for (int i = 1; i < 4; ++i) -// lstmInps[i] = addNodeToMatch("Unsqueeze"); -// lstmInps[4] = addNodeToMatch(""); -// for (int i = 5; i < 7; ++i) -// lstmInps[i] = addNodeToMatch("ConstantOfShape"); -// -// int lstm = addNodeToMatch("LSTM", lstmInps); -// addNodeToMatch("Squeeze", lstm); -// -// setFusedNode("LSTM", lstmInps); -// } -// }; - void simplifySubgraphs(opencv_onnx::GraphProto& net) { std::vector > subgraphs; @@ -323,7 +299,6 @@ void simplifySubgraphs(opencv_onnx::GraphProto& net) subgraphs.push_back(makePtr()); subgraphs.push_back(makePtr()); subgraphs.push_back(makePtr()); - // subgraphs.push_back(makePtr()); simplifySubgraphs(Ptr(new ONNXGraphWrapper(net)), subgraphs); } diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index bcf3d28eed..2bcba9e6ad 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -322,7 +322,7 @@ void ONNXImporter::populateNet(Net dstNet) std::string layer_type = node_proto.op_type(); layerParams.type = layer_type; - std::cout << layerParams.name << " " << layer_type << '\n'; + if (layer_type == "MaxPool") { @@ -457,19 +457,6 @@ void ONNXImporter::populateNet(Net dstNet) constBlobs.insert(std::make_pair(layerParams.name, sliced[0])); continue; } - - layerParams.set("begin", DictValue::arrayInt(&begin[0], begin.size())); - layerParams.set("end", DictValue::arrayInt(&end[0], end.size())); - - CV_Assert(node_proto.input_size() == 1); - if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) - { - std::vector inputs(1, getBlob(node_proto, constBlobs, 0)), sliced; - runLayer(layerParams, inputs, sliced); - CV_Assert(sliced.size() == 1); - constBlobs.insert(std::make_pair(layerParams.name, sliced[0])); - continue; - } } else if (layer_type == "Split") { @@ -592,116 +579,43 @@ void ONNXImporter::populateNet(Net dstNet) constBlobs.insert(std::make_pair(layerParams.name, layerParams.blobs[0])); continue; } - else if (layer_type == "ConstantFill" || layer_type == "ConstantOfShape") - { - CV_Assert_N(node_proto.input_size()); - MatShape inpShape = getBlob(node_proto, constBlobs, 0); - float value = layerParams.get("value", 0); - Mat fill(inpShape.size(), &inpShape[0], CV_32F, Scalar(value)); - constBlobs.insert(std::make_pair(layerParams.name, fill)); - continue; - } else if (layer_type == "LSTM") { - std::cout << "~~~~~~" << '\n'; - std::cout << layerParams << '\n'; - for (int i = 1; i < node_proto.input_size(); ++i) { - std::cout << "i: " << node_proto.input(i) << " " << constBlobs[node_proto.input(i)].size << '\n'; - } - + // https://pytorch.org/docs/stable/nn.html#lstm CV_Assert(node_proto.input_size() == 7); Mat Wx = getBlob(node_proto, constBlobs, 1); Mat Wh = getBlob(node_proto, constBlobs, 2); Mat b = getBlob(node_proto, constBlobs, 3); + const int numHidden = Wh.size[2]; - std::cout << Wx.size << '\n'; - std::cout << Wh.size << '\n'; - - int Wx_shape[] = {Wx.size[1], Wx.size[2]}; - int Wh_shape[] = {Wh.size[1], Wh.size[2]}; - std::cout << "b.size " << 
b.size << '\n'; - int b_shape[] = {2, b.size[1] / 2}; - - Wx = Wx.reshape(1, 2, &Wx_shape[0]); - b = b.reshape(1, 2, &b_shape[0]); - - std::cout << "b ----------------" << '\n'; - - std::cout << b << '\n'; + Wx = Wx.reshape(1, Wx.size[1]); + Wh = Wh.reshape(1, Wh.size[1]); + b = b.reshape(1, 2); reduce(b, b, 0, REDUCE_SUM); - std::cout << b << '\n'; - - // https://pytorch.org/docs/stable/nn.html#lstm - // IFGO->IFOG - // swap each 3rd and 4th rows - // Wx = Wx.t(); - - float* weightData = (float*)Wx.data; - std::swap(weightData[1], weightData[2]); + // IFGO->IGFO + float* WxData = (float*)Wx.data; + float* WhData = (float*)Wh.data; float* biasData = (float*)b.data; - std::swap(biasData[1], biasData[2]); - - // std::swap(weightData[2], weightData[3]); - // - // weightData = (float*)Wh.data; - // std::swap(weightData[1], weightData[2]); - // std::swap(weightData[2], weightData[3]); - - - // const int outSize = Wx.cols / 4; - // for (int i = 0; i < Wx.rows; ++i) - // for (int j = 0; j < outSize; ++j) - // { - // // std::swap(weightData[i * W.cols + 1 * outSize + j], - // // weightData[i * W.cols + 2 * outSize + j]); - // std::swap(weightData[i * Wx.cols + 2 * outSize + j], - // weightData[i * Wx.cols + 3 * outSize + j]); - // } - - // float* weightData = Wx.ptr(); - // for (int j = 0; j < 5; ++j) - // { - // std::cout << "swap " << (10 + j) << " " << (15 + j) << '\n'; - // for (int i = 0; i < 12; ++i) - // std::swap(weightData[(10 + j) * 12 + i], - // weightData[(15 + j) * 12 + i]); - // } - + for (int j = 0; j < numHidden; ++j) + { + for (int i = 0; i < Wx.cols; ++i) + { + std::swap(WxData[(numHidden + j) * Wx.cols + i], + WxData[(numHidden * 2 + j) * Wx.cols + i]); + } + for (int i = 0; i < Wh.cols; ++i) + { + std::swap(WhData[(numHidden + j) * Wh.cols + i], + WhData[(numHidden * 2 + j) * Wh.cols + i]); + } + std::swap(biasData[numHidden + j], biasData[numHidden * 2 + j]); + } layerParams.blobs.resize(3); - layerParams.blobs[0] = Wh.reshape(1, 2, &Wh_shape[0]); + layerParams.blobs[0] = Wh; layerParams.blobs[1] = Wx; layerParams.blobs[2] = b; - - std::cout << "Wx" << '\n'; - std::cout << layerParams.blobs[1] << '\n'; - - std::cout << "Wh" << '\n'; - std::cout << layerParams.blobs[0] << '\n'; - - // layerParams.set("reverse", true); - - - // layerParams.set("use_peephole", true); - // layerParams.blobs.resize(6); - // for (int i = 0; i < 3; ++i) - // { - // Mat w = Mat::eye(layerParams.blobs[0].cols, layerParams.blobs[0].cols, CV_32F); - // layerParams.blobs[3 + i] = w; - // } - - // std::cout << layerParams.blobs[1] << '\n'; - - // int lstmId = dstNet.addLayer(layerParams.name, layerParams.type, layerParams); - // - // layerParams = LayerParams(); - // - // // Add reshape - // int shape[] = {1, 10, 11, 5}; - // layerParams.name = node_proto.output(0) + "/reshape"; - // layerParams.type = "Reshape"; - // layerParams.set("dim", DictValue::arrayInt(&shape[0], 4)); } else if (layer_type == "ImageScaler") { @@ -1005,14 +919,29 @@ void ONNXImporter::populateNet(Net dstNet) else if (layer_type == "Squeeze") { CV_Assert_N(node_proto.input_size() == 1, layerParams.has("axes")); - // DictValue axes_dict = layerParams.get("axes"); - // if (axes_dict.size() != 1) - // CV_Error(Error::StsNotImplemented, "Multidimensional squeeze"); - // - // int axis = axes_dict.getIntValue(0); - // layerParams.set("axis", axis - 1); - // layerParams.set("end_axis", axis); - layerParams.type = "Identity"; + DictValue axes_dict = layerParams.get("axes"); + MatShape inpShape = outShapes[node_proto.input(0)]; + + 
std::vector maskedAxes(inpShape.size(), false); + for (int i = 0; i < axes_dict.size(); ++i) + { + int axis = axes_dict.getIntValue(i); + CV_CheckLE(axis, static_cast(inpShape.size()), "Squeeze axis"); + maskedAxes[axis] = inpShape[axis] == 1; + } + MatShape outShape; + for (int i = 0; i < inpShape.size(); ++i) + { + if (!maskedAxes[i]) + outShape.push_back(inpShape[i]); + } + if (outShape.size() != inpShape.size()) + { + layerParams.type = "Reshape"; + layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size())); + } + else + layerParams.type = "Identity"; } else if (layer_type == "Flatten") { @@ -1142,9 +1071,26 @@ void ONNXImporter::populateNet(Net dstNet) else layerParams.type = "Identity"; } - else if (layer_type == "ConstantOfShape") + else if (layer_type == "ConstantFill" || layer_type == "ConstantOfShape") { - float fill_value = layerParams.blobs.empty() ? 0 : layerParams.blobs[0].at(0, 0); + CV_Assert_N(node_proto.input_size()); + MatShape inpShape = getBlob(node_proto, constBlobs, 0); + float value = layerParams.get("value", 0); + Mat fill(inpShape.size(), &inpShape[0], CV_32F, Scalar(value)); + constBlobs.insert(std::make_pair(layerParams.name, fill)); + continue; + } + else if (layer_type == "ConstantOfShape" || layer_type == "ConstantFill") + { + float fill_value; + if (!layerParams.blobs.empty()) + { + CV_Assert(!layerParams.has("value")); + fill_value = layerParams.blobs[0].at(0, 0); + } + else + fill_value = layerParams.get("value", 0); + MatShape inpShape = getBlob(node_proto, constBlobs, 0); for (int i = 0; i < inpShape.size(); i++) CV_CheckGT(inpShape[i], 0, ""); diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index 60ba6d39c5..fe7e47f7a0 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -1826,12 +1826,10 @@ void TFImporter::populateNet(Net dstNet) const int outSize = W.cols / 4; // IGFO->IFOG - std::cout << "(TF) W " << W.size << '\n'; float* weightData = (float*)W.data; for (int i = 0; i < W.rows; ++i) for (int j = 0; j < outSize; ++j) { - // std::cout << "swap " << i * W.cols + 1 * outSize << " " << i * W.cols + 2 * outSize << '\n'; std::swap(weightData[i * W.cols + 1 * outSize + j], weightData[i * W.cols + 2 * outSize + j]); std::swap(weightData[i * W.cols + 2 * outSize + j], @@ -1840,11 +1838,6 @@ void TFImporter::populateNet(Net dstNet) Wx = W.rowRange(0, W.rows - outSize).t(); Wh = W.rowRange(W.rows - outSize, W.rows).t(); - std::cout << "(TF) Wx " << Wx.size << '\n'; - std::cout << "(TF) Wh " << Wh.size << '\n'; - std::cout << "(TF) b " << b.size << '\n'; - - layerParams.blobs.resize(3); layerParams.blobs[0] = Wh; layerParams.blobs[1] = Wx; diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index c5b243b8ab..a2cd2c3a68 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -79,12 +79,6 @@ public: netSoftmax.setInput(ref); ref = netSoftmax.forward(); } - std::cout << "ref: " << ref.size << '\n'; - std::cout << "out: " << out.size << '\n'; - std::cout << ref.reshape(1, 1) << '\n'; - std::cout << '\n'; - std::cout << out.reshape(1, 1) << '\n'; - normAssert(ref, out, "", l1 ? l1 : default_l1, lInf ? 
lInf : default_lInf); if (checkNoFallbacks) expectNoFallbacksFromIE(net); From 11d565ca629d5b36993752941472a26244600e79 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Wed, 18 Mar 2020 00:00:24 +0300 Subject: [PATCH 3/5] Fix LSTM from ONNX with batch==1 --- modules/dnn/src/layers/recurrent_layers.cpp | 9 +- modules/dnn/src/onnx/onnx_importer.cpp | 97 ++++++++++++++------- 2 files changed, 69 insertions(+), 37 deletions(-) diff --git a/modules/dnn/src/layers/recurrent_layers.cpp b/modules/dnn/src/layers/recurrent_layers.cpp index 3f9a229516..26d2ea9de5 100644 --- a/modules/dnn/src/layers/recurrent_layers.cpp +++ b/modules/dnn/src/layers/recurrent_layers.cpp @@ -110,10 +110,11 @@ public: const Mat& Wh = blobs[0]; const Mat& Wx = blobs[1]; const Mat& bias = blobs[2]; - CV_Assert(Wh.dims == 2 && Wx.dims == 2); - CV_Assert(Wh.rows == Wx.rows); - CV_Assert(Wh.rows == 4*Wh.cols); - CV_Assert(Wh.rows == (int)bias.total()); + CV_CheckEQ(Wh.dims, 2, ""); + CV_CheckEQ(Wx.dims, 2, ""); + CV_CheckEQ(Wh.rows, Wx.rows, ""); + CV_CheckEQ(Wh.rows, 4*Wh.cols, ""); + CV_CheckEQ(Wh.rows, (int)bias.total(), ""); CV_Assert(Wh.type() == Wx.type() && Wx.type() == bias.type()); // Peephole weights. diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 2bcba9e6ad..b243a986e7 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -49,6 +49,11 @@ class ONNXImporter LayerParams getLayerParams(const opencv_onnx::NodeProto& node_proto); bool isCeilMode(const LayerParams& layerParams); + void addLayer(Net& dstNet, LayerParams& layerParams, + const opencv_onnx::NodeProto& node_proto, + std::map& layer_id, + std::map& outShapes); + public: ONNXImporter(const char *onnxFile) @@ -259,6 +264,42 @@ Mat ONNXImporter::getBlob(const opencv_onnx::NodeProto& node_proto, return constBlob->second; } +void ONNXImporter::addLayer(Net& dstNet, LayerParams& layerParams, + const opencv_onnx::NodeProto& node_proto, + std::map& layer_id, + std::map& outShapes) +{ + std::map::iterator layerId; + std::map::iterator shapeIt; + + int id = dstNet.addLayer(layerParams.name, layerParams.type, layerParams); + for (int i = 0; i < node_proto.output_size(); ++i) + { + layer_id.insert(std::make_pair(node_proto.output(i), LayerInfo(id, i))); + } + + std::vector layerInpShapes, layerOutShapes, layerInternalShapes; + int inpNum = 0; + for (int j = 0; j < node_proto.input_size(); j++) { + layerId = layer_id.find(node_proto.input(j)); + if (layerId != layer_id.end()) { + dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, inpNum); + ++inpNum; + // Collect input shapes. + shapeIt = outShapes.find(node_proto.input(j)); + CV_Assert(shapeIt != outShapes.end()); + layerInpShapes.push_back(shapeIt->second); + } + } + // Compute shape of output blob for this layer. 
+ Ptr layer = dstNet.getLayer(id); + layer->getMemoryShapes(layerInpShapes, 0, layerOutShapes, layerInternalShapes); + for (int i = 0; i < node_proto.output_size() && i < (int)layerOutShapes.size(); ++i) + { + outShapes[node_proto.output(i)] = layerOutShapes[i]; + } +} + void ONNXImporter::populateNet(Net dstNet) { CV_Assert(model_proto.has_graph()); @@ -581,13 +622,16 @@ void ONNXImporter::populateNet(Net dstNet) } else if (layer_type == "LSTM") { + LayerParams lstmParams = layerParams; + lstmParams.name += "/lstm"; + // https://pytorch.org/docs/stable/nn.html#lstm CV_Assert(node_proto.input_size() == 7); Mat Wx = getBlob(node_proto, constBlobs, 1); Mat Wh = getBlob(node_proto, constBlobs, 2); Mat b = getBlob(node_proto, constBlobs, 3); - const int numHidden = Wh.size[2]; + const int numHidden = lstmParams.get("hidden_size"); Wx = Wx.reshape(1, Wx.size[1]); Wh = Wh.reshape(1, Wh.size[1]); @@ -612,10 +656,24 @@ void ONNXImporter::populateNet(Net dstNet) } std::swap(biasData[numHidden + j], biasData[numHidden * 2 + j]); } - layerParams.blobs.resize(3); - layerParams.blobs[0] = Wh; - layerParams.blobs[1] = Wx; - layerParams.blobs[2] = b; + + lstmParams.blobs.resize(3); + lstmParams.blobs[0] = Wh; + lstmParams.blobs[1] = Wx; + lstmParams.blobs[2] = b; + + node_proto.set_output(0, lstmParams.name); // set different name so output shapes will be registered on that name + addLayer(dstNet, lstmParams, node_proto, layer_id, outShapes); + + MatShape lstmShape = outShapes[node_proto.output(0)]; + + // Add fake 1 as it is done in ONNX + lstmShape.insert(lstmShape.begin() + 1, 1); + + layerParams.type = "Reshape"; + layerParams.set("dim", DictValue::arrayInt(&lstmShape[0], lstmShape.size())); + node_proto.set_input(0, lstmParams.name); // redirect input to LSTM + node_proto.set_output(0, layerParams.name); // keep origin LSTM's name } else if (layer_type == "ImageScaler") { @@ -1228,34 +1286,7 @@ void ONNXImporter::populateNet(Net dstNet) layerParams.blobs.push_back(getBlob(node_proto, constBlobs, j)); } } - - int id = dstNet.addLayer(layerParams.name, layerParams.type, layerParams); - for (int i = 0; i < node_proto.output_size(); ++i) - { - layer_id.insert(std::make_pair(node_proto.output(i), LayerInfo(id, i))); - } - - std::vector layerInpShapes, layerOutShapes, layerInternalShapes; - int inpNum = 0; - for (int j = 0; j < node_proto.input_size(); j++) { - layerId = layer_id.find(node_proto.input(j)); - if (layerId != layer_id.end()) { - dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, inpNum); - ++inpNum; - // Collect input shapes. - shapeIt = outShapes.find(node_proto.input(j)); - CV_Assert(shapeIt != outShapes.end()); - layerInpShapes.push_back(shapeIt->second); - } - } - - // Compute shape of output blob for this layer. 
- Ptr layer = dstNet.getLayer(id); - layer->getMemoryShapes(layerInpShapes, 0, layerOutShapes, layerInternalShapes); - for (int i = 0; i < node_proto.output_size() && i < (int)layerOutShapes.size(); ++i) - { - outShapes[node_proto.output(i)] = layerOutShapes[i]; - } + addLayer(dstNet, layerParams, node_proto, layer_id, outShapes); } } From 8433620295891c184ce4edd86bbd5ad6440eda45 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Sun, 22 Mar 2020 00:20:36 +0300 Subject: [PATCH 4/5] Bidirectional LSTM --- modules/dnn/src/layers/recurrent_layers.cpp | 162 +++++++++++--------- modules/dnn/src/onnx/onnx_importer.cpp | 43 +++--- modules/dnn/test/test_onnx_importer.cpp | 5 + 3 files changed, 116 insertions(+), 94 deletions(-) diff --git a/modules/dnn/src/layers/recurrent_layers.cpp b/modules/dnn/src/layers/recurrent_layers.cpp index 26d2ea9de5..69606a6b4e 100644 --- a/modules/dnn/src/layers/recurrent_layers.cpp +++ b/modules/dnn/src/layers/recurrent_layers.cpp @@ -93,6 +93,7 @@ class LSTMLayerImpl CV_FINAL : public LSTMLayer float forgetBias, cellClip; bool useCellClip, usePeephole; bool reverse; // If true, go in negative direction along the time axis + bool bidirectional; // If true, produces both forward and reversed directions along time axis public: @@ -101,6 +102,7 @@ public: { setParamsFrom(params); + bidirectional = params.get("bidirectional", false); if (!blobs.empty()) { CV_Assert(blobs.size() >= 3); @@ -113,7 +115,7 @@ public: CV_CheckEQ(Wh.dims, 2, ""); CV_CheckEQ(Wx.dims, 2, ""); CV_CheckEQ(Wh.rows, Wx.rows, ""); - CV_CheckEQ(Wh.rows, 4*Wh.cols, ""); + CV_CheckEQ(Wh.rows, (1 + static_cast(bidirectional))*4*Wh.cols, ""); CV_CheckEQ(Wh.rows, (int)bias.total(), ""); CV_Assert(Wh.type() == Wx.type() && Wx.type() == bias.type()); @@ -136,6 +138,7 @@ public: useCellClip = params.get("use_cell_clip", false); usePeephole = params.get("use_peephole", false); reverse = params.get("reverse", false); + CV_Assert(!reverse || !bidirectional); allocated = false; outTailShape.clear(); @@ -207,6 +210,7 @@ public: outResShape.push_back(_numSamples); outResShape.insert(outResShape.end(), outTailShape_.begin(), outTailShape_.end()); + outResShape.back() *= (1 + static_cast(bidirectional)); size_t noutputs = produceCellOutput ? 2 : 1; outputs.assign(noutputs, outResShape); @@ -253,6 +257,7 @@ public: outTsShape.clear(); outTsShape.push_back(numSamples); outTsShape.insert(outTsShape.end(), outTailShape.begin(), outTailShape.end()); + outTsShape.back() *= (1 + static_cast(bidirectional)); allocated = true; } @@ -273,91 +278,96 @@ public: outputs_arr.getMatVector(output); internals_arr.getMatVector(internals); - const Mat &Wh = blobs[0]; - const Mat &Wx = blobs[1]; - const Mat &bias = blobs[2]; - - int numOut = Wh.size[1]; - - Mat hInternal = internals[0], cInternal = internals[1], - dummyOnes = internals[2], gates = internals[3]; - hInternal.setTo(0.); - cInternal.setTo(0.); - dummyOnes.setTo(1.); - - int numSamplesTotal = numTimeStamps*numSamples; - Mat xTs = input[0].reshape(1, numSamplesTotal); - - Mat hOutTs = output[0].reshape(1, numSamplesTotal); - Mat cOutTs = produceCellOutput ? 
output[1].reshape(1, numSamplesTotal) : Mat(); - - int tsStart, tsEnd, tsInc; - if (reverse) { - tsStart = numTimeStamps - 1; - tsEnd = -1; - tsInc = -1; - } - else { - tsStart = 0; - tsEnd = numTimeStamps; - tsInc = 1; - } - for (int ts = tsStart; ts != tsEnd; ts += tsInc) + const int numDirs = 1 + static_cast(bidirectional); + for (int i = 0; i < numDirs; ++i) { - Range curRowRange(ts*numSamples, (ts + 1)*numSamples); - Mat xCurr = xTs.rowRange(curRowRange); + const Mat &Wh = blobs[0].rowRange(i * blobs[0].rows / numDirs, (i + 1) * blobs[0].rows / numDirs); + const Mat &Wx = blobs[1].rowRange(i * blobs[1].rows / numDirs, (i + 1) * blobs[1].rows / numDirs); + const Mat &bias = blobs[2].colRange(i * blobs[2].cols / numDirs, (i + 1) * blobs[2].cols / numDirs); - gemm(xCurr, Wx, 1, gates, 0, gates, GEMM_2_T); // Wx * x_t - gemm(hInternal, Wh, 1, gates, 1, gates, GEMM_2_T); //+Wh * h_{t-1} - gemm(dummyOnes, bias, 1, gates, 1, gates); //+b + int numOut = Wh.size[1]; - Mat gateI = gates.colRange(0*numOut, 1*numOut); - Mat gateF = gates.colRange(1*numOut, 2*numOut); - Mat gateO = gates.colRange(2*numOut, 3*numOut); - Mat gateG = gates.colRange(3*numOut, 4*numOut); + Mat hInternal = internals[0], cInternal = internals[1], + dummyOnes = internals[2], gates = internals[3]; + hInternal.setTo(0.); + cInternal.setTo(0.); + dummyOnes.setTo(1.); - if (forgetBias) - add(gateF, forgetBias, gateF); + int numSamplesTotal = numTimeStamps*numSamples; + Mat xTs = input[0].reshape(1, numSamplesTotal); - if (usePeephole) - { - Mat gatesIF = gates.colRange(0, 2*numOut); - gemm(cInternal, blobs[3], 1, gateI, 1, gateI); - gemm(cInternal, blobs[4], 1, gateF, 1, gateF); - sigmoid(gatesIF, gatesIF); + Mat hOutTs = output[0].reshape(1, numSamplesTotal); + hOutTs = hOutTs.colRange(i * hOutTs.cols / numDirs, (i + 1) * hOutTs.cols / numDirs); + Mat cOutTs = produceCellOutput ? 
output[1].reshape(1, numSamplesTotal) : Mat(); + + int tsStart, tsEnd, tsInc; + if (reverse || i == 1) { + tsStart = numTimeStamps - 1; + tsEnd = -1; + tsInc = -1; } - else - { - Mat gatesIFO = gates.colRange(0, 3*numOut); - sigmoid(gatesIFO, gatesIFO); + else { + tsStart = 0; + tsEnd = numTimeStamps; + tsInc = 1; } - - tanh(gateG, gateG); - - //compute c_t - multiply(gateF, cInternal, gateF); // f_t (*) c_{t-1} - multiply(gateI, gateG, gateI); // i_t (*) g_t - add(gateF, gateI, cInternal); // c_t = f_t (*) c_{t-1} + i_t (*) g_t - - if (useCellClip) + for (int ts = tsStart; ts != tsEnd; ts += tsInc) { - min(cInternal, cellClip, cInternal); - max(cInternal, -cellClip, cInternal); - } - if (usePeephole) - { - gemm(cInternal, blobs[5], 1, gateO, 1, gateO); - sigmoid(gateO, gateO); - } + Range curRowRange(ts*numSamples, (ts + 1)*numSamples); + Mat xCurr = xTs.rowRange(curRowRange); - //compute h_t - tanh(cInternal, hInternal); - multiply(gateO, hInternal, hInternal); + gemm(xCurr, Wx, 1, gates, 0, gates, GEMM_2_T); // Wx * x_t + gemm(hInternal, Wh, 1, gates, 1, gates, GEMM_2_T); //+Wh * h_{t-1} + gemm(dummyOnes, bias, 1, gates, 1, gates); //+b - //save results in output blobs - hInternal.copyTo(hOutTs.rowRange(curRowRange)); - if (produceCellOutput) - cInternal.copyTo(cOutTs.rowRange(curRowRange)); + Mat gateI = gates.colRange(0*numOut, 1*numOut); + Mat gateF = gates.colRange(1*numOut, 2*numOut); + Mat gateO = gates.colRange(2*numOut, 3*numOut); + Mat gateG = gates.colRange(3*numOut, 4*numOut); + + if (forgetBias) + add(gateF, forgetBias, gateF); + + if (usePeephole) + { + Mat gatesIF = gates.colRange(0, 2*numOut); + gemm(cInternal, blobs[3], 1, gateI, 1, gateI); + gemm(cInternal, blobs[4], 1, gateF, 1, gateF); + sigmoid(gatesIF, gatesIF); + } + else + { + Mat gatesIFO = gates.colRange(0, 3*numOut); + sigmoid(gatesIFO, gatesIFO); + } + + tanh(gateG, gateG); + + //compute c_t + multiply(gateF, cInternal, gateF); // f_t (*) c_{t-1} + multiply(gateI, gateG, gateI); // i_t (*) g_t + add(gateF, gateI, cInternal); // c_t = f_t (*) c_{t-1} + i_t (*) g_t + + if (useCellClip) + { + min(cInternal, cellClip, cInternal); + max(cInternal, -cellClip, cInternal); + } + if (usePeephole) + { + gemm(cInternal, blobs[5], 1, gateO, 1, gateO); + sigmoid(gateO, gateO); + } + + //compute h_t + tanh(cInternal, hInternal); + multiply(gateO, hInternal, hInternal); + + //save results in output blobs + hInternal.copyTo(hOutTs.rowRange(curRowRange)); + if (produceCellOutput) + cInternal.copyTo(cOutTs.rowRange(curRowRange)); + } } } }; diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index b243a986e7..79386e6615 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -630,37 +630,44 @@ void ONNXImporter::populateNet(Net dstNet) Mat Wx = getBlob(node_proto, constBlobs, 1); Mat Wh = getBlob(node_proto, constBlobs, 2); Mat b = getBlob(node_proto, constBlobs, 3); + b = b.reshape(1, b.size[0]); const int numHidden = lstmParams.get("hidden_size"); - - Wx = Wx.reshape(1, Wx.size[1]); - Wh = Wh.reshape(1, Wh.size[1]); - b = b.reshape(1, 2); - reduce(b, b, 0, REDUCE_SUM); + const int numDirs = Wx.size[0]; // Is 1 for forward only and 2 for bidirectional LSTM. 
+ const int numFeatures = Wx.size[2]; + Mat bx = b.colRange(0, b.cols / 2); + Mat bh = b.colRange(b.cols / 2, b.cols); + b = bx + bh; // IFGO->IGFO - float* WxData = (float*)Wx.data; - float* WhData = (float*)Wh.data; - float* biasData = (float*)b.data; - for (int j = 0; j < numHidden; ++j) + for (int k = 0; k < numDirs; ++k) { - for (int i = 0; i < Wx.cols; ++i) + float* WxData = Wx.ptr(k); + float* WhData = Wh.ptr(k); + float* biasData = b.ptr(k); + for (int j = 0; j < numHidden; ++j) { - std::swap(WxData[(numHidden + j) * Wx.cols + i], - WxData[(numHidden * 2 + j) * Wx.cols + i]); + for (int i = 0; i < numFeatures; ++i) + { + std::swap(WxData[(numHidden + j) * numFeatures + i], + WxData[(numHidden * 2 + j) * numFeatures + i]); + } + for (int i = 0; i < numHidden; ++i) + { + std::swap(WhData[(numHidden + j) * numHidden + i], + WhData[(numHidden * 2 + j) * numHidden + i]); + } + std::swap(biasData[numHidden + j], biasData[numHidden * 2 + j]); } - for (int i = 0; i < Wh.cols; ++i) - { - std::swap(WhData[(numHidden + j) * Wh.cols + i], - WhData[(numHidden * 2 + j) * Wh.cols + i]); - } - std::swap(biasData[numHidden + j], biasData[numHidden * 2 + j]); } + Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]); + Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]); lstmParams.blobs.resize(3); lstmParams.blobs[0] = Wh; lstmParams.blobs[1] = Wx; lstmParams.blobs[2] = b; + lstmParams.set("bidirectional", lstmParams.get("direction", "") == "bidirectional"); node_proto.set_output(0, lstmParams.name); // set different name so output shapes will be registered on that name addLayer(dstNet, lstmParams, node_proto, layer_id, outShapes); diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index a2cd2c3a68..f741319959 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -456,6 +456,11 @@ TEST_P(Test_ONNX_layers, LSTM) testONNXModels("lstm"); } +TEST_P(Test_ONNX_layers, LSTM_bidirectional) +{ + testONNXModels("lstm_bidirectional"); +} + INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_ONNX_layers, dnnBackendsAndTargets()); class Test_ONNX_nets : public Test_ONNX_layers From 467c3ef0ac621b2cbc296bbabe286bc9cc476696 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Sun, 22 Mar 2020 16:04:30 +0300 Subject: [PATCH 5/5] Add checks for LSTM initial h and c --- modules/dnn/src/onnx/onnx_importer.cpp | 22 +++++++++++++--------- modules/dnn/test/test_onnx_importer.cpp | 6 ++++-- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 79386e6615..47b5aff674 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -496,6 +496,7 @@ void ONNXImporter::populateNet(Net dstNet) runLayer(layerParams, inputs, sliced); CV_Assert(sliced.size() == 1); constBlobs.insert(std::make_pair(layerParams.name, sliced[0])); + outShapes[layerParams.name] = shape(sliced[0]); continue; } } @@ -630,6 +631,8 @@ void ONNXImporter::populateNet(Net dstNet) Mat Wx = getBlob(node_proto, constBlobs, 1); Mat Wh = getBlob(node_proto, constBlobs, 2); Mat b = getBlob(node_proto, constBlobs, 3); + CV_CheckEQ(countNonZero(getBlob(node_proto, constBlobs, 5)), 0, "Unsupported non zero initial_h"); + CV_CheckEQ(countNonZero(getBlob(node_proto, constBlobs, 6)), 0, "Unsupported non zero initial_c"); b = b.reshape(1, b.size[0]); const int numHidden = lstmParams.get("hidden_size"); @@ -1007,6 +1010,16 @@ void ONNXImporter::populateNet(Net dstNet) } else 
layerParams.type = "Identity"; + + if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) + { + Mat inp = getBlob(node_proto, constBlobs, 0); + Mat out = inp.reshape(1, outShape); + out.dims = outShape.size(); // to workaround dims == 1 + constBlobs.insert(std::make_pair(layerParams.name, out)); + outShapes[layerParams.name] = shape(out); + continue; + } } else if (layer_type == "Flatten") { @@ -1136,15 +1149,6 @@ void ONNXImporter::populateNet(Net dstNet) else layerParams.type = "Identity"; } - else if (layer_type == "ConstantFill" || layer_type == "ConstantOfShape") - { - CV_Assert_N(node_proto.input_size()); - MatShape inpShape = getBlob(node_proto, constBlobs, 0); - float value = layerParams.get("value", 0); - Mat fill(inpShape.size(), &inpShape[0], CV_32F, Scalar(value)); - constBlobs.insert(std::make_pair(layerParams.name, fill)); - continue; - } else if (layer_type == "ConstantOfShape" || layer_type == "ConstantFill") { float fill_value; diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index f741319959..6932e83a4e 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -405,6 +405,8 @@ TEST_P(Test_ONNX_layers, Reshape) TEST_P(Test_ONNX_layers, Squeeze) { + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); testONNXModels("squeeze"); } @@ -453,12 +455,12 @@ TEST_P(Test_ONNX_layers, Split_EltwiseMax) TEST_P(Test_ONNX_layers, LSTM) { - testONNXModels("lstm"); + testONNXModels("lstm", npy, 0, 0, false, false); } TEST_P(Test_ONNX_layers, LSTM_bidirectional) { - testONNXModels("lstm_bidirectional"); + testONNXModels("lstm_bidirectional", npy, 0, 0, false, false); } INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_ONNX_layers, dnnBackendsAndTargets());
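For context, below is a minimal usage sketch of what this patch series enables: importing a (possibly bidirectional) ONNX LSTM with OpenCV's DNN module and running a forward pass. It is illustrative only — the model filename "lstm.onnx" and the tensor sizes are placeholders, not artifacts from these patches; only readNetFromONNX, setInput and forward are the actual public API involved, and the comment about the output layout reflects the Reshape added in patch 3 rather than a guaranteed contract.

#include <opencv2/core.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>

int main()
{
    using namespace cv;
    using namespace cv::dnn;

    // Hypothetical single-layer LSTM exported to ONNX (e.g. from PyTorch).
    Net net = readNetFromONNX("lstm.onnx");

    // ONNX LSTM expects X of shape [seq_length, batch_size, input_size].
    const int seqLen = 5, batch = 1, inputSize = 10;
    int inpShape[] = {seqLen, batch, inputSize};
    Mat input(3, inpShape, CV_32F, Scalar(0.1f));  // constant dummy data

    net.setInput(input);
    Mat out = net.forward();

    // After this series the Y output is reshaped to roughly mirror ONNX:
    // [seq_length, num_directions, batch_size, hidden_size].
    for (int i = 0; i < out.dims; ++i)
        std::cout << out.size[i] << (i + 1 < out.dims ? " x " : "\n");
    return 0;
}

This mirrors how the new Test_ONNX_layers::LSTM and LSTM_bidirectional cases exercise the importer, except that the tests feed reference .npy blobs instead of the dummy input used here.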