From 76350cd30f6c4697b5be7968ba0b8ab9cc28b64c Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Mon, 13 Feb 2023 17:00:20 +0300 Subject: [PATCH] Merge pull request #23161 from dkurt:dnn_tflite TFLite models importer * initial commit * Refactor TFLiteImporter * Better FlatBuffers detection * Add permute before 4D->3D reshape * Track layers layout * TFLite Convolution2DTransposeBias layer * Skip TFLite tests without FlatBuffers * Fix check of FlatBuffers in tests. Add readNetFromTFLite from buffer * TFLite Max Unpooling test * Add skip for TFLite unpooling test * Revert DW convolution workaround * Fix ObjC bindings * Better errors handling * Regenerate TFLite schema using flatc * dnn(tflite): more checks, better logging * Checks for unimplemented fusion. Fix tests --- CMakeLists.txt | 4 + cmake/OpenCVFindFlatBuffers.cmake | 15 + modules/dnn/CMakeLists.txt | 17 + modules/dnn/include/opencv2/dnn/dnn.hpp | 20 + modules/dnn/misc/objc/gen_dict.json | 4 +- modules/dnn/src/dnn_read.cpp | 6 + modules/dnn/src/tflite/builtin_op_data.h | 41 + modules/dnn/src/tflite/schema.fbs | 1341 ++++++++++++++++++++ modules/dnn/src/tflite/tflite_importer.cpp | 644 ++++++++++ modules/dnn/test/test_tflite_importer.cpp | 123 ++ platforms/js/opencv_js.config.py | 2 +- 11 files changed, 2215 insertions(+), 2 deletions(-) create mode 100644 cmake/OpenCVFindFlatBuffers.cmake create mode 100644 modules/dnn/src/tflite/builtin_op_data.h create mode 100644 modules/dnn/src/tflite/schema.fbs create mode 100644 modules/dnn/src/tflite/tflite_importer.cpp create mode 100644 modules/dnn/test/test_tflite_importer.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index cbf43a1605..5543cba93a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -471,6 +471,9 @@ OCV_OPTION(WITH_OBSENSOR "Include obsensor support (Orbbec RGB-D modules: Astra+ OCV_OPTION(WITH_CANN "Include CANN support" OFF VISIBLE_IF TRUE VERIFY HAVE_CANN) +OCV_OPTION(WITH_FLATBUFFERS "Include FlatBuffers support" OFF + VISIBLE_IF TRUE + VERIFY 
HAVE_FLATBUFFERS) # OpenCV build components # =================================================== @@ -750,6 +753,7 @@ include(cmake/OpenCVFindLibsVideo.cmake) include(cmake/OpenCVFindLibsPerf.cmake) include(cmake/OpenCVFindLAPACK.cmake) include(cmake/OpenCVFindProtobuf.cmake) +include(cmake/OpenCVFindFlatBuffers.cmake) if(WITH_TENGINE) include(cmake/OpenCVFindTengine.cmake) endif() diff --git a/cmake/OpenCVFindFlatBuffers.cmake b/cmake/OpenCVFindFlatBuffers.cmake new file mode 100644 index 0000000000..2b204314eb --- /dev/null +++ b/cmake/OpenCVFindFlatBuffers.cmake @@ -0,0 +1,15 @@ +set(HAVE_FLATBUFFERS FALSE) + +if(NOT WITH_FLATBUFFERS) + return() +endif() + +list(APPEND CUSTOM_STATUS flatbuffers) + +find_package(flatbuffers QUIET) +if(flatbuffers_FOUND) + set(HAVE_FLATBUFFERS 1) + list(APPEND CUSTOM_STATUS_flatbuffers " FlatBuffers:" "${flatbuffers_VERSION}") +else() + list(APPEND CUSTOM_STATUS_flatbuffers " FlatBuffers:" "NO") +endif() diff --git a/modules/dnn/CMakeLists.txt b/modules/dnn/CMakeLists.txt index 1ec21c085d..e5aca128be 100644 --- a/modules/dnn/CMakeLists.txt +++ b/modules/dnn/CMakeLists.txt @@ -133,6 +133,17 @@ if(NOT BUILD_PROTOBUF) list(APPEND include_dirs ${Protobuf_INCLUDE_DIRS}) endif() +if(HAVE_FLATBUFFERS) + list(APPEND libs flatbuffers::flatbuffers) + list(APPEND fw_srcs "${CMAKE_CURRENT_BINARY_DIR}/schema_generated.h") + + add_custom_command( + OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/schema_generated.h" + COMMAND flatbuffers::flatc --cpp -o "${CMAKE_CURRENT_BINARY_DIR}" "${CMAKE_CURRENT_LIST_DIR}/src/tflite/schema.fbs") + + ocv_target_compile_definitions(${the_module} PRIVATE "HAVE_FLATBUFFERS=1") +endif() + set(sources_options "") list(APPEND libs ${LAPACK_LIBRARIES}) @@ -280,3 +291,9 @@ if(TARGET ocv.3rdparty.cann AND OPENCV_TEST_DNN_CANN) ocv_target_link_libraries(opencv_test_dnn ocv.3rdparty.cann) endif() endif() + +if(HAVE_FLATBUFFERS) + if(TARGET opencv_test_dnn) + ocv_target_compile_definitions(opencv_test_dnn PRIVATE 
"HAVE_FLATBUFFERS=1") + endif() +endif() diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index ffc9473c6e..11ad69b8d9 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -953,6 +953,26 @@ CV__DNN_INLINE_NS_BEGIN CV_EXPORTS Net readNetFromTensorflow(const char *bufferModel, size_t lenModel, const char *bufferConfig = NULL, size_t lenConfig = 0); + /** @brief Reads a network model stored in TFLite framework's format. + * @param model path to the .tflite file with binary flatbuffers description of the network architecture + * @returns Net object. + */ + CV_EXPORTS_W Net readNetFromTFLite(const String &model); + + /** @brief Reads a network model stored in TFLite framework's format. + * @param bufferModel buffer containing the content of the tflite file + * @returns Net object. + */ + CV_EXPORTS_W Net readNetFromTFLite(const std::vector<uchar>& bufferModel); + + /** @brief Reads a network model stored in TFLite framework's format. + * @details This is an overloaded member function, provided for convenience. + * It differs from the above function only in what argument(s) it accepts. + * @param bufferModel buffer containing the content of the tflite file + * @param lenModel length of bufferModel + */ + CV_EXPORTS Net readNetFromTFLite(const char *bufferModel, size_t lenModel); + /** * @brief Reads a network model stored in Torch7 framework's format. * @param model path to the file, dumped from Torch by using torch.save() function. 
diff --git a/modules/dnn/misc/objc/gen_dict.json b/modules/dnn/misc/objc/gen_dict.json index 6072bdfc01..8aab0a5500 100644 --- a/modules/dnn/misc/objc/gen_dict.json +++ b/modules/dnn/misc/objc/gen_dict.json @@ -8,7 +8,9 @@ "(Net*)readNetFromONNX:(NSString*)onnxFile" : { "readNetFromONNX" : {"name" : "readNetFromONNXFile"} }, "(Net*)readNetFromONNX:(ByteVector*)buffer" : { "readNetFromONNX" : {"name" : "readNetFromONNXBuffer"} }, "(Net*)readNetFromTensorflow:(NSString*)model config:(NSString*)config" : { "readNetFromTensorflow" : {"name" : "readNetFromTensorflowFile"} }, - "(Net*)readNetFromTensorflow:(ByteVector*)bufferModel bufferConfig:(ByteVector*)bufferConfig" : { "readNetFromTensorflow" : {"name" : "readNetFromTensorflowBuffer"} } + "(Net*)readNetFromTensorflow:(ByteVector*)bufferModel bufferConfig:(ByteVector*)bufferConfig" : { "readNetFromTensorflow" : {"name" : "readNetFromTensorflowBuffer"} }, + "(Net*)readNetFromTFLite:(NSString*)model" : { "readNetFromTFLite" : {"name" : "readNetFromTFLiteFile"} }, + "(Net*)readNetFromTFLite:(ByteVector*)buffer" : { "readNetFromTFLite" : {"name" : "readNetFromTFLiteBuffer"} } }, "Net": { "(void)forward:(NSMutableArray*)outputBlobs outputName:(NSString*)outputName" : { "forward" : {"name" : "forwardOutputBlobs"} }, diff --git a/modules/dnn/src/dnn_read.cpp b/modules/dnn/src/dnn_read.cpp index 931170722b..9c06ced3c4 100644 --- a/modules/dnn/src/dnn_read.cpp +++ b/modules/dnn/src/dnn_read.cpp @@ -29,6 +29,10 @@ Net readNet(const String& _model, const String& _config, const String& _framewor std::swap(model, config); return readNetFromTensorflow(model, config); } + if (framework == "tflite" || modelExt == "tflite") + { + return readNetFromTFLite(model); + } if (framework == "torch" || modelExt == "t7" || modelExt == "net" || configExt == "t7" || configExt == "net") { return readNetFromTorch(model.empty() ? 
config : model); @@ -66,6 +70,8 @@ Net readNet(const String& _framework, const std::vector& bufferModel, CV_Error(Error::StsNotImplemented, "Reading Torch models from buffers"); else if (framework == "dldt") return readNetFromModelOptimizer(bufferConfig, bufferModel); + else if (framework == "tflite") + return readNetFromTFLite(bufferModel); CV_Error(Error::StsError, "Cannot determine an origin framework with a name " + framework); } diff --git a/modules/dnn/src/tflite/builtin_op_data.h b/modules/dnn/src/tflite/builtin_op_data.h new file mode 100644 index 0000000000..114d4f0cf8 --- /dev/null +++ b/modules/dnn/src/tflite/builtin_op_data.h @@ -0,0 +1,41 @@ +// source: https://github.com/tensorflow/tensorflow/blob/b2f5959ff823a8ed5bf4883e785f8f96d4253a8b/tensorflow/lite/core/c/builtin_op_data.h +typedef enum { + kTfLitePaddingUnknown = 0, + kTfLitePaddingSame, + kTfLitePaddingValid, +} TfLitePadding; + +typedef enum { + kTfLiteActNone = 0, + kTfLiteActRelu, + kTfLiteActReluN1To1, // min(max(-1, x), 1) + kTfLiteActRelu6, // min(max(0, x), 6) + kTfLiteActTanh, + kTfLiteActSignBit, + kTfLiteActSigmoid, +} TfLiteFusedActivation; + +typedef struct { + int width; + int height; + int width_offset; + int height_offset; +} TfLitePaddingValues; + +typedef struct { + TfLitePadding padding; + int stride_width; + int stride_height; + int filter_width; + int filter_height; + TfLiteFusedActivation activation; + struct { + TfLitePaddingValues padding; + } computed; +} TfLitePoolParams; + +typedef struct { + TfLitePadding padding; + int stride_width; + int stride_height; +} TfLiteTransposeConvParams; diff --git a/modules/dnn/src/tflite/schema.fbs b/modules/dnn/src/tflite/schema.fbs new file mode 100644 index 0000000000..7eb63f60ef --- /dev/null +++ b/modules/dnn/src/tflite/schema.fbs @@ -0,0 +1,1341 @@ +// source: https://github.com/tensorflow/tensorflow/blob/b0164f014fd4f1b5af2c7b578aa7687198c5d92e/tensorflow/lite/schema/schema.fbs +// Copyright 2017 The TensorFlow Authors. 
All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Revision History +// Version 0: Initial version. +// Version 1: Add subgraphs to schema. +// Version 2: Rename operators to conform to NN API. +// Version 3: Move buffer data from Model.Subgraph.Tensors to Model.Buffers. +// Version 3a: Add new builtin op code field. Has backward compatibility with +// version 3. +// Version 3b: Rename fields in SignatureDef. Has backward compatibility with +// version 3 and 3a. + +namespace opencv_tflite; + +// This corresponds to the version. +file_identifier "TFL3"; +// File extension of any written files. +file_extension "tflite"; + +// IMPORTANT: All new members of tables, enums and unions must be added at the +// end to ensure backwards compatibility. + +// The type of data stored in a tensor. +enum TensorType : byte { + FLOAT32 = 0, + FLOAT16 = 1, + INT32 = 2, + UINT8 = 3, + INT64 = 4, + STRING = 5, + BOOL = 6, + INT16 = 7, + COMPLEX64 = 8, + INT8 = 9, + FLOAT64 = 10, + COMPLEX128 = 11, + UINT64 = 12, + // Experimental: Resource and variant types are experimental, that are subject + // to change. Do not implement custom kernels using resource & variant types + // now. + RESOURCE = 13, + VARIANT = 14, + UINT32 = 15, + UINT16 = 16, + INT4 = 17, +} + +// Custom quantization parameters for experimenting with new quantization +// techniques. 
+table CustomQuantization { + custom:[ubyte] (force_align: 16); +} + +// Represents a specific quantization technique's parameters. +union QuantizationDetails { + CustomQuantization, +} + +// Parameters for converting a quantized tensor back to float. +table QuantizationParameters { + // These four parameters are the asymmetric linear quantization parameters. + // Given a quantized value q, the corresponding float value f should be: + // f = scale * (q - zero_point) + // For other quantization types, the QuantizationDetails below is used. + min:[float]; // For importing back into tensorflow. + max:[float]; // For importing back into tensorflow. + scale:[float]; // For dequantizing the tensor's values. + zero_point:[long]; + + // If this is not none, the other quantization parameters (i.e. min, max, + // scale, zero_point fields above) are ignored and the value of the + // QuantizationDetails union should be used. + details:QuantizationDetails; + + // Specifies the dimension of the Tensor's shape that the scales and + // zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1] + // with quantization params: + // scale=[1.0, 2.0, 3.0], zero_point=[1, 2, 3], quantized_dimension=1 + // will be quantized across the second dimension of t. + // t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1 + // t[:, 1, :, :] will have scale[1]=2.0, zero_point[1]=2 + // t[:, 2, :, :] will have scale[2]=3.0, zero_point[2]=3 + quantized_dimension:int; +} + +// Sparse tensors. +// We use a modification of the TACO format. +// Reference: http://tensor-compiler.org/kjolstad-oopsla17-tensor-compiler.pdf +// +// To encode a conceptual n-dimensional dense tensor with dims (d0, ..., dn-1), +// potentially with a k-dimensional block (0 <= k <= n) with dims +// (dn, ..., dn+k-1), the format needs to specify: +// 1. In what order to traverse these dimensions. 
For example, to store a 2-D +// matrix in row major order, the traversal order would be (d0, d1), +// whereas to store it in column major order, the traversal order would be +// (d1, d0). If the 2-D matrix has a 2-D inner block, the traversal order +// could be (d0, d1, d2, d3). +// 2. How each block dimension in (dn, ..., dn+k-1) maps to the original +// tensor dimension in (d0, ..., dn-1). +// 3. In the traversal order defined above, the format (dense vs. sparse) and +// index metadata for each dimension. For a dense dimension, this is just +// the size of that dimension. For a sparse dimension, it's the same as +// the compressed index defined in the Compressed Sparse Row (CSR) format. +// (http://scipy-lectures.org/advanced/scipy_sparse/csr_matrix.html) + +// The storage type for a dimension. Currently we support: +// 1. DENSE: each coordinate in this dimension is stored implicitly. +// 2. SPARSE_CSR: only the coordinates with non-zero elements are stored. The +// compression technique is the same as what CSR uses. +// More types like a sparse dimension with a different compression technique +// could be added to the list in the future. +enum DimensionType : byte { + DENSE = 0, + SPARSE_CSR = 1, +} + +table Int32Vector { + values:[int]; +} + +table Uint16Vector { + values:[ushort] (force_align: 4); +} + +table Uint8Vector { + values:[ubyte] (force_align: 4); +} + +// Variable-typed buffer to store the index metadata for a sparse dimension. +// The widest type is Int32 instead of UInt32 because tensor's shape is an int32 +// vector. We don't want the per-dimensional index to overflow that range. +union SparseIndexVector { + Int32Vector, + Uint16Vector, + Uint8Vector +} + +table DimensionMetadata { + // Whether a dimension is dense or sparse. + format:DimensionType; + // Index metadata used for a dimension. + // - If format is DimensionType.DENSE then we use the dense_size field to + // store the size of that dimension. 
Each index in that dimension is + // stored implicitly. + // - If format is DimensionType.SPARSE_CSR then we use array_segments and + // array_indices to encode that dimension. array_segments represents how + // to segment the indices array, each segment corresponds to one element + // in the previous dimension. array_indices represents the index of the + // non-zero elements within this dimension (as those in the CSR matrix + // format, where the first array is row pointers and the second array is + // column indices). + dense_size:int; + array_segments:SparseIndexVector; + array_indices:SparseIndexVector; +} + +// Parameters to encode a sparse TfLite tensor. +table SparsityParameters { + // The traversal order of the dimensions defined in the `shape` field of the + // conceptual dense tensor. For an n-dimensional tensor with dims (d0, d1, + // ..., dn-1), + // - if not block sparse, the traversal_order is just a permutation of (d0, + // ..., dn-1). For example, a 2-D matrix stored in row-major order would + // have traversal_order = (d0, d1). + // - if block sparse with a k-dimensional block (0 <= k <= n), the + // traversal_order has n + k elements. The first n elements are still a + // permutation of (d0, ..., dn-1). The last k elements are a permutation + // of (dn, ..., dn+k-1), defining how to traverse a block internally. For + // example, a 2-D matrix with 2-D blocks, both stored in row-major order + // would have traversal_order = (d0, d1, d2, d3). + traversal_order:[int]; + // For an n-dimensional tensor with a k-dimensional block (0 <= k <= n), + // stores how a block dimension in (dn, ..., dn+k-1) maps to the original + // tensor dimension in (d0, ..., dn-1). + // It's stored in the order of (dn, ..., dn+k-1). + // If not block-sparse, this field is NULL. + block_map:[int]; + // In the traversal order defined above, the metadata needed for + // each dimension to locate the non-zero values in the original dense tensor. 
+ // The size of the dim_metadata array = the size of the traversal_order array + // = n + k. + dim_metadata:[DimensionMetadata]; +} + +// The nested tensor type for VARIANT type. +table VariantSubType { + // The tensor shape. + shape:[int]; + type:TensorType; + // If false, the rank or the number of tensor dimensions is unknown. + // If false, "shape" must be []. + has_rank: bool = false; +} + +table Tensor { + // The tensor shape. The meaning of each entry is operator-specific but + // builtin ops use: [batch size, height, width, number of channels] (That's + // Tensorflow's NHWC). + shape:[int]; + type:TensorType; + // An index that refers to the buffers table at the root of the model. Or, + // if there is no data buffer associated (i.e. intermediate results), then + // this is 0 (which refers to an always existent empty buffer). + // + // The data_buffer itself is an opaque container, with the assumption that the + // target device is little-endian. In addition, all builtin operators assume + // the memory is ordered such that if `shape` is [4, 3, 2], then index + // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k]. + buffer:uint; + name:string; // For debugging and importing back into tensorflow. + quantization:QuantizationParameters; // Optional. + + is_variable:bool = false; + + // Parameters to encode a sparse tensor. See the example in + // tensorflow/lite/testdata/sparse_tensor.json. + sparsity:SparsityParameters; // Optional. + + // Encodes `shape` with unknown dimensions. Unknown dimensions are + // represented with -1. + shape_signature:[int]; // Optional. + + // If false, the rank or the number of tensor dimensions is unknown. + // If false, "shape" must be []. + has_rank: bool = false; + + // The nested Tensor types for VARIANT type. This is always empty for + // non-VARIANT types. This is optional because the nested type can be omitted. + // Currently only 1 subtype is supported. 
The field is defined as an array for + // flexibility of supporting multiple subtypes in the future. + variant_tensors:[VariantSubType]; +} + +// A list of builtin operators. Builtin operators are slightly faster than custom +// ones, but not by much. Moreover, while custom operators accept an opaque +// object containing configuration parameters, builtins have a predetermined +// set of acceptable options. +// LINT.IfChange +enum BuiltinOperator : int32 { + ADD = 0, + AVERAGE_POOL_2D = 1, + CONCATENATION = 2, + CONV_2D = 3, + DEPTHWISE_CONV_2D = 4, + DEPTH_TO_SPACE = 5, + DEQUANTIZE = 6, + EMBEDDING_LOOKUP = 7, + FLOOR = 8, + FULLY_CONNECTED = 9, + HASHTABLE_LOOKUP = 10, + L2_NORMALIZATION = 11, + L2_POOL_2D = 12, + LOCAL_RESPONSE_NORMALIZATION = 13, + LOGISTIC = 14, + LSH_PROJECTION = 15, + LSTM = 16, + MAX_POOL_2D = 17, + MUL = 18, + RELU = 19, + // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed + // since different model developers use RELU1 in different ways. Never + // create another op called RELU1. + RELU_N1_TO_1 = 20, + RELU6 = 21, + RESHAPE = 22, + RESIZE_BILINEAR = 23, + RNN = 24, + SOFTMAX = 25, + SPACE_TO_DEPTH = 26, + SVDF = 27, + TANH = 28, + CONCAT_EMBEDDINGS = 29, + SKIP_GRAM = 30, + CALL = 31, + CUSTOM = 32, + EMBEDDING_LOOKUP_SPARSE = 33, + PAD = 34, + UNIDIRECTIONAL_SEQUENCE_RNN = 35, + GATHER = 36, + BATCH_TO_SPACE_ND = 37, + SPACE_TO_BATCH_ND = 38, + TRANSPOSE = 39, + MEAN = 40, + SUB = 41, + DIV = 42, + SQUEEZE = 43, + UNIDIRECTIONAL_SEQUENCE_LSTM = 44, + STRIDED_SLICE = 45, + BIDIRECTIONAL_SEQUENCE_RNN = 46, + EXP = 47, + TOPK_V2 = 48, + SPLIT = 49, + LOG_SOFTMAX = 50, + // DELEGATE is a special op type for the operations which are delegated to + // other backends. 
+ // WARNING: Experimental interface, subject to change + DELEGATE = 51, + BIDIRECTIONAL_SEQUENCE_LSTM = 52, + CAST = 53, + PRELU = 54, + MAXIMUM = 55, + ARG_MAX = 56, + MINIMUM = 57, + LESS = 58, + NEG = 59, + PADV2 = 60, + GREATER = 61, + GREATER_EQUAL = 62, + LESS_EQUAL = 63, + SELECT = 64, + SLICE = 65, + SIN = 66, + TRANSPOSE_CONV = 67, + SPARSE_TO_DENSE = 68, + TILE = 69, + EXPAND_DIMS = 70, + EQUAL = 71, + NOT_EQUAL = 72, + LOG = 73, + SUM = 74, + SQRT = 75, + RSQRT = 76, + SHAPE = 77, + POW = 78, + ARG_MIN = 79, + FAKE_QUANT = 80, + REDUCE_PROD = 81, + REDUCE_MAX = 82, + PACK = 83, + LOGICAL_OR = 84, + ONE_HOT = 85, + LOGICAL_AND = 86, + LOGICAL_NOT = 87, + UNPACK = 88, + REDUCE_MIN = 89, + FLOOR_DIV = 90, + REDUCE_ANY = 91, + SQUARE = 92, + ZEROS_LIKE = 93, + FILL = 94, + FLOOR_MOD = 95, + RANGE = 96, + RESIZE_NEAREST_NEIGHBOR = 97, + LEAKY_RELU = 98, + SQUARED_DIFFERENCE = 99, + MIRROR_PAD = 100, + ABS = 101, + SPLIT_V = 102, + UNIQUE = 103, + CEIL = 104, + REVERSE_V2 = 105, + ADD_N = 106, + GATHER_ND = 107, + COS = 108, + WHERE = 109, + RANK = 110, + ELU = 111, + REVERSE_SEQUENCE = 112, + MATRIX_DIAG = 113, + QUANTIZE = 114, + MATRIX_SET_DIAG = 115, + ROUND = 116, + HARD_SWISH = 117, + IF = 118, + WHILE = 119, + NON_MAX_SUPPRESSION_V4 = 120, + NON_MAX_SUPPRESSION_V5 = 121, + SCATTER_ND = 122, + SELECT_V2 = 123, + DENSIFY = 124, + SEGMENT_SUM = 125, + BATCH_MATMUL = 126, + PLACEHOLDER_FOR_GREATER_OP_CODES = 127, + CUMSUM = 128, + CALL_ONCE = 129, + BROADCAST_TO = 130, + RFFT2D = 131, + CONV_3D = 132, + IMAG=133, + REAL=134, + COMPLEX_ABS=135, + HASHTABLE = 136, + HASHTABLE_FIND = 137, + HASHTABLE_IMPORT = 138, + HASHTABLE_SIZE = 139, + REDUCE_ALL = 140, + CONV_3D_TRANSPOSE = 141, + VAR_HANDLE = 142, + READ_VARIABLE = 143, + ASSIGN_VARIABLE = 144, + BROADCAST_ARGS = 145, + RANDOM_STANDARD_NORMAL = 146, + BUCKETIZE = 147, + RANDOM_UNIFORM = 148, + MULTINOMIAL = 149, + GELU = 150, + DYNAMIC_UPDATE_SLICE = 151, + RELU_0_TO_1 = 152, + UNSORTED_SEGMENT_PROD = 
153, + UNSORTED_SEGMENT_MAX = 154, + UNSORTED_SEGMENT_SUM = 155, + ATAN2 = 156, + UNSORTED_SEGMENT_MIN = 157, + SIGN = 158 +} +// LINT.ThenChange(nnapi_linter/linter.proto) + +// Options for the builtin operators. +union BuiltinOptions { + Conv2DOptions, + DepthwiseConv2DOptions, + ConcatEmbeddingsOptions, + LSHProjectionOptions, + Pool2DOptions, + SVDFOptions, + RNNOptions, + FullyConnectedOptions, + SoftmaxOptions, + ConcatenationOptions, + AddOptions, + L2NormOptions, + LocalResponseNormalizationOptions, + LSTMOptions, + ResizeBilinearOptions, + CallOptions, + ReshapeOptions, + SkipGramOptions, + SpaceToDepthOptions, + EmbeddingLookupSparseOptions, + MulOptions, + PadOptions, + GatherOptions, + BatchToSpaceNDOptions, + SpaceToBatchNDOptions, + TransposeOptions, + ReducerOptions, + SubOptions, + DivOptions, + SqueezeOptions, + SequenceRNNOptions, + StridedSliceOptions, + ExpOptions, + TopKV2Options, + SplitOptions, + LogSoftmaxOptions, + CastOptions, + DequantizeOptions, + MaximumMinimumOptions, + ArgMaxOptions, + LessOptions, + NegOptions, + PadV2Options, + GreaterOptions, + GreaterEqualOptions, + LessEqualOptions, + SelectOptions, + SliceOptions, + TransposeConvOptions, + SparseToDenseOptions, + TileOptions, + ExpandDimsOptions, + EqualOptions, + NotEqualOptions, + ShapeOptions, + PowOptions, + ArgMinOptions, + FakeQuantOptions, + PackOptions, + LogicalOrOptions, + OneHotOptions, + LogicalAndOptions, + LogicalNotOptions, + UnpackOptions, + FloorDivOptions, + SquareOptions, + ZerosLikeOptions, + FillOptions, + BidirectionalSequenceLSTMOptions, + BidirectionalSequenceRNNOptions, + UnidirectionalSequenceLSTMOptions, + FloorModOptions, + RangeOptions, + ResizeNearestNeighborOptions, + LeakyReluOptions, + SquaredDifferenceOptions, + MirrorPadOptions, + AbsOptions, + SplitVOptions, + UniqueOptions, + ReverseV2Options, + AddNOptions, + GatherNdOptions, + CosOptions, + WhereOptions, + RankOptions, + ReverseSequenceOptions, + MatrixDiagOptions, + QuantizeOptions, + 
MatrixSetDiagOptions, + HardSwishOptions, + IfOptions, + WhileOptions, + DepthToSpaceOptions, + NonMaxSuppressionV4Options, + NonMaxSuppressionV5Options, + ScatterNdOptions, + SelectV2Options, + DensifyOptions, + SegmentSumOptions, + BatchMatMulOptions, + CumsumOptions, + CallOnceOptions, + BroadcastToOptions, + Rfft2dOptions, + Conv3DOptions, + HashtableOptions, + HashtableFindOptions, + HashtableImportOptions, + HashtableSizeOptions, + VarHandleOptions, + ReadVariableOptions, + AssignVariableOptions, + RandomOptions, + BucketizeOptions, + GeluOptions, + DynamicUpdateSliceOptions, + UnsortedSegmentProdOptions, + UnsortedSegmentMaxOptions, + UnsortedSegmentMinOptions, + UnsortedSegmentSumOptions, + ATan2Options, + SignOptions +} + +// LINT.IfChange +enum Padding : byte { SAME, VALID } +// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td) + +// LINT.IfChange +enum ActivationFunctionType : byte { + NONE = 0, + RELU = 1, + RELU_N1_TO_1 = 2, + RELU6 = 3, + TANH = 4, + SIGN_BIT = 5, +} +// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td) + +table Conv2DOptions { + padding:Padding; + stride_w:int; + stride_h:int; + fused_activation_function:ActivationFunctionType; + dilation_w_factor:int = 1; + dilation_h_factor:int = 1; +} + +// Options for both Conv3D and Conv3DTranspose. +table Conv3DOptions { + padding:Padding; + stride_d:int; + stride_w:int; + stride_h:int; + fused_activation_function:ActivationFunctionType; + dilation_d_factor:int = 1; + dilation_w_factor:int = 1; + dilation_h_factor:int = 1; +} + +table Pool2DOptions { + padding:Padding; + stride_w:int; + stride_h:int; + filter_width:int; + filter_height:int; + fused_activation_function:ActivationFunctionType; +} + +table DepthwiseConv2DOptions { + // Parameters for DepthwiseConv version 1 or above. + padding:Padding; + stride_w:int; + stride_h:int; + // `depth_multiplier` is redundant. It's used by CPU kernels in + // TensorFlow 2.0 or below, but ignored in versions above. 
+ // See comments in lite/c/builtin_op_data.h for more details. + depth_multiplier:int; + fused_activation_function:ActivationFunctionType; + // Parameters for DepthwiseConv version 2 or above. + dilation_w_factor:int = 1; + dilation_h_factor:int = 1; +} + +table ConcatEmbeddingsOptions { + num_channels:int; + num_columns_per_channel:[int]; + embedding_dim_per_channel:[int]; // This could be inferred from parameters. +} + +enum LSHProjectionType: byte { + UNKNOWN = 0, + SPARSE = 1, + DENSE = 2, +} + +table LSHProjectionOptions { + type: LSHProjectionType; +} + +table SVDFOptions { + rank:int; + fused_activation_function:ActivationFunctionType; + // For weights-only quantization, use asymmetric quantization for non + // constant inputs at evaluation time. + asymmetric_quantize_inputs:bool; +} + +// An implementation of TensorFlow RNNCell. +table RNNOptions { + fused_activation_function:ActivationFunctionType; + asymmetric_quantize_inputs:bool; +} + +// An implementation of TensorFlow dynamic_rnn with RNNCell. +table SequenceRNNOptions { + time_major:bool; + fused_activation_function:ActivationFunctionType; + asymmetric_quantize_inputs:bool; +} + +// An implementation of TensorFlow bidirectional_dynamic_rnn with RNNCell. +table BidirectionalSequenceRNNOptions { + time_major:bool; + fused_activation_function:ActivationFunctionType; + merge_outputs: bool; + asymmetric_quantize_inputs:bool; +} + +// LINT.IfChange +enum FullyConnectedOptionsWeightsFormat: byte { + DEFAULT = 0, + SHUFFLED4x16INT8 = 1, +} +// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td) + +// An implementation of TensorFlow fully_connected (a.k.a. Dense) layer. +table FullyConnectedOptions { + // Parameters for FullyConnected version 1 or above. + fused_activation_function:ActivationFunctionType; + + // Parameters for FullyConnected version 2 or above. + weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT; + + // Parameters for FullyConnected version 5 or above. 
+ // If set to true, then the number of dimension is preserved. Furthermore, + // all but the last dimension of the input and output shapes will be equal. + keep_num_dims: bool; + + // Parameters for FullyConnected version 7 or above. + // If set to true, then weights-only op will use asymmetric quantization for + // inputs. + asymmetric_quantize_inputs: bool; +} + +table SoftmaxOptions { + beta: float; +} + +// An implementation of TensorFlow concat. +table ConcatenationOptions { + axis:int; + fused_activation_function:ActivationFunctionType; +} + +table AddOptions { + fused_activation_function:ActivationFunctionType; + // Parameters supported by version 3. + pot_scale_int16:bool = true; +} + +table MulOptions { + fused_activation_function:ActivationFunctionType; +} + +table L2NormOptions { + // This field is currently ignored in the L2 Norm Op. + fused_activation_function:ActivationFunctionType; +} + +table LocalResponseNormalizationOptions { + radius:int; + bias:float; + alpha:float; + beta:float; +} + +// LINT.IfChange +enum LSTMKernelType : byte { + // Full LSTM kernel which supports peephole and projection. + FULL = 0, + // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell. + BASIC = 1, +} +// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td) + +// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell +table LSTMOptions { + // Parameters for LSTM version 1 or above. + fused_activation_function:ActivationFunctionType; + cell_clip: float; // Optional, 0.0 means no clipping + proj_clip: float; // Optional, 0.0 means no clipping + + // Parameters for LSTM version 2 or above. + // Basic kernel is only supported in version 2 or above. + kernel_type: LSTMKernelType = FULL; + + // Parameters for LSTM version 4 or above. + asymmetric_quantize_inputs: bool; +} + +// An implementation of TensorFlow dynamic_rnn with LSTMCell. 
+table UnidirectionalSequenceLSTMOptions { + fused_activation_function:ActivationFunctionType; + cell_clip: float; // Optional, 0.0 means no clipping + proj_clip: float; // Optional, 0.0 means no clipping + + // If true then first dimension is sequence, otherwise batch. + time_major:bool; + + // Parameter for Unidirectional Sequence LSTM version 3. + asymmetric_quantize_inputs:bool; + + // Parameter for unidirectional sequence RNN version 4. + diagonal_recurrent_tensors:bool; +} + +table BidirectionalSequenceLSTMOptions { + // Parameters supported by version 1: + fused_activation_function:ActivationFunctionType; + cell_clip: float; // Optional, 0.0 means no clipping + proj_clip: float; // Optional, 0.0 means no clipping + + // If true, store the outputs of both directions into the first output. + merge_outputs: bool; + + // Parameters supported by version 2: + // If true then first dimension is sequence, otherwise batch. + // Version 1 implementations assumed time_major to be true, so this default + // value should never change. + time_major: bool = true; + + // Parameters for version 3 or above. + asymmetric_quantize_inputs:bool; +} + +table ResizeBilinearOptions { + new_height: int (deprecated); + new_width: int (deprecated); + align_corners: bool; + half_pixel_centers: bool; +} + +table ResizeNearestNeighborOptions { + align_corners: bool; + half_pixel_centers: bool; +} + +// A call operation options +table CallOptions { + // The subgraph index that needs to be called. 
+ subgraph:uint; +} + +table PadOptions { +} + +table PadV2Options { +} + +table ReshapeOptions { + new_shape:[int]; +} + +table SpaceToBatchNDOptions { +} + +table BatchToSpaceNDOptions { +} + +table SkipGramOptions { + ngram_size: int; + max_skip_size: int; + include_all_ngrams: bool; +} + +table SpaceToDepthOptions { + block_size: int; +} + +table DepthToSpaceOptions { + block_size: int; +} + +table SubOptions { + fused_activation_function:ActivationFunctionType; + // Parameters supported by version 5 + pot_scale_int16:bool = true; +} + +table DivOptions { + fused_activation_function:ActivationFunctionType; +} + +table TopKV2Options { +} + +enum CombinerType : byte { + SUM = 0, + MEAN = 1, + SQRTN = 2, +} + +table EmbeddingLookupSparseOptions { + combiner:CombinerType; +} + +table GatherOptions { + axis: int; + // Parameters for Gather version 5 or above. + batch_dims: int = 0; +} + +table TransposeOptions { +} + +table ExpOptions { +} + +table CosOptions { +} + +table ReducerOptions { + keep_dims: bool; +} + +table SqueezeOptions { + squeeze_dims:[int]; +} + +table SplitOptions { + num_splits: int; +} + +table SplitVOptions { + num_splits: int; +} + +table StridedSliceOptions { + begin_mask: int; + end_mask: int; + ellipsis_mask: int; + new_axis_mask: int; + shrink_axis_mask: int; +} + +table LogSoftmaxOptions { +} + +table CastOptions { + in_data_type: TensorType; + out_data_type: TensorType; +} + +table DequantizeOptions { +} + +table MaximumMinimumOptions { +} + +table TileOptions { +} + +table ArgMaxOptions { + output_type : TensorType; +} + +table ArgMinOptions { + output_type : TensorType; +} + +table GreaterOptions { +} + +table GreaterEqualOptions { +} + +table LessOptions { +} + +table LessEqualOptions { +} + +table NegOptions { +} + +table SelectOptions { +} + +table SliceOptions { +} + +table TransposeConvOptions { + // Parameters supported by version 1, 2, 3: + padding:Padding; + stride_w:int; + stride_h:int; + + // Parameters supported by version 
4: + fused_activation_function:ActivationFunctionType = NONE; +} + +table ExpandDimsOptions { +} + +table SparseToDenseOptions { + validate_indices:bool; +} + +table EqualOptions { +} + +table NotEqualOptions { +} + +table ShapeOptions { + // Optional output type of the operation (int32 or int64). Defaults to int32. + out_type : TensorType; +} + +table RankOptions { +} + +table PowOptions { +} + +table FakeQuantOptions { + // Parameters supported by version 1: + min:float; + max:float; + num_bits:int; + + // Parameters supported by version 2: + narrow_range:bool; +} + +table PackOptions { + values_count:int; + axis:int; +} + +table LogicalOrOptions { +} + +table OneHotOptions { + axis:int; +} + +table AbsOptions { +} + + +table HardSwishOptions { +} + +table LogicalAndOptions { +} + +table LogicalNotOptions { +} + +table UnpackOptions { + num:int; + axis:int; +} + +table FloorDivOptions { +} + +table SquareOptions { +} + +table ZerosLikeOptions { +} + +table FillOptions { +} + +table FloorModOptions { +} + +table RangeOptions { +} + +table LeakyReluOptions { + alpha:float; +} + +table SquaredDifferenceOptions { +} + +// LINT.IfChange +enum MirrorPadMode : byte { + // Doesn't include borders. + REFLECT = 0, + // Includes borders. 
+ SYMMETRIC = 1, +} +// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td) + +table MirrorPadOptions { + mode:MirrorPadMode; +} + +table UniqueOptions { + idx_out_type:TensorType = INT32; +} + +table ReverseV2Options { +} + +table AddNOptions { +} + +table GatherNdOptions { +} + +table WhereOptions { +} + +table ReverseSequenceOptions { + seq_dim:int; + batch_dim:int = 0; +} + +table MatrixDiagOptions { +} + +table QuantizeOptions { +} + +table MatrixSetDiagOptions { +} + +table IfOptions { + then_subgraph_index:int; + else_subgraph_index:int; +} + +table CallOnceOptions { + init_subgraph_index:int; +} + +table WhileOptions { + cond_subgraph_index:int; + body_subgraph_index:int; +} + +table NonMaxSuppressionV4Options { +} + +table NonMaxSuppressionV5Options { +} + +table ScatterNdOptions { +} + +table SelectV2Options { +} + +table DensifyOptions { +} + +table SegmentSumOptions { +} + +table BatchMatMulOptions { + adj_x:bool; + adj_y:bool; + // Parameters for BatchMatMul version 4 or above. + // If set to true, then weights-only op will use asymmetric quantization for + // inputs. + asymmetric_quantize_inputs: bool; +} + +table CumsumOptions { + exclusive:bool; + reverse:bool; +} + +table BroadcastToOptions { +} + +table Rfft2dOptions { +} + +table HashtableOptions { + // The identity of hash tables. This identity will be used across different + // subgraphs in the same interpreter instance. + table_id:int; + key_dtype:TensorType; + value_dtype:TensorType; +} + +table HashtableFindOptions { +} + +table HashtableImportOptions { +} + +table HashtableSizeOptions { +} + +table VarHandleOptions { + container:string; + shared_name:string; +} + +table ReadVariableOptions { +} + +table AssignVariableOptions { +} + +table RandomOptions { + seed: long; + seed2: long; +} + +table BucketizeOptions { + boundaries: [float]; // The bucket boundaries. 
+} + +table GeluOptions { + approximate: bool; +} + +table DynamicUpdateSliceOptions { +} + +table UnsortedSegmentProdOptions { +} + +table UnsortedSegmentMaxOptions { +} + +table UnsortedSegmentSumOptions { +} + +table ATan2Options { +} + +table UnsortedSegmentMinOptions{ +} + +table SignOptions { +} + + +// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a +// builtin, or a string if the operator is custom. +table OperatorCode { + // This field is for backward compatibility. This field will be used when + // the value of the extended builtin_code field is less than + // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES. + deprecated_builtin_code:byte; + custom_code:string; + + // The version of the operator. The version needs to be bumped whenever new + // parameters are introduced into an op. + version:int = 1; + + // This field is introduced for resolving op builtin code shortage problem + // (the original BuiltinOperator enum field was represented as a byte). + // This field will be used when the value of the extended builtin_code field + // is greater than BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES. + builtin_code:BuiltinOperator; +} + +enum CustomOptionsFormat : byte { + FLEXBUFFERS = 0, +} + +// An operator takes tensors as inputs and outputs. The type of operation being +// performed is determined by an index into the list of valid OperatorCodes, +// while the specifics of each operation is configured using builtin_options +// or custom_options. +table Operator { + // Index into the operator_codes array. Using an integer here avoids + // complicated map lookups. + opcode_index:uint; + + // Optional inputs are indicated by -1. + inputs:[int]; + outputs:[int]; + + builtin_options:BuiltinOptions; + custom_options:[ubyte]; + custom_options_format:CustomOptionsFormat; + + // A list of booleans indicating the input tensors which are being mutated by + // this operator (e.g. used by RNN and LSTM). 
+ // For example, if the "inputs" array refers to 5 tensors and the second and + // fifth are mutable variables, then this list will contain + // [false, true, false, false, true]. + // + // If the list is empty, no variable is mutated in this operator. + // The list either has the same length as `inputs`, or is empty. + mutating_variable_inputs:[bool]; + + // A list of indices to the subgraph's "tensors" that are internal to an Op. + // Internal tensors are those that do not flow in or out of the operation, + // but instead are part of internal computation. As such, the operation's + // implementation may manage its memory more efficiently. They are needed + // however (i.e. not just an implementation detail) since they are part of the + // computation, which may require relevant metadata such as quantization + // parameters. + intermediates:[int]; +} + +// The root type, defining a subgraph, which typically represents an entire +// model. +table SubGraph { + // A list of all tensors used in this subgraph. + tensors:[Tensor]; + + // Indices of the tensors that are inputs into this subgraph. Note this is + // the list of non-static tensors that feed into the subgraph for inference. + inputs:[int]; + + // Indices of the tensors that are outputs out of this subgraph. Note this is + // the list of output tensors that are considered the product of the + // subgraph's inference. + outputs:[int]; + + // All operators, in execution order. + operators:[Operator]; + + // Name of this subgraph (used for debugging). + name:string; +} + +// Table of raw data buffers (used for constant tensors). Referenced by tensors +// by index. The generous alignment accommodates mmap-friendly data structures. +table Buffer { + data:[ubyte] (force_align: 16); +} + +table Metadata { + // A human readable string to uniquely identify a Metadata. + name:string; + // An index to the buffers table. + buffer:uint; +} + +// Map from an alias name of tensor to tensor index in the graph. 
+// This is used in Signature def. +table TensorMap { + // Represents the alias to use for this tensor. + name:string; + + // The actual tensor index in the primary graph, that 'name' corresponds to. + tensor_index:uint; +} + +// This corresponds to SignatureDef in Tensorflow SavedModel. +// The SignatureDef will be part of the SavedModel provided for conversion. +table SignatureDef { + // Named inputs for this signature. + inputs:[TensorMap]; + + // Named outputs for this signature. + outputs:[TensorMap]; + + // Key value which was in the Tensorflow SavedModel SignatureDef map. + signature_key:string; + + // Model tag, deprecated. + deprecated_tag:string (deprecated); + + // Index of the subgraph that corresponds to the exported method. + subgraph_index:uint; +} + +table Model { + // Version of the schema. + version:uint; + + // A list of all operator codes used in this model. This is + // kept in order because operators carry an index into this + // vector. + operator_codes:[OperatorCode]; + + // All the subgraphs of the model. The 0th is assumed to be the main + // model. + subgraphs:[SubGraph]; + + // A description of the model. + description:string; + + // Buffers of the model. + // Note the 0th entry of this array must be an empty buffer (sentinel). + // This is a convention so that tensors without a buffer can provide 0 as + // their buffer. + buffers:[Buffer]; + + // Metadata about the model. Indirects into the existing buffers list. + // Deprecated, prefer to use metadata field. + metadata_buffer:[int]; + + // Metadata about the model. + metadata:[Metadata]; + + // Optional SignatureDefs for the model. + signature_defs:[SignatureDef]; +} + +root_type Model; diff --git a/modules/dnn/src/tflite/tflite_importer.cpp b/modules/dnn/src/tflite/tflite_importer.cpp new file mode 100644 index 0000000000..ee051547f9 --- /dev/null +++ b/modules/dnn/src/tflite/tflite_importer.cpp @@ -0,0 +1,644 @@ +// This file is part of OpenCV project. 
+// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "../precomp.hpp" + +#ifdef HAVE_FLATBUFFERS +#include "schema_generated.h" +#include "builtin_op_data.h" +#endif + +#include +#undef CV_LOG_STRIP_LEVEL +#define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_VERBOSE + 1 +#include + +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN + +#ifdef HAVE_FLATBUFFERS + +using namespace opencv_tflite; + +// This values are used to indicate layer output's data layout where it's possible. +// Approach is similar to TensorFlow importer but TFLite models do not have explicit +// layout field "data_format". So we consider that all 4D inputs are in NHWC data layout. +enum DataLayout +{ + DATA_LAYOUT_NHWC, + DATA_LAYOUT_NCHW, + DATA_LAYOUT_NDHWC, + DATA_LAYOUT_UNKNOWN, + DATA_LAYOUT_PLANAR // 2-dimensional outputs (matmul, flatten, reshape to 2d) +}; + +class TFLiteImporter { +public: + TFLiteImporter(Net& net, const char* modelBuffer, size_t bufSize); + +private: + const opencv_tflite::Model* model; + const flatbuffers::Vector >* modelTensors; + std::map allTensors; + Net& dstNet; + + // This is a vector of pairs (layerId, outputId) where we iterate over + // indices from TFLite notation and get created OpenCV layers. + std::map > layerIds; + + // Tracking of layouts for layers outputs. 
+ std::vector layouts; + + void populateNet(); + + // Wrap TFLite Tensor to OpenCV Mat without data copying + Mat parseTensor(const Tensor& tensor); + + typedef void (TFLiteImporter::*TFLiteImporterNodeParser)(const Operator&, const std::string&, LayerParams&); + typedef std::map DispatchMap; + + const DispatchMap dispatch; + static DispatchMap buildDispatchMap(); + + void parseConvolution(const Operator& op, const std::string& opcode, LayerParams& layerParams); + void parseDWConvolution(const Operator& op, const std::string& opcode, LayerParams& layerParams); + void parsePadding(const Operator& op, const std::string& opcode, LayerParams& layerParams); + void parseEltwise(const Operator& op, const std::string& opcode, LayerParams& layerParams); + void parsePooling(const Operator& op, const std::string& opcode, LayerParams& layerParams); + void parsePoolingWithArgmax(const Operator& op, const std::string& opcode, LayerParams& layerParams); + void parseUnpooling(const Operator& op, const std::string& opcode, LayerParams& layerParams); + void parseReshape(const Operator& op, const std::string& opcode, LayerParams& layerParams); + void parseConcat(const Operator& op, const std::string& opcode, LayerParams& layerParams); + void parseResize(const Operator& op, const std::string& opcode, LayerParams& layerParams); + void parseDeconvolution(const Operator& op, const std::string& opcode, LayerParams& layerParams); + + int addPermuteLayer(const std::vector& order, const std::string& permName, const std::pair& inpId); +}; + +Mat TFLiteImporter::parseTensor(const Tensor& tensor) +{ + const auto tensor_shape = tensor.shape(); + CV_Assert(tensor_shape); + std::vector shape(tensor_shape->begin(), tensor_shape->end()); + int bufferIdx = tensor.buffer(); + CV_Assert(bufferIdx != 0); // 0th buffer is a no-data buffer + const Buffer* buffer = model->buffers()->Get(bufferIdx); + CV_Assert(buffer); + const auto buffer_data = buffer->data(); + CV_Assert(buffer_data); + const void* data 
= buffer_data->data(); + + int dtype = -1; + switch (tensor.type()) { + case TensorType_FLOAT32: + dtype = CV_32F; + break; + case TensorType_INT32: + dtype = CV_32S; + break; + case TensorType_FLOAT16: + dtype = CV_16S; + break; + default: + CV_Error(Error::StsNotImplemented, format("Parse tensor with type %s", EnumNameTensorType(tensor.type()))); + } + return Mat(shape, dtype, const_cast(data)); +} + +TFLiteImporter::TFLiteImporter(Net& dstNet, const char* modelBuffer, size_t bufSize) + : dstNet(dstNet), dispatch(buildDispatchMap()) +{ + flatbuffers::Verifier verifier((const uint8_t*)modelBuffer, bufSize); + if (!VerifyModelBuffer(verifier)) { + CV_Error(Error::StsError, "DNN/TFLite: model is incorrect"); + } + + model = GetModel(modelBuffer); + CV_Assert(model); + CV_Assert(model->subgraphs()); + CV_Assert(model->buffers()); + CV_CheckEQ(model->subgraphs()->size(), 1, ""); + + modelTensors = model->subgraphs()->Get(0)->tensors(); + CV_Assert(modelTensors); + for (int i = 0; i < modelTensors->size(); ++i) { + const Tensor* tensor = modelTensors->Get(i); + CV_Assert(tensor); + if (tensor->buffer() != 0) { + allTensors[i] = parseTensor(*tensor); + } + } + + populateNet(); +} + +DataLayout estimateLayout(const Tensor& t) +{ + const auto t_shape = t.shape(); + CV_Assert(t_shape); + switch (t_shape->size()) { + case 5: return DATA_LAYOUT_NDHWC; + case 4: return DATA_LAYOUT_NHWC; + case 2: return DATA_LAYOUT_PLANAR; + default: return DATA_LAYOUT_UNKNOWN; + } +} + +void TFLiteImporter::populateNet() +{ + CV_Assert(model); + const auto model_subgraphs = model->subgraphs(); + CV_Assert(model_subgraphs); + const SubGraph* subgraph = model_subgraphs->Get(0); + CV_Assert(subgraph); + const auto subgraph_inputs = subgraph->inputs(); + CV_Assert(subgraph_inputs); + const auto subgraph_operators = subgraph->operators(); + CV_Assert(subgraph_operators); + const auto opCodes = model->operator_codes(); + CV_Assert(opCodes); + + CV_Assert(modelTensors); + 
layouts.resize(modelTensors->size(), DATA_LAYOUT_UNKNOWN); + size_t subgraph_inputs_size = subgraph_inputs->size(); + for (size_t i = 0; i < subgraph_inputs_size; ++i) + { + int idx = subgraph_inputs->Get(i); + layerIds[idx] = std::make_pair(0, i); + const auto tensor = modelTensors->Get(idx); + if (!tensor) + CV_Error(Error::StsError, cv::format("DNN/TFLite: subgraph input %d (%d) is NULL", (int)i, idx)); + layouts[idx] = estimateLayout(*tensor); + } + const auto& all_operators = *subgraph_operators; + const size_t all_operators_size = all_operators.size(); + for (size_t op_idx = 0; op_idx < all_operators_size; ++op_idx) + { + const auto op = all_operators[op_idx]; + CV_Assert(op); + const auto op_inputs = op->inputs(); + CV_Assert(op_inputs); + const auto op_outputs = op->outputs(); + CV_Assert(op_outputs); + int idx = op->opcode_index(); + + LayerParams layerParams; + layerParams.name = modelTensors->Get(op_outputs->Get(0))->name()->str(); + + std::string type = EnumNameBuiltinOperator(BuiltinOperator(opCodes->Get(idx)->deprecated_builtin_code())); + if (type == "CUSTOM") { + type = opCodes->Get(idx)->custom_code()->str(); + } + + CV_LOG_DEBUG(NULL, "DNN/TFLite: processing operator (" << op_idx << "/" << all_operators_size << ") with " << op_inputs->size() << " inputs: " + << cv::format("[%s]:(%s)", type.c_str(), layerParams.name.c_str())); + + try + { + if (type == "DEQUANTIZE") { + // Convert from FP16 to FP32 + Mat data = allTensors[op_inputs->Get(0)]; + Mat dataFP32; + convertFp16(data, dataFP32); + allTensors[op_outputs->Get(0)] = dataFP32; + continue; + } + + DispatchMap::const_iterator iter = dispatch.find(type); + if (iter == dispatch.end()) + CV_Error(Error::StsNotImplemented, "Unsupported operator type " + type); + + CALL_MEMBER_FN(*this, iter->second)(*op, type, layerParams); + + // Collect input blobs + std::vector layerInputs; + std::vector inpLayouts; + for (int idx : *op_inputs) { + if (layerIds.find(idx) != layerIds.end()) { + 
layerInputs.push_back(idx); + inpLayouts.push_back(layouts[idx]); + continue; // Output from a different layer + } + + Mat blob = allTensors[idx]; + layerParams.blobs.push_back(blob.u ? blob : blob.clone()); // some tensors are owned by OpenCV + } + + int layerId = dstNet.addLayer(layerParams.name, layerParams.type, layerParams); + + // Connect layer to inputs + int i = 0; + for (int idx : layerInputs) { + auto it = layerIds.find(idx); + CV_Assert(it != layerIds.end()); + dstNet.connect(it->second.first, it->second.second, layerId, i++); + } + + // Predict output layout. Some layer-specific parsers may set them explicitly. + // Otherwise, propagate input layout. + if (layouts[op_outputs->Get(0)] == DATA_LAYOUT_UNKNOWN) { + DataLayout predictedLayout = DATA_LAYOUT_UNKNOWN; + for (auto layout : inpLayouts) { + if (layout != DATA_LAYOUT_UNKNOWN) { + if (predictedLayout == DATA_LAYOUT_UNKNOWN) + predictedLayout = layout; + else if (predictedLayout != layout) { + predictedLayout = DATA_LAYOUT_UNKNOWN; + break; + } + } + } + layouts[op_outputs->Get(0)] = predictedLayout; + } + + // Register outputs + i = 0; + for (int idx : *op_outputs) { + layerIds[idx] = std::make_pair(layerId, i++); + } + } + catch (const cv::Exception& e) + { + CV_LOG_ERROR(NULL, "DNN/TFLite: Problem during import of operator " + << cv::format("[%s]:(%s)", type.c_str(), layerParams.name.c_str()) + << " (" << op_idx << "/" << all_operators_size << "). 
Exception: " << e.what()); + if (DNN_DIAGNOSTICS_RUN) + { + continue; + } + throw; + } + } +} + +TFLiteImporter::DispatchMap TFLiteImporter::buildDispatchMap() +{ + static DispatchMap dispatch; + if (!dispatch.empty()) + return dispatch; + + dispatch["CONV_2D"] = &TFLiteImporter::parseConvolution; + dispatch["DEPTHWISE_CONV_2D"] = &TFLiteImporter::parseDWConvolution; + dispatch["RELU"] = dispatch["ADD"] = dispatch["MUL"] = dispatch["PRELU"] = + dispatch["HARD_SWISH"] = dispatch["LOGISTIC"] = &TFLiteImporter::parseEltwise; + dispatch["MAX_POOL_2D"] = dispatch["AVERAGE_POOL_2D"] = &TFLiteImporter::parsePooling; + dispatch["MaxPoolingWithArgmax2D"] = &TFLiteImporter::parsePoolingWithArgmax; + dispatch["MaxUnpooling2D"] = &TFLiteImporter::parseUnpooling; + dispatch["PAD"] = &TFLiteImporter::parsePadding; + dispatch["RESHAPE"] = &TFLiteImporter::parseReshape; + dispatch["CONCATENATION"] = &TFLiteImporter::parseConcat; + dispatch["RESIZE_BILINEAR"] = &TFLiteImporter::parseResize; + dispatch["Convolution2DTransposeBias"] = &TFLiteImporter::parseDeconvolution; + return dispatch; +} + +void TFLiteImporter::parseConvolution(const Operator& op, const std::string& opcode, LayerParams& layerParams) { + layerParams.type = "Convolution"; + + auto options = reinterpret_cast(op.builtin_options()); + if (options->fused_activation_function() != ActivationFunctionType_NONE) { + CV_Error(Error::StsNotImplemented, "Convolution with fused activation"); + } + layerParams.set("pad_mode", EnumNamePadding(options->padding())); + layerParams.set("stride_w", options->stride_w()); + layerParams.set("stride_h", options->stride_h()); + layerParams.set("dilation_w", options->dilation_w_factor()); + layerParams.set("dilation_h", options->dilation_h_factor()); + + // Get filter size + int filterIdx = op.inputs()->Get(1); + Mat filter = allTensors[filterIdx]; + int oc = filter.size[0]; + int kh = filter.size[1]; + int kw = filter.size[2]; + int ic = filter.size[3]; + layerParams.set("kernel_w", kw); 
+ layerParams.set("kernel_h", kh); + layerParams.set("num_output", oc); + + // Reorder filter data from OHWI to OIHW and change shape correspondingly. + filter = allTensors[filterIdx] = filter.reshape(1, {oc, ic, kh, kw}); + + CV_CheckTypeEQ(filter.type(), CV_32F, ""); + Mat filterCopy = filter.clone(); + float* data = filterCopy.ptr(); + float* dstData = filter.ptr(); + + int total = oc * ic * kh * kw; + for (int i_oc = 0; i_oc < oc; i_oc++) { + for (int i_ic = 0; i_ic < ic; i_ic++) { + for (int i_h = 0; i_h < kh; i_h++) { + for (int i_w = 0; i_w < kw; i_w++) { + int dst_i = kw * (kh * (ic * i_oc + i_ic) + i_h) + i_w; + int src_i = ic * (kw * (kh * i_oc + i_h) + i_w) + i_ic; + CV_CheckLT(dst_i, total, ""); + CV_CheckLT(src_i, total, ""); + dstData[dst_i] = data[src_i]; + } + } + } + } +} + +void TFLiteImporter::parseDWConvolution(const Operator& op, const std::string& opcode, LayerParams& layerParams) { + layerParams.type = "Convolution"; + + auto options = reinterpret_cast(op.builtin_options()); + if (options->fused_activation_function() != ActivationFunctionType_NONE) { + CV_Error(Error::StsNotImplemented, "Depthwise convolution with fused activation"); + } + layerParams.set("pad_mode", EnumNamePadding(options->padding())); + layerParams.set("stride_w", options->stride_w()); + layerParams.set("stride_h", options->stride_h()); + layerParams.set("dilation_w", options->dilation_w_factor()); + layerParams.set("dilation_h", options->dilation_h_factor()); + + int filterIdx = op.inputs()->Get(1); + Mat filter = allTensors[filterIdx]; + int kh = filter.size[1]; + int kw = filter.size[2]; + int oc = filter.size[3]; + layerParams.set("kernel_w", kw); + layerParams.set("kernel_h", kh); + layerParams.set("num_output", oc); + layerParams.set("group", oc); + + filter = allTensors[filterIdx] = filter.reshape(1, {oc, 1, kh, kw}); + cv::transpose(filter.reshape(1, kh * kw).clone(), filter.reshape(1, oc)); +} + +void TFLiteImporter::parsePadding(const Operator& op, const 
std::string& opcode, LayerParams& layerParams) { + layerParams.type = "Padding"; + Mat paddings = allTensors[op.inputs()->Get(1)]; + + CV_CheckTypeEQ(paddings.type(), CV_32S, ""); + // N H W C + // 0 1 2 3 4 5 6 7 + std::swap(paddings.at(2), paddings.at(6)); + std::swap(paddings.at(3), paddings.at(7)); + // N C W H + // 0 1 2 3 4 5 6 7 + std::swap(paddings.at(4), paddings.at(6)); + std::swap(paddings.at(5), paddings.at(7)); + // N C H W + // 0 1 2 3 4 5 6 7 + + layerParams.set("paddings", DictValue::arrayInt((int32_t*)paddings.data, paddings.total())); +} + +void TFLiteImporter::parseEltwise(const Operator& op, const std::string& opcode, LayerParams& layerParams) { + if (opcode == "PRELU") { + layerParams.type = "PReLU"; + } else if (opcode == "RELU") { + layerParams.type = "ReLU"; + } else if (opcode == "ADD") { + auto options = reinterpret_cast(op.builtin_options()); + if (options->fused_activation_function() != ActivationFunctionType_NONE) { + CV_Error(Error::StsNotImplemented, "Add with fused activation"); + } + layerParams.type = "Eltwise"; + layerParams.set("operation", "sum"); + } else if (opcode == "MUL") { + auto options = reinterpret_cast(op.builtin_options()); + if (options->fused_activation_function() != ActivationFunctionType_NONE) { + CV_Error(Error::StsNotImplemented, "Mul with fused activation"); + } + layerParams.type = "Eltwise"; + layerParams.set("operation", "prod"); + } else if (opcode == "HARD_SWISH") { + layerParams.type = "HardSwish"; + } else if (opcode == "LOGISTIC") { + layerParams.type = "Sigmoid"; + } else { + CV_Error(Error::StsNotImplemented, "Unknown eltwise operator opcode: " + opcode); + } +} + +void TFLiteImporter::parsePooling(const Operator& op, const std::string& opcode, LayerParams& layerParams) { + layerParams.type = "Pooling"; + + auto options = reinterpret_cast(op.builtin_options()); + if (options->fused_activation_function() != ActivationFunctionType_NONE) { + CV_Error(Error::StsNotImplemented, "Pooling with fused 
activation"); + } + layerParams.set("pad_mode", EnumNamePadding(options->padding())); + layerParams.set("stride_w", options->stride_w()); + layerParams.set("stride_h", options->stride_h()); + layerParams.set("kernel_w", options->filter_width()); + layerParams.set("kernel_h", options->filter_height()); + if (opcode == "MAX_POOL_2D") + layerParams.set("pool", "max"); + else if (opcode == "AVERAGE_POOL_2D") + layerParams.set("pool", "ave"); + else + CV_Error(Error::StsNotImplemented, "Pool type selection for " + opcode); +} + +void TFLiteImporter::parsePoolingWithArgmax(const Operator& op, const std::string& opcode, LayerParams& layerParams) { + layerParams.type = "Pooling"; + + CV_CheckLE(op.custom_options()->size(), sizeof(TfLitePoolParams), ""); + const auto* params = reinterpret_cast(op.custom_options()->Data()); + if (params->activation != kTfLiteActNone) { + CV_Error(Error::StsNotImplemented, "Argmax pooling with fused activation"); + } + if (params->padding != kTfLitePaddingUnknown) + layerParams.set("pad_mode", params->padding == kTfLitePaddingSame ? 
"SAME" : "VALID"); + layerParams.set("stride_w", params->stride_width); + layerParams.set("stride_h", params->stride_height); + layerParams.set("kernel_w", params->filter_width); + layerParams.set("kernel_h", params->filter_height); + layerParams.set("pool", "max"); +} + +void TFLiteImporter::parseUnpooling(const Operator& op, const std::string& opcode, LayerParams& layerParams) { + layerParams.type = "MaxUnpool"; + + CV_CheckLE(op.custom_options()->size(), sizeof(TfLitePoolParams), ""); + const auto* params = reinterpret_cast(op.custom_options()->Data()); + if (params->activation != kTfLiteActNone) { + CV_Error(Error::StsNotImplemented, "Unpooling with fused activation"); + } + layerParams.set("pool_stride_w", params->stride_width); + layerParams.set("pool_stride_h", params->stride_height); + layerParams.set("pool_k_w", params->filter_width); + layerParams.set("pool_k_h", params->filter_height); + layerParams.set("pool_pad_w", 0); + layerParams.set("pool_pad_h", 0); +} + +void TFLiteImporter::parseReshape(const Operator& op, const std::string& opcode, LayerParams& layerParams) { + DataLayout inpLayout = layouts[op.inputs()->Get(0)]; + + if (inpLayout == DATA_LAYOUT_NHWC) { + // Permute to NCHW + int permId = addPermuteLayer({0, 2, 3, 1}, layerParams.name + "/permute", layerIds[op.inputs()->Get(0)]); // NCHW -> NHWC + layerIds[op.inputs()->Get(0)] = std::make_pair(permId, 0); + layouts[op.outputs()->Get(0)] = DATA_LAYOUT_NCHW; + } + + layerParams.type = "Reshape"; + auto options = reinterpret_cast(op.builtin_options()); + std::vector shape(options->new_shape()->begin(), options->new_shape()->end()); + // std::swap(shape[1], shape[2]); + layerParams.set("dim", DictValue::arrayInt(shape.data(), shape.size())); +} + +void TFLiteImporter::parseConcat(const Operator& op, const std::string& opcode, LayerParams& layerParams) { + layerParams.type = "Concat"; + auto options = reinterpret_cast(op.builtin_options()); + if (options->fused_activation_function() != 
ActivationFunctionType_NONE) { + CV_Error(Error::StsNotImplemented, "Concat with fused activation"); + } + int axis = options->axis(); + + DataLayout inpLayout = layouts[op.inputs()->Get(0)]; + if (inpLayout == DATA_LAYOUT_NHWC) { + // OpenCV works in NCHW data layout. So change the axis correspondingly. + CV_Check(axis, -4 < axis && axis < 4, ""); + int remap[] = {0, 2, 3, 1}; + axis = axis > 0 ? axis : 4 + axis; + axis = remap[axis]; + } + layerParams.set("axis", axis); +} + +void TFLiteImporter::parseResize(const Operator& op, const std::string& opcode, LayerParams& layerParams) { + layerParams.type = "Resize"; + + auto options = reinterpret_cast(op.builtin_options()); + + layerParams.set("interpolation", "bilinear"); + layerParams.set("align_corners", options->align_corners()); + layerParams.set("half_pixel_centers", options->half_pixel_centers()); + + Mat shape = allTensors[op.inputs()->Get(1)].reshape(1, 1); + layerParams.set("height", shape.at(0, 0)); + layerParams.set("width", shape.at(0, 1)); +} + +int TFLiteImporter::addPermuteLayer(const std::vector& order, const std::string& permName, + const std::pair& inpId) +{ + LayerParams permLP; + permLP.set("order", DictValue::arrayInt(order.data(), order.size())); + int permId = dstNet.addLayer(permName, "Permute", permLP); + dstNet.connect(inpId.first, inpId.second, permId, 0); + return permId; +} + +void TFLiteImporter::parseDeconvolution(const Operator& op, const std::string& opcode, LayerParams& layerParams) { + layerParams.type = "Deconvolution"; + + CV_CheckLE(op.custom_options()->size(), sizeof(TfLiteTransposeConvParams), ""); + const auto* params = reinterpret_cast(op.custom_options()->Data()); + if (params->padding != kTfLitePaddingUnknown) + layerParams.set("pad_mode", params->padding == kTfLitePaddingSame ? 
"SAME" : "VALID"); + layerParams.set("stride_w", params->stride_width); + layerParams.set("stride_h", params->stride_height); + + // Get filter size + int filterIdx = op.inputs()->Get(1); + Mat filter = allTensors[filterIdx]; + int oc = filter.size[0]; + int kh = filter.size[1]; + int kw = filter.size[2]; + int ic = filter.size[3]; + layerParams.set("kernel_w", kw); + layerParams.set("kernel_h", kh); + layerParams.set("num_output", oc); + + // Add adjust padding similar to TensorFlow (see tf_importer) + const auto* outShape = modelTensors->Get(op.outputs()->Get(0))->shape(); + const int outH = outShape->Get(1); + const int outW = outShape->Get(2); + if (params->padding == kTfLitePaddingSame) + { + layerParams.set("adj_w", (outW - 1) % params->stride_width); + layerParams.set("adj_h", (outH - 1) % params->stride_height); + } + else if (params->padding == kTfLitePaddingValid) + { + layerParams.set("adj_w", (outW - kw) % params->stride_width); + layerParams.set("adj_h", (outH - kh) % params->stride_height); + } + + // Reorder filter data from OHWI to IOHW and change shape correspondingly. 
+ filter = allTensors[filterIdx] = filter.reshape(1, {ic, oc, kh, kw}); + + CV_CheckTypeEQ(filter.type(), CV_32F, ""); + Mat filterCopy = filter.clone(); + float* data = filterCopy.ptr(); + float* dstData = filter.ptr(); + + int total = oc * ic * kh * kw; + for (int i_oc = 0; i_oc < oc; i_oc++) { + for (int i_ic = 0; i_ic < ic; i_ic++) { + for (int i_h = 0; i_h < kh; i_h++) { + for (int i_w = 0; i_w < kw; i_w++) { + int dst_i = kw * (kh * (oc * i_ic + i_oc) + i_h) + i_w; + int src_i = ic * (kw * (kh * i_oc + i_h) + i_w) + i_ic; + CV_CheckLT(dst_i, total, ""); + CV_CheckLT(src_i, total, ""); + dstData[dst_i] = data[src_i]; + } + } + } + } +} + +Net readNetFromTFLite(const String &modelPath) { + Net net; + + std::vector content; + + const std::ios::openmode mode = std::ios::in | std::ios::binary; + std::ifstream ifs(modelPath, mode); + if (!ifs.is_open()) + CV_Error(Error::StsError, cv::format("DNN/TFLite: can't open model file '%s'", modelPath.c_str())); + + ifs.seekg(0, std::ios::end); + const size_t sz = ifs.tellg(); + CV_Assert(sz > 0); + content.resize(sz); + ifs.seekg(0, std::ios::beg); + + ifs.read(content.data(), sz); + CV_Assert(!ifs.bad()); + + TFLiteImporter(net, content.data(), content.size()); + return net; +} + +Net readNetFromTFLite(const std::vector& bufferModel) { + return readNetFromTFLite((const char*)bufferModel.data(), bufferModel.size()); +} + +Net readNetFromTFLite(const char *bufferModel, size_t bufSize) { + Net net; + TFLiteImporter(net, bufferModel, bufSize); + return net; +} + +#else // HAVE_FLATBUFFERS + +#define DNN_TFLITE_UNSUPPORTED() CV_Error(Error::StsError, "DNN/TFLite: Build OpenCV with FlatBuffers to import TFLite models: https://github.com/opencv/opencv/pull/23161") + +Net readNetFromTFLite(const String &) { + DNN_TFLITE_UNSUPPORTED(); +} + +Net readNetFromTFLite(const std::vector&) { + DNN_TFLITE_UNSUPPORTED(); +} + +Net readNetFromTFLite(const char *, size_t) { + DNN_TFLITE_UNSUPPORTED(); +} + +#endif // HAVE_FLATBUFFERS + 
+CV__DNN_INLINE_NS_END
+}} // namespace cv::dnn
diff --git a/modules/dnn/test/test_tflite_importer.cpp b/modules/dnn/test/test_tflite_importer.cpp
new file mode 100644
index 0000000000..41c3133593
--- /dev/null
+++ b/modules/dnn/test/test_tflite_importer.cpp
@@ -0,0 +1,133 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+/*
+Test for TFLite models loading
+*/
+
+#include "test_precomp.hpp"
+#include "npy_blob.hpp"
+
+#include // CV_DNN_REGISTER_LAYER_CLASS
+#include
+
+namespace opencv_test
+{
+
+using namespace cv;
+using namespace cv::dnn;
+
+// Loads dnn/tflite/<modelName>.tflite, runs it on `input` and compares every unconnected
+// output with the reference blob dnn/tflite/<modelName>_out_<outputName>.npy.
+// `norm` is passed through to normAssert.
+// Throws SkipTestException when HAVE_FLATBUFFERS is not defined.
+void testModel(const std::string& modelName, const Mat& input, double norm = 1e-5) {
+#ifndef HAVE_FLATBUFFERS
+    throw SkipTestException("FlatBuffers required for TFLite importer");
+#endif
+
+    Net net = readNet(findDataFile("dnn/tflite/" + modelName + ".tflite", false));
+    net.setInput(input);
+
+    std::vector<String> outNames = net.getUnconnectedOutLayersNames();
+
+    std::vector<Mat> outs;
+    net.forward(outs, outNames);
+
+    ASSERT_EQ(outs.size(), outNames.size());
+    for (size_t i = 0; i < outNames.size(); ++i) {
+        Mat ref = blobFromNPY(findDataFile(format("dnn/tflite/%s_out_%s.npy", modelName.c_str(), outNames[i].c_str())));
+        normAssert(ref.reshape(1, 1), outs[i].reshape(1, 1), outNames[i].c_str(), norm);
+    }
+}
+
+// Convenience overload: builds the input blob from cv/shared/lena.png with blobFromImage
+// (scale 1/255, size `inpSize`, zero mean, swapRB = true) and runs the check above.
+void testModel(const std::string& modelName, const Size& inpSize, double norm = 1e-5) {
+    Mat input = imread(findDataFile("cv/shared/lena.png"));
+    input = blobFromImage(input, 1.0 / 255, inpSize, 0, true);
+    testModel(modelName, input, norm);
+}
+
+// https://google.github.io/mediapipe/solutions/face_mesh
+TEST(Test_TFLite, face_landmark)
+{
+    testModel("face_landmark", Size(192, 192), 2e-5);
+}
+
+// https://google.github.io/mediapipe/solutions/face_detection
+TEST(Test_TFLite, face_detection_short_range)
+{
+    testModel("face_detection_short_range", Size(128, 128));
+}
+
+// https://google.github.io/mediapipe/solutions/selfie_segmentation
+TEST(Test_TFLite, selfie_segmentation)
+{
+    testModel("selfie_segmentation", Size(256, 256));
+}
+
+TEST(Test_TFLite, max_unpooling)
+{
+#ifndef HAVE_FLATBUFFERS
+    throw SkipTestException("FlatBuffers required for TFLite importer");
+#endif
+    // Max Unpooling is a numerically unstable operation: a small difference between frameworks
+    // might move the position of the maximal elements in the tensor, so this test checks
+    // the behavior of the Max Unpooling layer only.
+    Net net = readNet(findDataFile("dnn/tflite/hair_segmentation.tflite", false));
+
+    Mat input = imread(findDataFile("cv/shared/lena.png"));
+    cvtColor(input, input, COLOR_BGR2RGBA);
+    input = input.mul(Scalar(1, 1, 1, 0));
+    input = blobFromImage(input, 1.0 / 255);
+    net.setInput(input);
+
+    std::vector<std::vector<Mat> > outs;
+    net.forward(outs, {"p_re_lu_1", "max_pooling_with_argmax2d", "conv2d_86", "max_unpooling2d_2"});
+    ASSERT_EQ(outs.size(), 4u);
+    ASSERT_EQ(outs[0].size(), 1u);
+    ASSERT_EQ(outs[1].size(), 2u);
+    ASSERT_EQ(outs[2].size(), 1u);
+    ASSERT_EQ(outs[3].size(), 1u);
+    Mat poolInp = outs[0][0];
+    Mat poolOut = outs[1][0];
+    Mat poolIds = outs[1][1];
+    Mat unpoolInp = outs[2][0];
+    Mat unpoolOut = outs[3][0];
+
+    ASSERT_EQ(poolInp.size, unpoolOut.size);
+    ASSERT_EQ(poolOut.size, poolIds.size);
+    ASSERT_EQ(poolOut.size, unpoolInp.size);
+
+    // The hard-coded bounds below assume 32 channels, 128-element rows in the pooling input
+    // and 64x64 pooled planes (2x2 windows with stride 2).
+    for (int c = 0; c < 32; ++c) {
+        float *poolInpData = poolInp.ptr<float>(0, c);
+        float *poolOutData = poolOut.ptr<float>(0, c);
+        float *poolIdsData = poolIds.ptr<float>(0, c);
+        float *unpoolInpData = unpoolInp.ptr<float>(0, c);
+        float *unpoolOutData = unpoolOut.ptr<float>(0, c);
+        for (int y = 0; y < 64; ++y) {
+            for (int x = 0; x < 64; ++x) {
+                // Start from the top-left element of the 2x2 window, then visit the other three.
+                int maxIdx = (y * 128 + x) * 2;
+                std::vector<int> indices{maxIdx + 1, maxIdx + 128, maxIdx + 129};
+                std::string errMsg = format("Channel %d, y: %d, x: %d", c, y, x);
+                for (int idx : indices) {
+                    if (poolInpData[idx] > poolInpData[maxIdx]) {
+                        // The element that just lost the maximum must be zero after unpooling.
+                        EXPECT_EQ(unpoolOutData[maxIdx], 0.0f) << errMsg;
+                        maxIdx = idx;
+                    }
+                }
+                EXPECT_EQ(poolInpData[maxIdx], poolOutData[y * 64 + x]) << errMsg;
+                EXPECT_EQ(poolIdsData[y * 64 + x], (float)maxIdx) << errMsg;
+                EXPECT_EQ(unpoolOutData[maxIdx], unpoolInpData[y * 64 + x]) << errMsg;
+            }
+        }
+    }
+}
+
+}
diff --git a/platforms/js/opencv_js.config.py b/platforms/js/opencv_js.config.py
index 903d1dd6fa..fe91e3334e 100644
--- a/platforms/js/opencv_js.config.py
+++ b/platforms/js/opencv_js.config.py
@@ -135,7 +135,7 @@ video = {
 
 dnn = {'dnn_Net': ['setInput', 'forward', 'setPreferableBackend'],
        '': ['readNetFromCaffe', 'readNetFromTensorflow', 'readNetFromTorch', 'readNetFromDarknet',
-            'readNetFromONNX', 'readNet', 'blobFromImage']}
+            'readNetFromONNX', 'readNetFromTFLite', 'readNet', 'blobFromImage']}
 
 features2d = {'Feature2D': ['detect', 'compute', 'detectAndCompute', 'descriptorSize', 'descriptorType', 'defaultNorm', 'empty', 'getDefaultName'],
               'BRISK': ['create', 'getDefaultName'],