Merge pull request #21372 from zihaomu:dnn_quantize_per_tensor

Add per_tensor_quantize to int8 quantize

* Add per_tensor_quantize to dnn int8 module.
* Change API flag from perTensor to perChannel, and recognize quantize type and ONNX importer.
* Change the default to hpp.
2022-07-06 00:14:42 +08:00
parent 16b5fd4bf2
commit a80fcacd90
13 changed files with 160 additions and 42 deletions
@@ -263,6 +263,10 @@ CV__DNN_INLINE_NS_BEGIN
    public:
        int input_zp, output_zp;
        float input_sc, output_sc;
+
+        // Quantization type flag. per_channel defaults to true, meaning this layer holds per-channel
+        // quantization parameters. When false, this layer holds per-tensor quantization parameters.
+        bool per_channel;
        static Ptr<BaseConvolutionLayer> create(const LayerParams& params);
    };

@@ -368,6 +372,10 @@ CV__DNN_INLINE_NS_BEGIN
    public:
        int input_zp, output_zp;
        float input_sc, output_sc;
+
+        // Quantization type flag. per_channel defaults to true, meaning this layer holds per-channel
+        // quantization parameters. When false, this layer holds per-tensor quantization parameters.
+        bool per_channel;
        static Ptr<InnerProductLayerInt8> create(const LayerParams& params);
    };

@@ -621,8 +621,10 @@ CV__DNN_INLINE_NS_BEGIN
         *  @param calibData Calibration data to compute the quantization parameters.
         *  @param inputsDtype Datatype of quantized net's inputs. Can be CV_32F or CV_8S.
         *  @param outputsDtype Datatype of quantized net's outputs. Can be CV_32F or CV_8S.
+         *  @param perChannel Quantization granularity of the quantized Net. Defaults to true, which quantizes the
+         *  model per-channel (channel-wise). Set it to false to quantize the model per-tensor (tensor-wise).
         */
-        CV_WRAP Net quantize(InputArrayOfArrays calibData, int inputsDtype, int outputsDtype);
+        CV_WRAP Net quantize(InputArrayOfArrays calibData, int inputsDtype, int outputsDtype, bool perChannel=true);

        /** @brief Returns input scale and zeropoint for a quantized Net.
         *  @param scales output parameter for returning input scales.