Merge pull request #21372 from zihaomu:dnn_quantize_per_tensor

Add per_tensor_quantize to int8 quantize

* add per_tensor_quantize to dnn int8 module.

* change api flag from perTensor to perChannel, and recognize quantize type and onnx importer.

* change the default to hpp
This commit is contained in:
Zihao Mu
2022-07-06 00:14:42 +08:00
committed by GitHub
parent 16b5fd4bf2
commit a80fcacd90
13 changed files with 160 additions and 42 deletions
@@ -263,6 +263,10 @@ CV__DNN_INLINE_NS_BEGIN
public:
int input_zp, output_zp;
float input_sc, output_sc;
// Quantization type flag. per_channel defaults to true, which means this layer holds
// per-channel quantization parameters; when false, it holds per-tensor quantization parameters.
bool per_channel;
static Ptr<BaseConvolutionLayer> create(const LayerParams& params);
};
@@ -368,6 +372,10 @@ CV__DNN_INLINE_NS_BEGIN
public:
int input_zp, output_zp;
float input_sc, output_sc;
// Quantization type flag. per_channel defaults to true, which means this layer holds
// per-channel quantization parameters; when false, it holds per-tensor quantization parameters.
bool per_channel;
static Ptr<InnerProductLayerInt8> create(const LayerParams& params);
};
+3 -1
View File
@@ -621,8 +621,10 @@ CV__DNN_INLINE_NS_BEGIN
* @param calibData Calibration data to compute the quantization parameters.
* @param inputsDtype Datatype of quantized net's inputs. Can be CV_32F or CV_8S.
* @param outputsDtype Datatype of quantized net's outputs. Can be CV_32F or CV_8S.
* @param perChannel Quantization granularity of the quantized Net. The default is true, which means the model
* is quantized per-channel (channel-wise). Set it to false to quantize the model per-tensor (tensor-wise).
*/
CV_WRAP Net quantize(InputArrayOfArrays calibData, int inputsDtype, int outputsDtype);
CV_WRAP Net quantize(InputArrayOfArrays calibData, int inputsDtype, int outputsDtype, bool perChannel=true);
/** @brief Returns input scale and zeropoint for a quantized Net.
* @param scales output parameter for returning input scales.