Merge pull request #22840 from zihaomu:optimze_conv_memory_usage
DNN: reduce the memory used in the convolution layer * reduce the memory used in Winograd and disable the test when memory usage is larger than 2 GB. * remove VERY_LONG tag
This commit is contained in:
parent
ab912329b6
commit
0a650b573b
@ -198,6 +198,7 @@ PERF_TEST_P_(DNNTestNetwork, Inception_v2_SSD_TensorFlow)
|
|||||||
|
|
||||||
PERF_TEST_P_(DNNTestNetwork, YOLOv3)
|
PERF_TEST_P_(DNNTestNetwork, YOLOv3)
|
||||||
{
|
{
|
||||||
|
applyTestTag(CV_TEST_TAG_MEMORY_2GB);
|
||||||
if (backend == DNN_BACKEND_HALIDE)
|
if (backend == DNN_BACKEND_HALIDE)
|
||||||
throw SkipTestException("");
|
throw SkipTestException("");
|
||||||
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000) // nGraph compilation failure
|
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000) // nGraph compilation failure
|
||||||
@ -220,6 +221,7 @@ PERF_TEST_P_(DNNTestNetwork, YOLOv3)
|
|||||||
|
|
||||||
PERF_TEST_P_(DNNTestNetwork, YOLOv4)
|
PERF_TEST_P_(DNNTestNetwork, YOLOv4)
|
||||||
{
|
{
|
||||||
|
applyTestTag(CV_TEST_TAG_MEMORY_2GB);
|
||||||
if (backend == DNN_BACKEND_HALIDE)
|
if (backend == DNN_BACKEND_HALIDE)
|
||||||
throw SkipTestException("");
|
throw SkipTestException("");
|
||||||
if (target == DNN_TARGET_MYRIAD) // not enough resources
|
if (target == DNN_TARGET_MYRIAD) // not enough resources
|
||||||
|
|||||||
@ -2112,8 +2112,11 @@ public:
|
|||||||
int dilation_h = dilations[dilations.size() - 2];
|
int dilation_h = dilations[dilations.size() - 2];
|
||||||
int dilation_w = dilations.back();
|
int dilation_w = dilations.back();
|
||||||
|
|
||||||
|
// Winograd only works well when input h and w are both >= 12.
|
||||||
|
bool canUseWinograd = useWinograd && inputs[0].size[2] >= 12 && inputs[0].size[3] >= 12;
|
||||||
|
|
||||||
fastConv2dImpl = initFastConv2d(ngroups, K, C, Hk, Wk, stride_w, stride_h, dilation_w,
|
fastConv2dImpl = initFastConv2d(ngroups, K, C, Hk, Wk, stride_w, stride_h, dilation_w,
|
||||||
dilation_h, pads_begin, pads_end, weightsMat, &biasvec[0], useWinograd);
|
dilation_h, pads_begin, pads_end, weightsMat, &biasvec[0], canUseWinograd);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (fastConv2dImpl)
|
if (fastConv2dImpl)
|
||||||
|
|||||||
@ -83,9 +83,7 @@ Ptr<FastConv2d> initFastConv2d(
|
|||||||
weightsBufPtr[c*padded_ksize + k] = srcWeights[c*wstep + k];
|
weightsBufPtr[c*padded_ksize + k] = srcWeights[c*wstep + k];
|
||||||
}});
|
}});
|
||||||
}
|
}
|
||||||
else
|
else if(conv->conv_type == _FX_CONV_TYPE_WINOGRAD3X3) // winograd
|
||||||
{
|
|
||||||
if (conv->conv_type == _FX_CONV_TYPE_WINOGRAD3X3) // winograd
|
|
||||||
{
|
{
|
||||||
static const float ktm[8][3] = {
|
static const float ktm[8][3] = {
|
||||||
{1.0f, 0.0f, 0.0f},
|
{1.0f, 0.0f, 0.0f},
|
||||||
@ -162,7 +160,8 @@ Ptr<FastConv2d> initFastConv2d(
|
|||||||
}
|
}
|
||||||
}});
|
}});
|
||||||
}
|
}
|
||||||
|
else if (conv->conv_type == _FX_CONV_TYPE_GENERIC)
|
||||||
|
{
|
||||||
// The weights are packed as
|
// The weights are packed as
|
||||||
// ngroups x (ceil((K/ngroups)/CONV_MR)*CONV_MR) x (Cg*Hk*Wk) x CONV_MR tensor
|
// ngroups x (ceil((K/ngroups)/CONV_MR)*CONV_MR) x (Cg*Hk*Wk) x CONV_MR tensor
|
||||||
int Kg = K/ngroups, Cg = max(C/ngroups, 1);
|
int Kg = K/ngroups, Cg = max(C/ngroups, 1);
|
||||||
@ -202,6 +201,8 @@ Ptr<FastConv2d> initFastConv2d(
|
|||||||
}
|
}
|
||||||
}});
|
}});
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
CV_Error(CV_StsUnsupportedFormat, "Unknown convolution type.");
|
||||||
|
|
||||||
// store bias; append some zero's to make sure that
|
// store bias; append some zero's to make sure that
|
||||||
// we can always read MR elements starting from any valid index
|
// we can always read MR elements starting from any valid index
|
||||||
@ -271,7 +272,7 @@ void runFastConv2d(InputArray _input, OutputArray _output, const Ptr<FastConv2d>
|
|||||||
CV_Assert(fusedAddMat.empty()); // Depthwise-Convolution layer should not be followed by Add layer.
|
CV_Assert(fusedAddMat.empty()); // Depthwise-Convolution layer should not be followed by Add layer.
|
||||||
return runDepthwise(input, output, conv, minval, maxval, activ, ifMinMaxAct);
|
return runDepthwise(input, output, conv, minval, maxval, activ, ifMinMaxAct);
|
||||||
}
|
}
|
||||||
else if (conv->conv_type == _FX_CONV_TYPE_WINOGRAD3X3 && inputShape[2] >= 12 && inputShape[3] >= 12) // winograd
|
else if (conv->conv_type == _FX_CONV_TYPE_WINOGRAD3X3) // winograd
|
||||||
{
|
{
|
||||||
CV_Assert(conv->weightsWinoBufPtr);
|
CV_Assert(conv->weightsWinoBufPtr);
|
||||||
if (runWinograd63(input, fusedAddMat, output, conv, ntasks, minval, maxval, activ, ifMinMaxAct))
|
if (runWinograd63(input, fusedAddMat, output, conv, ntasks, minval, maxval, activ, ifMinMaxAct))
|
||||||
|
|||||||
@ -29,7 +29,7 @@ public:
|
|||||||
void processNet(std::string weights, std::string proto,
|
void processNet(std::string weights, std::string proto,
|
||||||
Mat inp, const std::string& outputLayer = "",
|
Mat inp, const std::string& outputLayer = "",
|
||||||
std::string halideScheduler = "",
|
std::string halideScheduler = "",
|
||||||
double l1 = 0.0, double lInf = 0.0, double detectionConfThresh = 0.2)
|
double l1 = 0.0, double lInf = 0.0, double detectionConfThresh = 0.2, bool useWinograd = true)
|
||||||
{
|
{
|
||||||
checkBackend();
|
checkBackend();
|
||||||
l1 = l1 ? l1 : default_l1;
|
l1 = l1 ? l1 : default_l1;
|
||||||
@ -49,6 +49,7 @@ public:
|
|||||||
net.setInput(inp);
|
net.setInput(inp);
|
||||||
net.setPreferableBackend(backend);
|
net.setPreferableBackend(backend);
|
||||||
net.setPreferableTarget(target);
|
net.setPreferableTarget(target);
|
||||||
|
net.enableWinograd(useWinograd);
|
||||||
if (backend == DNN_BACKEND_HALIDE && !halideScheduler.empty())
|
if (backend == DNN_BACKEND_HALIDE && !halideScheduler.empty())
|
||||||
{
|
{
|
||||||
halideScheduler = findDataFile(halideScheduler);
|
halideScheduler = findDataFile(halideScheduler);
|
||||||
@ -347,7 +348,8 @@ TEST_P(DNNTestNetwork, SSD_VGG16)
|
|||||||
}
|
}
|
||||||
|
|
||||||
processNet("dnn/VGG_ILSVRC2016_SSD_300x300_iter_440000.caffemodel",
|
processNet("dnn/VGG_ILSVRC2016_SSD_300x300_iter_440000.caffemodel",
|
||||||
"dnn/ssd_vgg16.prototxt", inp, "detection_out", "", scoreDiff, iouDiff);
|
"dnn/ssd_vgg16.prototxt", inp, "detection_out", "", scoreDiff,
|
||||||
|
iouDiff, 0.2, false);
|
||||||
expectNoFallbacksFromIE(net);
|
expectNoFallbacksFromIE(net);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -81,6 +81,7 @@ TEST(Test_Darknet, read_yolo_voc_stream)
|
|||||||
Net net = readNetFromDarknet(cfgFile, weightsFile);
|
Net net = readNetFromDarknet(cfgFile, weightsFile);
|
||||||
net.setInput(inp);
|
net.setInput(inp);
|
||||||
net.setPreferableBackend(DNN_BACKEND_OPENCV);
|
net.setPreferableBackend(DNN_BACKEND_OPENCV);
|
||||||
|
net.enableWinograd(false);
|
||||||
ref = net.forward();
|
ref = net.forward();
|
||||||
}
|
}
|
||||||
// Import from bytes array.
|
// Import from bytes array.
|
||||||
@ -92,6 +93,7 @@ TEST(Test_Darknet, read_yolo_voc_stream)
|
|||||||
Net net = readNetFromDarknet(cfg.data(), cfg.size(), weights.data(), weights.size());
|
Net net = readNetFromDarknet(cfg.data(), cfg.size(), weights.data(), weights.size());
|
||||||
net.setInput(inp);
|
net.setInput(inp);
|
||||||
net.setPreferableBackend(DNN_BACKEND_OPENCV);
|
net.setPreferableBackend(DNN_BACKEND_OPENCV);
|
||||||
|
net.enableWinograd(false);
|
||||||
Mat out = net.forward();
|
Mat out = net.forward();
|
||||||
normAssert(ref, out);
|
normAssert(ref, out);
|
||||||
}
|
}
|
||||||
@ -178,7 +180,8 @@ public:
|
|||||||
const std::vector<std::vector<int> >& refClassIds,
|
const std::vector<std::vector<int> >& refClassIds,
|
||||||
const std::vector<std::vector<float> >& refConfidences,
|
const std::vector<std::vector<float> >& refConfidences,
|
||||||
const std::vector<std::vector<Rect2d> >& refBoxes,
|
const std::vector<std::vector<Rect2d> >& refBoxes,
|
||||||
double scoreDiff, double iouDiff, float confThreshold = 0.24, float nmsThreshold = 0.4)
|
double scoreDiff, double iouDiff, float confThreshold = 0.24,
|
||||||
|
float nmsThreshold = 0.4, bool useWinograd = true)
|
||||||
{
|
{
|
||||||
checkBackend();
|
checkBackend();
|
||||||
|
|
||||||
@ -198,6 +201,7 @@ public:
|
|||||||
findDataFile("dnn/" + weights, false));
|
findDataFile("dnn/" + weights, false));
|
||||||
net.setPreferableBackend(backend);
|
net.setPreferableBackend(backend);
|
||||||
net.setPreferableTarget(target);
|
net.setPreferableTarget(target);
|
||||||
|
net.enableWinograd(useWinograd);
|
||||||
net.setInput(inp);
|
net.setInput(inp);
|
||||||
std::vector<Mat> outs;
|
std::vector<Mat> outs;
|
||||||
net.forward(outs, net.getUnconnectedOutLayersNames());
|
net.forward(outs, net.getUnconnectedOutLayersNames());
|
||||||
@ -280,18 +284,19 @@ public:
|
|||||||
const std::vector<int>& refClassIds,
|
const std::vector<int>& refClassIds,
|
||||||
const std::vector<float>& refConfidences,
|
const std::vector<float>& refConfidences,
|
||||||
const std::vector<Rect2d>& refBoxes,
|
const std::vector<Rect2d>& refBoxes,
|
||||||
double scoreDiff, double iouDiff, float confThreshold = 0.24, float nmsThreshold = 0.4)
|
double scoreDiff, double iouDiff, float confThreshold = 0.24,
|
||||||
|
float nmsThreshold = 0.4, bool useWinograd = true)
|
||||||
{
|
{
|
||||||
testDarknetModel(cfg, weights,
|
testDarknetModel(cfg, weights,
|
||||||
std::vector<std::vector<int> >(1, refClassIds),
|
std::vector<std::vector<int> >(1, refClassIds),
|
||||||
std::vector<std::vector<float> >(1, refConfidences),
|
std::vector<std::vector<float> >(1, refConfidences),
|
||||||
std::vector<std::vector<Rect2d> >(1, refBoxes),
|
std::vector<std::vector<Rect2d> >(1, refBoxes),
|
||||||
scoreDiff, iouDiff, confThreshold, nmsThreshold);
|
scoreDiff, iouDiff, confThreshold, nmsThreshold, useWinograd);
|
||||||
}
|
}
|
||||||
|
|
||||||
void testDarknetModel(const std::string& cfg, const std::string& weights,
|
void testDarknetModel(const std::string& cfg, const std::string& weights,
|
||||||
const cv::Mat& ref, double scoreDiff, double iouDiff,
|
const cv::Mat& ref, double scoreDiff, double iouDiff,
|
||||||
float confThreshold = 0.24, float nmsThreshold = 0.4)
|
float confThreshold = 0.24, float nmsThreshold = 0.4, bool useWinograd = true)
|
||||||
{
|
{
|
||||||
CV_Assert(ref.cols == 7);
|
CV_Assert(ref.cols == 7);
|
||||||
std::vector<std::vector<int> > refClassIds;
|
std::vector<std::vector<int> > refClassIds;
|
||||||
@ -318,7 +323,7 @@ public:
|
|||||||
refBoxes[batchId].push_back(box);
|
refBoxes[batchId].push_back(box);
|
||||||
}
|
}
|
||||||
testDarknetModel(cfg, weights, refClassIds, refScores, refBoxes,
|
testDarknetModel(cfg, weights, refClassIds, refScores, refBoxes,
|
||||||
scoreDiff, iouDiff, confThreshold, nmsThreshold);
|
scoreDiff, iouDiff, confThreshold, nmsThreshold, useWinograd);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -396,7 +401,7 @@ TEST_P(Test_Darknet_nets, YoloVoc)
|
|||||||
|
|
||||||
{
|
{
|
||||||
SCOPED_TRACE("batch size 1");
|
SCOPED_TRACE("batch size 1");
|
||||||
testDarknetModel(config_file, weights_file, ref.rowRange(0, 3), scoreDiff, iouDiff);
|
testDarknetModel(config_file, weights_file, ref.rowRange(0, 3), scoreDiff, iouDiff, 0.24, 0.4, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000)
|
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000)
|
||||||
@ -410,7 +415,7 @@ TEST_P(Test_Darknet_nets, YoloVoc)
|
|||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
SCOPED_TRACE("batch size 2");
|
SCOPED_TRACE("batch size 2");
|
||||||
testDarknetModel(config_file, weights_file, ref, scoreDiff, iouDiff, 0.24, nmsThreshold);
|
testDarknetModel(config_file, weights_file, ref, scoreDiff, iouDiff, 0.24, nmsThreshold, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000)
|
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000)
|
||||||
@ -599,7 +604,7 @@ TEST_P(Test_Darknet_nets, YOLOv3)
|
|||||||
{
|
{
|
||||||
applyTestTag(
|
applyTestTag(
|
||||||
CV_TEST_TAG_LONG,
|
CV_TEST_TAG_LONG,
|
||||||
(target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_1GB : CV_TEST_TAG_MEMORY_2GB),
|
CV_TEST_TAG_MEMORY_2GB,
|
||||||
CV_TEST_TAG_DEBUG_VERYLONG
|
CV_TEST_TAG_DEBUG_VERYLONG
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -656,7 +661,7 @@ TEST_P(Test_Darknet_nets, YOLOv3)
|
|||||||
|
|
||||||
{
|
{
|
||||||
SCOPED_TRACE("batch size 1");
|
SCOPED_TRACE("batch size 1");
|
||||||
testDarknetModel(config_file, weights_file, ref.rowRange(0, N0), scoreDiff, iouDiff);
|
testDarknetModel(config_file, weights_file, ref.rowRange(0, N0), scoreDiff, iouDiff, 0.24, 0.4, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(INF_ENGINE_RELEASE)
|
#if defined(INF_ENGINE_RELEASE)
|
||||||
@ -674,7 +679,7 @@ TEST_P(Test_Darknet_nets, YOLOv3)
|
|||||||
|
|
||||||
{
|
{
|
||||||
SCOPED_TRACE("batch size 2");
|
SCOPED_TRACE("batch size 2");
|
||||||
testDarknetModel(config_file, weights_file, ref, scoreDiff, iouDiff);
|
testDarknetModel(config_file, weights_file, ref, scoreDiff, iouDiff, 0.24, 0.4, false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -682,7 +687,7 @@ TEST_P(Test_Darknet_nets, YOLOv4)
|
|||||||
{
|
{
|
||||||
applyTestTag(
|
applyTestTag(
|
||||||
CV_TEST_TAG_LONG,
|
CV_TEST_TAG_LONG,
|
||||||
(target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_1GB : CV_TEST_TAG_MEMORY_2GB),
|
CV_TEST_TAG_MEMORY_2GB,
|
||||||
CV_TEST_TAG_DEBUG_VERYLONG
|
CV_TEST_TAG_DEBUG_VERYLONG
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -756,7 +761,7 @@ TEST_P(Test_Darknet_nets, YOLOv4)
|
|||||||
|
|
||||||
{
|
{
|
||||||
SCOPED_TRACE("batch size 1");
|
SCOPED_TRACE("batch size 1");
|
||||||
testDarknetModel(config_file, weights_file, ref.rowRange(0, N0), scoreDiff, iouDiff);
|
testDarknetModel(config_file, weights_file, ref.rowRange(0, N0), scoreDiff, iouDiff, 0.24, 0.4, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
@ -792,7 +797,7 @@ TEST_P(Test_Darknet_nets, YOLOv4)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
testDarknetModel(config_file, weights_file, ref, scoreDiff, iouDiff);
|
testDarknetModel(config_file, weights_file, ref, scoreDiff, iouDiff, 0.24, 0.4, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000)
|
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000)
|
||||||
@ -877,7 +882,7 @@ TEST_P(Test_Darknet_nets, YOLOv4x_mish)
|
|||||||
{
|
{
|
||||||
applyTestTag(
|
applyTestTag(
|
||||||
CV_TEST_TAG_LONG,
|
CV_TEST_TAG_LONG,
|
||||||
(target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_1GB : CV_TEST_TAG_MEMORY_2GB),
|
CV_TEST_TAG_MEMORY_2GB,
|
||||||
CV_TEST_TAG_DEBUG_VERYLONG
|
CV_TEST_TAG_DEBUG_VERYLONG
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -939,7 +944,7 @@ TEST_P(Test_Darknet_nets, YOLOv4x_mish)
|
|||||||
|
|
||||||
{
|
{
|
||||||
SCOPED_TRACE("batch size 1");
|
SCOPED_TRACE("batch size 1");
|
||||||
testDarknetModel(config_file, weights_file, ref.rowRange(0, N0), scoreDiff, iouDiff);
|
testDarknetModel(config_file, weights_file, ref.rowRange(0, N0), scoreDiff, iouDiff, 0.24, 0.4, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
@ -958,7 +963,7 @@ TEST_P(Test_Darknet_nets, YOLOv4x_mish)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
testDarknetModel(config_file, weights_file, ref, scoreDiff, iouDiff);
|
testDarknetModel(config_file, weights_file, ref, scoreDiff, iouDiff, 0.24, 0.4, false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user