Support YOLOv3 model from Darknet

This commit is contained in:
Dmitry Kurtaev
2018-04-13 18:53:12 +03:00
parent 2129db6e91
commit 97fec07d96
8 changed files with 412 additions and 309 deletions
+62 -24
View File
@@ -35,12 +35,14 @@ using namespace dnn;
float confThreshold;
std::vector<std::string> classes;
void postprocess(Mat& frame, const Mat& out, Net& net);
void postprocess(Mat& frame, const std::vector<Mat>& out, Net& net);
void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame);
void callback(int pos, void* userdata);
std::vector<String> getOutputsNames(const Net& net);
int main(int argc, char** argv)
{
CommandLineParser parser(argc, argv, keys);
@@ -115,9 +117,10 @@ int main(int argc, char** argv)
Mat imInfo = (Mat_<float>(1, 3) << inpSize.height, inpSize.width, 1.6f);
net.setInput(imInfo, "im_info");
}
Mat out = net.forward();
std::vector<Mat> outs;
net.forward(outs, getOutputsNames(net));
postprocess(frame, out, net);
postprocess(frame, outs, net);
// Put efficiency information.
std::vector<double> layersTimes;
@@ -131,18 +134,19 @@ int main(int argc, char** argv)
return 0;
}
void postprocess(Mat& frame, const Mat& out, Net& net)
void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net)
{
static std::vector<int> outLayers = net.getUnconnectedOutLayers();
static std::string outLayerType = net.getLayer(outLayers[0])->type;
float* data = (float*)out.data;
if (net.getLayer(0)->outputNameToIndex("im_info") != -1) // Faster-RCNN or R-FCN
{
// Network produces output blob with a shape 1x1xNx7 where N is the number of
// detections and every detection is a vector of values
// [batchId, classId, confidence, left, top, right, bottom]
for (size_t i = 0; i < out.total(); i += 7)
CV_Assert(outs.size() == 1);
float* data = (float*)outs[0].data;
for (size_t i = 0; i < outs[0].total(); i += 7)
{
float confidence = data[i + 2];
if (confidence > confThreshold)
@@ -161,7 +165,9 @@ void postprocess(Mat& frame, const Mat& out, Net& net)
// Network produces output blob with a shape 1x1xNx7 where N is the number of
// detections and every detection is a vector of values
// [batchId, classId, confidence, left, top, right, bottom]
for (size_t i = 0; i < out.total(); i += 7)
CV_Assert(outs.size() == 1);
float* data = (float*)outs[0].data;
for (size_t i = 0; i < outs[0].total(); i += 7)
{
float confidence = data[i + 2];
if (confidence > confThreshold)
@@ -177,27 +183,45 @@ void postprocess(Mat& frame, const Mat& out, Net& net)
}
else if (outLayerType == "Region")
{
// Network produces output blob with a shape NxC where N is a number of
// detected objects and C is a number of classes + 4 where the first 4
// numbers are [center_x, center_y, width, height]
for (int i = 0; i < out.rows; ++i, data += out.cols)
std::vector<int> classIds;
std::vector<float> confidences;
std::vector<Rect> boxes;
for (size_t i = 0; i < outs.size(); ++i)
{
Mat confidences = out.row(i).colRange(5, out.cols);
Point classIdPoint;
double confidence;
minMaxLoc(confidences, 0, &confidence, 0, &classIdPoint);
if (confidence > confThreshold)
// Network produces output blob with a shape NxC where N is the number of
// detected objects and C is the number of classes + 5: the first 4 numbers are
// [center_x, center_y, width, height] and the per-class scores start at
// column 5 (column 4 is not read here).
float* data = (float*)outs[i].data;
for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols)
{
int classId = classIdPoint.x;
int centerX = (int)(data[0] * frame.cols);
int centerY = (int)(data[1] * frame.rows);
int width = (int)(data[2] * frame.cols);
int height = (int)(data[3] * frame.rows);
int left = centerX - width / 2;
int top = centerY - height / 2;
drawPred(classId, (float)confidence, left, top, left + width, top + height, frame);
Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
Point classIdPoint;
double confidence;
minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
if (confidence > confThreshold)
{
int centerX = (int)(data[0] * frame.cols);
int centerY = (int)(data[1] * frame.rows);
int width = (int)(data[2] * frame.cols);
int height = (int)(data[3] * frame.rows);
int left = centerX - width / 2;
int top = centerY - height / 2;
classIds.push_back(classIdPoint.x);
confidences.push_back((float)confidence);
boxes.push_back(Rect(left, top, width, height));
}
}
}
std::vector<int> indices;
NMSBoxes(boxes, confidences, confThreshold, 0.4, indices);
for (size_t i = 0; i < indices.size(); ++i)
{
int idx = indices[i];
Rect box = boxes[idx];
drawPred(classIds[idx], confidences[idx], box.x, box.y,
box.x + box.width, box.y + box.height, frame);
}
}
else
CV_Error(Error::StsNotImplemented, "Unknown output layer type: " + outLayerType);
@@ -227,3 +251,17 @@ void callback(int pos, void*)
{
confThreshold = pos * 0.01f;
}
std::vector<String> getOutputsNames(const Net& net)
{
static std::vector<String> names;
if (names.empty())
{
std::vector<int> outLayers = net.getUnconnectedOutLayers();
std::vector<String> layersNames = net.getLayerNames();
names.resize(outLayers.size());
for (size_t i = 0; i < outLayers.size(); ++i)
names[i] = layersNames[outLayers[i] - 1];
}
return names;
}