// DetectionDNN.C - JeVois Smart Embedded Machine Vision Toolkit Base Modules (JeVoisBase 1.9)
// NOTE(review): this file was recovered from a doxygen-generated HTML page; see jevois.org for canonical source.
1 // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
2 //
3 // JeVois Smart Embedded Machine Vision Toolkit - Copyright (C) 2016 by Laurent Itti, the University of Southern
4 // California (USC), and iLab at USC. See http://iLab.usc.edu and http://jevois.org for information about this project.
5 //
6 // This file is part of the JeVois Smart Embedded Machine Vision Toolkit. This program is free software; you can
7 // redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software
8 // Foundation, version 2. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9 // without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
10 // License for more details. You should have received a copy of the GNU General Public License along with this program;
11 // if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
12 //
13 // Contact information: Laurent Itti - 3641 Watt Way, HNB-07A - Los Angeles, CA 90089-2520 - USA.
14 // Tel: +1 213 740 3527 - itti@pollux.usc.edu - http://iLab.usc.edu - http://jevois.org
15 // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
16 /*! \file */
17 
#include <jevois/Core/Module.H>
#include <jevois/Debug/Timer.H>
#include <jevois/Image/RawImageOps.H>

#include <opencv2/core/core.hpp>
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>

#include <fstream>
25 
26 // icon from opencv
27 
28 static jevois::ParameterCategory const ParamCateg("Darknet YOLO Options");
29 
30 //! Parameter \relates DetectionDNN
31 JEVOIS_DECLARE_PARAMETER(classnames, std::string, "Path to a text file with names of classes to label detected objects",
32  "/jevois/share/opencv-dnn/detection/opencv_face_detector.classes", ParamCateg);
33 
34 //! Parameter \relates DetectionDNN
35 JEVOIS_DECLARE_PARAMETER(configname, std::string, "Path to a text file that contains network configuration. "
36  "Can have extension .prototxt (Caffe), .pbtxt (TensorFlow), or .cfg (Darknet).",
37  "/jevois/share/opencv-dnn/detection/opencv_face_detector.prototxt", ParamCateg);
38 
39 //! Parameter \relates DetectionDNN
40 JEVOIS_DECLARE_PARAMETER(modelname, std::string, "Path to a binary file of model contains trained weights. "
41  "Can have extension .caffemodel (Caffe), .pb (TensorFlow), .t7 or .net (Torch), "
42  "or .weights (Darknet).",
43  "/jevois/share/opencv-dnn/detection/opencv_face_detector.caffemodel", ParamCateg);
44 
45 //! Parameter \relates DetectionDNN
46 JEVOIS_DECLARE_PARAMETER(netin, cv::Size, "Width and height (in pixels) of the neural network input layer, or [0 0] "
47  "to make it match camera frame size. NOTE: for YOLO v3 sizes must be multiples of 32.",
48  cv::Size(160, 120), ParamCateg);
49 
50 //! Parameter \relates DetectionDNN
51 JEVOIS_DECLARE_PARAMETER(thresh, float, "Detection threshold in percent confidence",
52  50.0F, jevois::Range<float>(0.0F, 100.0F), ParamCateg);
53 
54 //! Parameter \relates DetectionDNN
55 JEVOIS_DECLARE_PARAMETER(nms, float, "Non-maximum suppression intersection-over-union threshold in percent",
56  45.0F, jevois::Range<float>(0.0F, 100.0F), ParamCateg);
57 
58 //! Parameter \relates DetectionDNN
59 JEVOIS_DECLARE_PARAMETER(rgb, bool, "When true, model works with RGB input images instead BGR ones",
60  true, ParamCateg);
61 
62 //! Parameter \relates DetectionDNN
63 JEVOIS_DECLARE_PARAMETER(scale, float, "Value scaling factor applied to input pixels",
64  2.0F / 255.0F, ParamCateg);
65 
66 //! Parameter \relates DetectionDNN
67 JEVOIS_DECLARE_PARAMETER(mean, cv::Scalar, "Mean BGR value subtracted from input image",
68  cv::Scalar(127.5F, 127.5F, 127.5F), ParamCateg);
69 
70 //! Detect and recognize multiple objects in scenes using OpenCV Deep Neural Nets (DNN)
71 /*! This module runs an object detection deep neural network using the OpenCV DNN library. Detection networks analyze a
72  whole scene and produce a number of bounding boxes around detected objects, together with identity labels and
73  confidence scores for each detected box.
74 
75  This module runs the selected deep neural network and shows all detections obtained.
76 
77  Note that by default this module runs the OpenCV Face Detector DNN which can detect human faces.
78 
79  Included with the standard JeVois distribution are the following networks:
80 
81  - OpenCV Face Detector, Caffe model
82  - MobileNet + SSD trained on Pascal VOC (20 object classes), Caffe model
83  - MobileNet + SSD trained on Coco (80 object classes), TensorFlow model
84  - MobileNet v2 + SSD trained on Coco (80 object classes), TensorFlow model
85  - Darknet Tiny YOLO v3 trained on Coco (80 object classes), Darknet model
86  - Darknet Tiny YOLO v2 trained on Pascal VOC (20 object classes), Darknet model
87 
88  See the module's \b params.cfg file to switch network. Object categories are as follows:
89 
    - The 80 COCO object categories are: person, bicycle, car, motorbike, aeroplane, bus, train, truck, boat, traffic
      light, fire hydrant, stop sign, parking meter, bench, bird, cat, dog, horse, sheep, cow, elephant, bear, zebra,
      giraffe, backpack, umbrella, handbag, tie, suitcase, frisbee, skis, snowboard, sports ball, kite, baseball bat,
      baseball glove, skateboard, surfboard, tennis racket, bottle, wine glass, cup, fork, knife, spoon, bowl, banana,
      apple, sandwich, orange, broccoli, carrot, hot dog, pizza, donut, cake, chair, sofa, pottedplant, bed,
      diningtable, toilet, tvmonitor, laptop, mouse, remote, keyboard, cell phone, microwave, oven, toaster, sink,
      refrigerator, book, clock, vase, scissors, teddy bear, hair drier, toothbrush.
96 
97  - The 20 Pascal-VOC object categories are: aeroplane, bicycle, bird, boat, bottle, bus, car, cat, chair, cow,
98  diningtable, dog, horse, motorbike, person, pottedplant, sheep, sofa, train, tvmonitor.
99 
100  Sometimes it will make mistakes! The performance of yolov3-tiny is about 33.1% correct (mean average precision) on
101  the COCO test set. The OpenCV Face Detector is quite fast and robust!
102 
103  Speed and network size
104  ----------------------
105 
106  The parameter \p netin allows you to rescale the neural network to the specified size. Beware that this will only
107  work if the network used is fully convolutional (as is the case with the default networks listed above). This not
108  only allows you to adjust processing speed (and, conversely, accuracy), but also to better match the network to the
109  input images (e.g., the default size for tiny-yolo is 416x416, and, thus, passing it a input image of size 640x480
110  will result in first scaling that input to 416x312, then letterboxing it by adding gray borders on top and bottom so
111  that the final input to the network is 416x416). This letterboxing can be completely avoided by just resizing the
112  network to 320x240.
113 
114  Here are expected processing speeds for the OpenCV Face Detector:
115  - when netin = [320 240], processes 320x240 inputs, about 650ms/image (1.5 frames/s)
116  - when netin = [160 120], processes 160x120 inputs, about 190ms/image (5.0 frames/s)
117 
118  Serial messages
119  ---------------
120 
121  When detections are found which are above threshold, one message will be sent for each detected
122  object (i.e., for each box that gets drawn when USB outputs are used), using a standardized 2D message:
123  + Serial message type: \b 2D
124  + `id`: the category of the recognized object, followed by ':' and the confidence score in percent
125  + `x`, `y`, or vertices: standardized 2D coordinates of object center or corners
126  + `w`, `h`: standardized object size
127  + `extra`: any number of additional category:score pairs which had an above-threshold score for that box
128 
129  See \ref UserSerialStyle for more on standardized serial messages, and \ref coordhelpers for more info on
130  standardized coordinates.
131 
132  This code is heavily inspired from:
133  https://github.com/opencv/opencv/blob/master/samples/dnn/object_detection.cpp
134 
135  @author Laurent Itti
136 
137  @displayname Detection DNN
138  @videomapping NONE 0 0 0.0 YUYV 640 480 15.0 JeVois DetectionDNN
139  @videomapping YUYV 640 498 15.0 YUYV 640 480 15.0 JeVois DetectionDNN
140  @email itti\@usc.edu
141  @address University of Southern California, HNB-07A, 3641 Watt Way, Los Angeles, CA 90089-2520, USA
142  @copyright Copyright (C) 2018 by Laurent Itti, iLab and the University of Southern California
143  @mainurl http://jevois.org
144  @supporturl http://jevois.org/doc
145  @otherurl http://iLab.usc.edu
146  @license GPL v3
147  @distribution Unrestricted
148  @restrictions None
149  \ingroup modules */
151  public jevois::Parameter<classnames, configname, modelname, netin, thresh, nms, rgb, scale, mean>
152 {
153  public:
154  // ####################################################################################################
155  //! Constructor
156  // ####################################################################################################
157  DetectionDNN(std::string const & instance) : jevois::StdModule(instance)
158  { }
159 
160  // ####################################################################################################
161  //! Virtual destructor for safe inheritance
162  // ####################################################################################################
163  virtual ~DetectionDNN()
164  { }
165 
166  // ####################################################################################################
167  //! Initialization
168  // ####################################################################################################
169  virtual void postInit() override
170  {
171  // Load the class names:
172  std::ifstream ifs(classnames::get());
173  if (ifs.is_open() == false) LFATAL("Class names file " << classnames::get() << " not found");
174  std::string line;
175  while (std::getline(ifs, line)) itsClasses.push_back(line);
176 
177  // Create and load the network:
178  itsNet = cv::dnn::readNet(modelname::get(), configname::get());
179  itsNet.setPreferableBackend(cv::dnn::DNN_BACKEND_DEFAULT);
180  itsNet.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
181 
182  // Get names of the network's output layers:
183  itsOutLayers = itsNet.getUnconnectedOutLayers();
184  std::vector<cv::String> layersNames = itsNet.getLayerNames();
185  itsOutNames.resize(itsOutLayers.size());
186  for (size_t i = 0; i < itsOutLayers.size(); ++i) itsOutNames[i] = layersNames[itsOutLayers[i] - 1];
187  itsOutLayerType = itsNet.getLayer(itsOutLayers[0])->type;
188  }
189 
190  // ####################################################################################################
191  //! Un-initialization
192  // ####################################################################################################
193  virtual void postUninit() override
194  { }
195 
196  // ####################################################################################################
197  //! Post-processing to extract boxes from network outputs
198  // ####################################################################################################
199  void postprocess(cv::Mat const & frame, std::vector<cv::Mat> const & outs, jevois::RawImage * outframe = nullptr)
200  {
201  float const confThreshold = thresh::get() * 0.01F;
202  float const nmsThreshold = nms::get() * 0.01F;
203 
204  std::vector<int> classIds;
205  std::vector<float> confidences;
206  std::vector<cv::Rect> boxes;
207  if (itsNet.getLayer(0)->outputNameToIndex("im_info") != -1) // Faster-RCNN or R-FCN
208  {
209  // Network produces output blob with a shape 1x1xNx7 where N is a number of detections and an every detection is
210  // a vector of values [batchId, classId, confidence, left, top, right, bottom]
211  if (outs.size() != 1) LFATAL("Malformed output layers");
212  float* data = (float*)outs[0].data;
213  for (size_t i = 0; i < outs[0].total(); i += 7)
214  {
215  float confidence = data[i + 2];
216  if (confidence > confThreshold)
217  {
218  int left = (int)data[i + 3];
219  int top = (int)data[i + 4];
220  int right = (int)data[i + 5];
221  int bottom = (int)data[i + 6];
222  int width = right - left + 1;
223  int height = bottom - top + 1;
224  classIds.push_back((int)(data[i + 1]) - 1); // Skip 0th background class id.
225  boxes.push_back(cv::Rect(left, top, width, height));
226  confidences.push_back(confidence);
227  }
228  }
229  }
230  else if (itsOutLayerType == "DetectionOutput")
231  {
232  // Network produces output blob with a shape 1x1xNx7 where N is a number of detections and an every detection is
233  // a vector of values [batchId, classId, confidence, left, top, right, bottom]
234  if (outs.size() != 1) LFATAL("Malformed output layers");
235  float* data = (float*)outs[0].data;
236  for (size_t i = 0; i < outs[0].total(); i += 7)
237  {
238  float confidence = data[i + 2];
239  if (confidence > confThreshold)
240  {
241  int left = (int)(data[i + 3] * frame.cols);
242  int top = (int)(data[i + 4] * frame.rows);
243  int right = (int)(data[i + 5] * frame.cols);
244  int bottom = (int)(data[i + 6] * frame.rows);
245  int width = right - left + 1;
246  int height = bottom - top + 1;
247  classIds.push_back((int)(data[i + 1]) - 1); // Skip 0th background class id.
248  boxes.push_back(cv::Rect(left, top, width, height));
249  confidences.push_back(confidence);
250  }
251  }
252  }
253  else if (itsOutLayerType == "Region")
254  {
255  for (size_t i = 0; i < outs.size(); ++i)
256  {
257  // Network produces output blob with a shape NxC where N is a number of detected objects and C is a number of
258  // classes + 4 where the first 4 numbers are [center_x, center_y, width, height]
259  float* data = (float*)outs[i].data;
260  for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols)
261  {
262  cv::Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
263  cv::Point classIdPoint;
264  double confidence;
265  minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
266  if (confidence > confThreshold)
267  {
268  int centerX = (int)(data[0] * frame.cols);
269  int centerY = (int)(data[1] * frame.rows);
270  int width = (int)(data[2] * frame.cols);
271  int height = (int)(data[3] * frame.rows);
272  int left = centerX - width / 2;
273  int top = centerY - height / 2;
274 
275  classIds.push_back(classIdPoint.x);
276  confidences.push_back((float)confidence);
277  boxes.push_back(cv::Rect(left, top, width, height));
278  }
279  }
280  }
281  }
282  else LFATAL("Unknown output layer type: " << itsOutLayerType);
283 
284  // Cleanup overlapping boxes:
285  std::vector<int> indices;
286  cv::dnn::NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);
287 
288  // Send serial messages and draw boxes:
289  for (size_t i = 0; i < indices.size(); ++i)
290  {
291  int idx = indices[i];
292  cv::Rect const & box = boxes[idx];
293  std::vector<jevois::ObjReco> data;
294  float const conf = confidences[idx] * 100.0F;
295  std::string name;
296  if (classIds[idx] < itsClasses.size()) name = itsClasses[classIds[idx]]; else name = "Oooops";
297  data.push_back({ conf, name });
298 
299  std::string label = jevois::sformat("%s: %.2f", name.c_str(), conf);
300 
301  if (outframe)
302  {
303  jevois::rawimage::drawRect(*outframe, box.x, box.y, box.width, box.height, 2, jevois::yuyv::LightGreen);
304  jevois::rawimage::writeText(*outframe, label, box.x + 6, box.y + 2, jevois::yuyv::LightGreen,
306  }
307 
308  sendSerialObjDetImg2D(frame.cols, frame.rows, box.x, box.y, box.width, box.height, data);
309  }
310  }
311 
312  // ####################################################################################################
313  //! Processing function, no video output
314  // ####################################################################################################
315  virtual void process(jevois::InputFrame && inframe) override
316  {
317  // Wait for next available camera image:
318  jevois::RawImage const inimg = inframe.get();
319  unsigned int const w = inimg.width, h = inimg.height;
320 
321  // Convert input image to BGR for predictions:
322  cv::Mat cvimg = jevois::rawimage::convertToCvBGR(inimg);
323 
324  // Let camera know we are done processing the input image:
325  inframe.done();
326 
327  // Extract blob that will be sent to network:
328  cv::Mat blob;
329  cv::dnn::blobFromImage(cvimg, blob, scale::get(), netin::get(), mean::get(), rgb::get(), false);
330 
331  // Launch the predictions:
332  itsNet.setInput(blob);
333  std::vector<cv::Mat> outs; itsNet.forward(outs, itsOutNames);
334 
335  // Post-process the outputs and send serial messages:
336  postprocess(cvimg, outs);
337  }
338 
339  // ####################################################################################################
340  //! Processing function with video output to USB
341  // ####################################################################################################
342  virtual void process(jevois::InputFrame && inframe, jevois::OutputFrame && outframe) override
343  {
344  static jevois::Timer timer("processing", 10, LOG_DEBUG);
345 
346  // Wait for next available camera image:
347  jevois::RawImage const inimg = inframe.get();
348 
349  timer.start();
350 
351  // We only handle one specific pixel format, and any image size in this module:
352  unsigned int const w = inimg.width, h = inimg.height;
353  inimg.require("input", w, h, V4L2_PIX_FMT_YUYV);
354 
355  // While we process it, start a thread to wait for out frame and paste the input into it:
356  jevois::RawImage outimg;
357  auto paste_fut = std::async(std::launch::async, [&]() {
358  outimg = outframe.get();
359  outimg.require("output", w, h + 18, inimg.fmt);
360 
361  // Paste the current input image:
362  jevois::rawimage::paste(inimg, outimg, 0, 0);
363  jevois::rawimage::writeText(outimg, "JeVois ObjectDetection DNN", 3, 3, jevois::yuyv::White);
364  jevois::rawimage::drawFilledRect(outimg, 0, h, w, outimg.height - h, jevois::yuyv::Black);
365  });
366 
367  // Convert input image to BGR for predictions:
368  cv::Mat cvimg = jevois::rawimage::convertToCvBGR(inimg);
369 
370  // Extract blob that will be sent to network:
371  cv::Mat blob;
372  cv::dnn::blobFromImage(cvimg, blob, scale::get(), netin::get(), mean::get(), rgb::get(), false);
373 
374  // Let camera know we are done processing the input image:
375  inframe.done();
376 
377  // Launch the predictions:
378  itsNet.setInput(blob);
379  std::vector<cv::Mat> outs; itsNet.forward(outs, itsOutNames);
380 
381  // Wait for paste to finish up:
382  paste_fut.get();
383 
384  // Post-process the outputs, draw them. and send serial messages:
385  postprocess(cvimg, outs, &outimg);
386 
387  // Display efficiency information:
388  std::vector<double> layersTimes;
389  double freq = cv::getTickFrequency() / 1000;
390  double t = itsNet.getPerfProfile(layersTimes) / freq;
391  std::string label = jevois::sformat("Inference time: %.2f ms", t);
392  jevois::rawimage::writeText(outimg, label, 3, h + 3, jevois::yuyv::White);
393 
394  // Show processing fps:
395  std::string const & fpscpu = timer.stop();
396  jevois::rawimage::writeText(outimg, fpscpu, 3, h - 13, jevois::yuyv::White);
397 
398  // Send the output image with our processing results to the host over USB:
399  outframe.send();
400  }
401 
402  // ####################################################################################################
403  protected:
404  std::vector<std::string> itsClasses;
406  std::vector<cv::String> itsOutNames;
407  std::vector<int> itsOutLayers;
408  std::string itsOutLayerType;
409 };
410 
// Allow the module to be loaded as a shared object (.so) file:
JEVOIS_REGISTER_MODULE(DetectionDNN);