JeVois  1.16
JeVois Smart Embedded Machine Vision Toolkit
Share this page:
PostProcessorDetect.C
Go to the documentation of this file.
1 // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
2 //
3 // JeVois Smart Embedded Machine Vision Toolkit - Copyright (C) 2021 by Laurent Itti, the University of Southern
4 // California (USC), and iLab at USC. See http://iLab.usc.edu and http://jevois.org for information about this project.
5 //
6 // This file is part of the JeVois Smart Embedded Machine Vision Toolkit. This program is free software; you can
7 // redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software
8 // Foundation, version 2. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9 // without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
10 // License for more details. You should have received a copy of the GNU General Public License along with this program;
11 // if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
12 //
13 // Contact information: Laurent Itti - 3641 Watt Way, HNB-07A - Los Angeles, CA 90089-2520 - USA.
14 // Tel: +1 213 740 3527 - itti@pollux.usc.edu - http://iLab.usc.edu - http://jevois.org
15 // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
16 /*! \file */
17 
21 #include <jevois/DNN/Utils.H>
22 #include <jevois/Util/Utils.H>
24 #include <jevois/Core/Engine.H>
25 #include <jevois/Core/Module.H>
26 #include <jevois/GPU/GUIhelper.H>
27 
28 #include <opencv2/dnn.hpp>
29 
30 // ####################################################################################################
32 { }
33 
34 // ####################################################################################################
36 {
37  classes::freeze(doit);
38  detecttype::freeze(doit);
39  anchors::freeze(doit);
40 }
41 
42 // ####################################################################################################
43 void jevois::dnn::PostProcessorDetect::onParamChange(postprocessor::classes const & JEVOIS_UNUSED_PARAM(param),
44  std::string const & val)
45 {
46  if (val.empty()) { itsLabels.clear(); return; }
47 
48  // Get the dataroot of our network. We assume that there is a sub-component named "network" that is a sibling of us:
49  std::vector<std::string> dd = jevois::split(Component::descriptor(), ":");
50  dd.back() = "network"; dd.emplace_back("dataroot");
51  std::string const dataroot = engine()->getParamStringUnique(jevois::join(dd, ":"));
52 
53  itsLabels = jevois::dnn::readLabelsFile(jevois::absolutePath(dataroot, val));
54 }
55 
56 // ####################################################################################################
57 void jevois::dnn::PostProcessorDetect::onParamChange(postprocessor::anchors const & JEVOIS_UNUSED_PARAM(param),
58  std::string const & val)
59 {
60  if (val.empty()) { itsAnchors.clear(); return; }
61  auto tok = jevois::split(val, "\\s*;\\s*");
62  if (tok.size() >= 64) LFATAL("Maximum 32 anchors is supported");
63  for (std::string const & t : tok)
64  {
65  std::array<float, 64> a { };
66  auto atok = jevois::split(t, "\\s*,\\s*");
67  int i = 0;
68  for (std::string const & at : atok) a[i++] = std::stoi(at);
69  itsAnchors.emplace_back(std::move(a));
70  }
71 }
72 
73 // ####################################################################################################
74 void jevois::dnn::PostProcessorDetect::process(std::vector<cv::Mat> const & outs, jevois::dnn::PreProcessor * preproc)
75 {
76  float const confThreshold = thresh::get() * 0.01F;
77  float const nmsThreshold = nms::get() * 0.01F;
78  int const fudge = classoffset::get();
79  itsImageSize = preproc->imagesize();
80 
81  // To draw boxes, we will need to:
82  // - scale from [0..1]x[0..1] to blobw x blobh
83  // - scale and center from blobw x blobh to input image w x h, provided by PreProcessor::b2i()
84  // - when using the GUI, we further scale and translate to OpenGL display coordinates using GUIhelper::i2d()
85  // Here we assume that the first blob sets the input size.
86  cv::Size const bsiz = preproc->blobsize(0);
87 
88  // We keep 3 vectors here instead of creating a class to hold all of the data because OpenCV will need that for
89  // non-maximum suppression:
90  std::vector<int> classIds;
91  std::vector<float> confidences;
92  std::vector<cv::Rect> boxes;
93 
94  // Here we just scale the coords from [0..1]x[0..1] to blobw x blobh:
95  switch(detecttype::get())
96  {
97  // ----------------------------------------------------------------------------------------------------
98  case jevois::dnn::postprocessor::DetectType::FasterRCNN:
99  {
100  // Network produces output blob with a shape 1x1xNx7 where N is a number of detections and an every detection is
101  // a vector of values [batchId, classId, confidence, left, top, right, bottom]
102  if (outs.size() != 1) LFATAL("Malformed output layers");
103  cv::Mat const & out = outs[0]; cv::MatSize const & msiz = out.size;
104  if (msiz.dims() != 4 || msiz[0] != 1 || msiz[1] != 1 || msiz[3] != 7) LFATAL("Incorrect tensor size: need 1x1xNx7");
105 
106  float const * data = (float const *)out.data;
107  for (size_t i = 0; i < out.total(); i += 7)
108  {
109  float confidence = data[i + 2];
110  if (confidence > confThreshold)
111  {
112  int left = (int)data[i + 3];
113  int top = (int)data[i + 4];
114  int right = (int)data[i + 5];
115  int bottom = (int)data[i + 6];
116  int width = right - left + 1;
117  int height = bottom - top + 1;
118  classIds.push_back((int)(data[i + 1]) + fudge); // Skip 0th background class id.
119  boxes.push_back(cv::Rect(left, top, width, height));
120  confidences.push_back(confidence);
121  }
122  }
123  }
124  break;
125 
126  // ----------------------------------------------------------------------------------------------------
127  case jevois::dnn::postprocessor::DetectType::SSD:
128  {
129  // Network produces output blob with a shape 1x1xNx7 where N is a number of detections and an every detection is
130  // a vector of values [batchId, classId, confidence, left, top, right, bottom]
131  if (outs.size() != 1) LFATAL("Malformed output layers");
132  cv::Mat const & out = outs[0]; cv::MatSize msiz = out.size;
133  if (msiz.dims() != 4 || msiz[0] != 1 || msiz[1] != 1 || msiz[3] != 7) LFATAL("Incorrect tensor size: need 1x1xNx7");
134 
135  float const * data = (float const *)out.data;
136  for (size_t i = 0; i < out.total(); i += 7)
137  {
138  float confidence = data[i + 2];
139  if (confidence > confThreshold)
140  {
141  int left = (int)(data[i + 3] * bsiz.width);
142  int top = (int)(data[i + 4] * bsiz.height);
143  int right = (int)(data[i + 5] * bsiz.width);
144  int bottom = (int)(data[i + 6] * bsiz.height);
145  int width = right - left + 1;
146  int height = bottom - top + 1;
147  classIds.push_back((int)(data[i + 1]) + fudge); // Skip 0th background class id.
148  boxes.push_back(cv::Rect(left, top, width, height));
149  confidences.push_back(confidence);
150  }
151  }
152  }
153  break;
154 
155  // ----------------------------------------------------------------------------------------------------
156  case jevois::dnn::postprocessor::DetectType::TPUSSD:
157  {
158  // Network produces 4 output blobs with shapes 4xN for boxes, N for IDs, N for scores, and 1x1 for count
159  // (see GetDetectionResults in detection/adapter.cc of libcoral):
160  if (outs.size() != 4) LFATAL("Malformed output layers");
161  cv::Mat const & bboxes = outs[0];
162  cv::Mat const & ids = outs[1];
163  cv::Mat const & scores = outs[2];
164  cv::Mat const & count = outs[3];
165  if (bboxes.total() != 4 * ids.total()) LFATAL("Incorrect bbox vs ids sizes");
166  if (bboxes.total() != 4 * scores.total()) LFATAL("Incorrect bbox vs scores sizes");
167  if (count.total() != 1) LFATAL("Incorrect size for count");
168  size_t num = count.at<float>(0);
169  if (num > ids.total()) LFATAL("Too many detections: " << num << " for only " << ids.total() << " ids");
170  float const * bb = (float const *)bboxes.data;
171 
172  for (size_t i = 0; i < num; ++i)
173  {
174  if (scores.at<float>(i) < confThreshold) continue;
175 
176  int top = (int)(bb[4 * i] * bsiz.height);
177  int left = (int)(bb[4 * i + 1] * bsiz.width);
178  int bottom = (int)(bb[4 * i + 2] * bsiz.height);
179  int right = (int)(bb[4 * i + 3] * bsiz.width);
180  int width = right - left + 1;
181  int height = bottom - top + 1;
182  classIds.push_back((int)(ids.at<float>(i)) + fudge); // Skip 0th background class id.
183  boxes.push_back(cv::Rect(left, top, width, height));
184  confidences.push_back(scores.at<float>(i));
185  }
186  }
187  break;
188 
189  // ----------------------------------------------------------------------------------------------------
190  case jevois::dnn::postprocessor::DetectType::YOLO:
191  {
192  for (size_t i = 0; i < outs.size(); ++i)
193  {
194  // Network produces output blob with a shape NxC where N is a number of detected objects and C is a number of
195  // classes + 4 where the first 4 numbers are [center_x, center_y, width, height]
196  cv::Mat const & out = outs[i];
197  if (out.size.dims() != 2) LFATAL("Incorrect tensor size: need NxC");
198 
199  float const * data = (float const *)out.data;
200  for (int j = 0; j < out.rows; ++j, data += out.cols)
201  {
202  cv::Mat scores = out.row(j).colRange(5, out.cols);
203  cv::Point classIdPoint;
204  double confidence;
205  cv::minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
206  if (confidence > confThreshold)
207  {
208  int centerX = (int)(data[0] * bsiz.width);
209  int centerY = (int)(data[1] * bsiz.height);
210  int width = (int)(data[2] * bsiz.width);
211  int height = (int)(data[3] * bsiz.height);
212  int left = centerX - width / 2;
213  int top = centerY - height / 2;
214 
215  classIds.push_back(classIdPoint.x);
216  confidences.push_back((float)confidence);
217  boxes.push_back(cv::Rect(left, top, width, height));
218  }
219  }
220  }
221  }
222  break;
223 
224  // ----------------------------------------------------------------------------------------------------
225  case jevois::dnn::postprocessor::DetectType::RAWYOLOface:
226  {
227  if (outs.size() != 1) LFATAL("Expected 1 output tensor but received " << outs.size());
228  static float const defaultbiases[10] {1.08*8,1.19*8, 3.42*8,4.41*8, 6.63*8,11.38*8, 9.42*8,5.11*8, 16.62*8,10.52*8};
229  float const * biases = itsAnchors.size() >= 1 ? itsAnchors[0].data() : defaultbiases;
230  jevois::dnn::npu::yolo(outs[0], classIds, confidences, boxes, itsLabels.size(), biases, 0,
231  confThreshold, bsiz, fudge);
232  }
233  break;
234 
235  // ----------------------------------------------------------------------------------------------------
236  case jevois::dnn::postprocessor::DetectType::RAWYOLOv2:
237  {
238  if (outs.size() != 1) LFATAL("Expected 1 output tensor but received " << outs.size());
239  static float const defaultbiases[10] { 0.738768*8,0.874946*8,2.422040*8,2.657040*8,4.309710*8,
240  7.044930*8,10.246000*8,4.594280*8,12.686800*8,11.874100*8 };
241  float const * biases = itsAnchors.size() >= 1 ? itsAnchors[0].data() : defaultbiases;
242  // Myriad-X model gives [1, 21125], reshape to [5, 25, 13, 13] for VOC or
243  // [1,71825] for COCO with 8 classes
244  if (outs[0].size.dims() == 2)
245  {
246  int const n = outs[0].size[1] / (13 * 13);
247  cv::Mat o = outs[0].reshape(0, { 1, n, 13, 13 });
248  jevois::dnn::npu::yolo(o, classIds, confidences, boxes, itsLabels.size(), biases, 0,
249  confThreshold, bsiz, fudge);
250  }
251  else
252  jevois::dnn::npu::yolo(outs[0], classIds, confidences, boxes, itsLabels.size(), biases, 0,
253  confThreshold, bsiz, fudge);
254  }
255  break;
256 
257  // ----------------------------------------------------------------------------------------------------
258  case jevois::dnn::postprocessor::DetectType::RAWYOLOv3:
259  case jevois::dnn::postprocessor::DetectType::RAWYOLOv4:
260  {
261  if (outs.size() != 3) LFATAL("Expected 3 output tensors but received " << outs.size());
262  static float const defaultbiases[18] {10, 13, 16, 30, 33, 23, 30, 61, 62, 45,
263  59, 119, 116, 90, 156, 198, 373, 326};
264  float const * b0 = itsAnchors.size() >= 1 ? itsAnchors[0].data() : defaultbiases;
265  float const * b1 = itsAnchors.size() >= 2 ? itsAnchors[1].data() : b0;
266  float const * b2 = itsAnchors.size() >= 3 ? itsAnchors[2].data() : b1;
267  jevois::dnn::npu::yolo(outs[0], classIds, confidences, boxes, itsLabels.size(), b2, 2,
268  confThreshold, bsiz, fudge);
269  jevois::dnn::npu::yolo(outs[1], classIds, confidences, boxes, itsLabels.size(), b1, 1,
270  confThreshold, bsiz, fudge);
271  jevois::dnn::npu::yolo(outs[2], classIds, confidences, boxes, itsLabels.size(), b0, 0,
272  confThreshold, bsiz, fudge);
273  }
274  break;
275 
276  // ----------------------------------------------------------------------------------------------------
277  case jevois::dnn::postprocessor::DetectType::RAWYOLOv3tiny:
278  {
279  if (outs.size() != 2) LFATAL("Expected 2 output tensors but received " << outs.size());
280  static float const defaultbiases[12] {10, 14, 23, 27, 37, 58, 81, 82, 135, 169, 344, 319};
281  float const * b0 = itsAnchors.size() >= 1 ? itsAnchors[0].data() : defaultbiases;
282  float const * b1 = itsAnchors.size() >= 2 ? itsAnchors[1].data() : b0;
283  jevois::dnn::npu::yolo(outs[0], classIds, confidences, boxes, itsLabels.size(), b1, 1,
284  confThreshold, bsiz, fudge);
285  jevois::dnn::npu::yolo(outs[1], classIds, confidences, boxes, itsLabels.size(), b0, 0,
286  confThreshold, bsiz, fudge);
287  }
288  break;
289 
290  default:
291  LFATAL("Post-processor detecttype " << detecttype::strget() << " not available on this hardware");
292  }
293 
294  // Cleanup overlapping boxes:
295  std::vector<int> indices;
296  cv::dnn::NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);
297 
298  // Now clamp boxes to be within blob, and adjust the boxes from blob size to input image size:
299  for (cv::Rect & b : boxes)
300  {
301  jevois::dnn::clamp(b, bsiz.width, bsiz.height);
302 
303  cv::Point2f tl = b.tl(); preproc->b2i(tl.x, tl.y);
304  cv::Point2f br = b.br(); preproc->b2i(br.x, br.y);
305  b.x = tl.x; b.y = tl.y; b.width = br.x - tl.x; b.height = br.y - tl.y;
306  }
307 
308  // Store results:
309  itsDetections.clear();
310  for (size_t i = 0; i < indices.size(); ++i)
311  {
312  int idx = indices[i];
313  cv::Rect const & box = boxes[idx];
314  jevois::ObjReco o {confidences[idx] * 100.0f, jevois::dnn::getLabel(itsLabels, classIds[idx]) };
315  std::vector<jevois::ObjReco> ov;
316  ov.emplace_back(o);
317  jevois::ObjDetect od { box.x, box.y, box.x + box.width, box.y + box.height, ov };
318  itsDetections.emplace_back(od);
319  }
320 }
321 
322 // ####################################################################################################
324  jevois::OptGUIhelper * helper, bool overlay,
325  bool JEVOIS_UNUSED_PARAM(idle))
326 {
327  for (jevois::ObjDetect const & o : itsDetections)
328  {
329  std::string categ, label;
330 
331  if (o.reco.empty())
332  {
333  categ = "unknown";
334  label = "unknown";
335  }
336  else
337  {
338  categ = o.reco[0].category;
339  label = jevois::sformat("%s: %.2f", categ.c_str(), o.reco[0].score);
340  }
341 
342  // If desired, draw boxes in output image:
343  if (outimg && overlay)
344  {
345  jevois::rawimage::drawRect(*outimg, o.tlx, o.tly, o.brx - o.tlx, o.bry - o.tly, 2, jevois::yuyv::LightGreen);
346  jevois::rawimage::writeText(*outimg, label, o.tlx + 6, o.tly + 2, jevois::yuyv::LightGreen,
348  }
349 
350 #ifdef JEVOIS_PRO
351  // If desired, draw results on GUI:
352  if (helper)
353  {
354  int col = jevois::dnn::stringToRGBA(categ, 0xff);
355  helper->drawRect(o.tlx, o.tly, o.brx, o.bry, col, true);
356  helper->drawText(o.tlx + 3.0f, o.tly + 3.0f, label.c_str(), col);
357  }
358 #else
359  (void)helper; // keep compiler happy
360 #endif
361 
362  // If desired, send results to serial port:
363  if (mod) mod->sendSerialObjDetImg2D(itsImageSize.width, itsImageSize.height, o);
364  }
365 }
jevois::Component::descriptor
std::string descriptor() const
Get our full descriptor (including all parents) as [Instancename]:[...]:[...].
Definition: Component.C:276
jevois::imu::get
Data collection mode RAW means that the latest available raw data is returned each time get() is called
jevois::dnn::PreProcessor::blobsize
cv::Size blobsize(size_t num) const
Access the width and height of a given blob, accounting for NCHW or NHWC.
Definition: PreProcessor.C:37
jevois::dnn::PostProcessorDetect::freeze
void freeze(bool doit) override
Freeze/unfreeze parameters that users should not change while running.
Definition: PostProcessorDetect.C:35
jevois::dnn::clamp
void clamp(cv::Rect &r, int width, int height)
Clamp a rectangle to within given image width and height.
Definition: Utils.C:250
Module.H
PostProcessorDetect.H
RawImageOps.H
jevois::sformat
std::string sformat(char const *fmt,...) __attribute__((format(__printf__
Create a string using printf style arguments.
Definition: Utils.C:401
jevois::split
std::vector< std::string > split(std::string const &input, std::string const &regex="\\s+")
Split string into vector of tokens using a regex to specify what to split on; the default regex ("\\s+") splits on whitespace.
Definition: Utils.C:257
Utils.H
jevois::RawImage
A raw image as coming from a V4L2 Camera and/or being sent out to a USB Gadget.
Definition: RawImage.H:110
jevois::dnn::stringToRGBA
int stringToRGBA(std::string const &label, unsigned char alpha=128)
Compute a color from a label name.
Definition: Utils.C:76
o
#define o
Definition: Font10x20.C:6
jevois::dnn::readLabelsFile
std::map< int, std::string > readLabelsFile(std::string const &fname)
Read a label file.
Definition: Utils.C:25
jevois::GUIhelper::drawRect
void drawRect(float x1, float y1, float x2, float y2, ImU32 col=IM_COL32(128, 255, 128, 255), bool filled=true)
Draw rectangular box over an image.
Definition: GUIhelper.C:417
jevois::GUIhelper
Helper class to assist modules in creating graphical and GUI elements.
Definition: GUIhelper.H:108
jevois::rawimage::writeText
void writeText(RawImage &img, std::string const &txt, int x, int y, unsigned int col, Font font=Font6x10)
Write some text in an image.
Definition: RawImageOps.C:689
Engine.H
jevois::dnn::getLabel
std::string getLabel(std::map< int, std::string > const &labels, int id)
Get a label from an id.
Definition: Utils.C:68
PostProcessNPUhelpers.H
jevois::ObjReco
A trivial struct to store object recognition results.
Definition: ObjReco.H:23
jevois::dnn::PreProcessor
Pre-Processor for neural network pipeline.
Definition: PreProcessor.H:76
jevois::ObjDetect
A trivial struct to store object detection results.
Definition: ObjDetect.H:26
jevois::dnn::PostProcessorDetect::process
void process(std::vector< cv::Mat > const &outs, PreProcessor *preproc) override
Process outputs and draw/send some results.
Definition: PostProcessorDetect.C:74
jevois::join
std::string join(std::vector< std::string > const &strings, std::string const &delimiter)
Concatenate a vector of tokens into a string.
Definition: Utils.C:267
jevois::StdModule::sendSerialObjDetImg2D
void sendSerialObjDetImg2D(unsigned int camw, unsigned int camh, float x, float y, float w, float h, std::vector< ObjReco > const &res)
Send a standardized object detection + recognition message.
Definition: Module.C:577
jevois::dnn::PostProcessorDetect::onParamChange
void onParamChange(postprocessor::classes const &param, std::string const &val) override
LFATAL
#define LFATAL(msg)
Convenience macro for users to print out console or syslog messages, FATAL level.
Definition: Log.H:217
jevois::GUIhelper::drawText
void drawText(float x, float y, char const *txt, ImU32 col=IM_COL32(128, 255, 128, 255))
Draw text over an image.
Definition: GUIhelper.C:479
jevois::absolutePath
std::string absolutePath(std::string const &root, std::string const &path)
Compute an absolute path from two paths.
Definition: Utils.C:347
PreProcessor.H
jevois::rawimage::drawRect
void drawRect(RawImage &img, int x, int y, unsigned int w, unsigned int h, unsigned int thick, unsigned int col)
Draw a rectangle in a YUYV image.
Definition: RawImageOps.C:607
Utils.H
jevois::dnn::PreProcessor::imagesize
const cv::Size & imagesize() const
Access the last processed image size.
Definition: PreProcessor.C:33
jevois::dnn::PreProcessor::b2i
void b2i(float &x, float &y, size_t blobnum=0)
Convert coordinates from blob back to original image.
Definition: PreProcessor.C:44
jevois::dnn::npu::yolo
void yolo(cv::Mat const &out, std::vector< int > &classIds, std::vector< float > &confidences, std::vector< cv::Rect > &boxes, size_t nclass, float const *biases, int const yolonum, float confThreshold, cv::Size const &bsiz, int fudge)
Definition: PostProcessNPUhelpers.C:52
jevois::StdModule
Base class for a module that supports standardized serial messages.
Definition: Module.H:238
jevois::rawimage::Font10x20
@ Font10x20
Definition: RawImageOps.H:159
GUIhelper.H
jevois::dnn::PostProcessorDetect::~PostProcessorDetect
virtual ~PostProcessorDetect()
Destructor.
Definition: PostProcessorDetect.C:31
jevois::dnn::PostProcessorDetect::report
void report(jevois::StdModule *mod, jevois::RawImage *outimg=nullptr, jevois::OptGUIhelper *helper=nullptr, bool overlay=true, bool idle=false) override
Report what happened in last process() to console/output video/GUI.
Definition: PostProcessorDetect.C:323