JeVois  1.20
JeVois Smart Embedded Machine Vision Toolkit
Share this page:
PostProcessorDetect.C
Go to the documentation of this file.
1 // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
2 //
3 // JeVois Smart Embedded Machine Vision Toolkit - Copyright (C) 2021 by Laurent Itti, the University of Southern
4 // California (USC), and iLab at USC. See http://iLab.usc.edu and http://jevois.org for information about this project.
5 //
6 // This file is part of the JeVois Smart Embedded Machine Vision Toolkit. This program is free software; you can
7 // redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software
8 // Foundation, version 2. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9 // without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
10 // License for more details. You should have received a copy of the GNU General Public License along with this program;
11 // if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
12 //
13 // Contact information: Laurent Itti - 3641 Watt Way, HNB-07A - Los Angeles, CA 90089-2520 - USA.
14 // Tel: +1 213 740 3527 - itti@pollux.usc.edu - http://iLab.usc.edu - http://jevois.org
15 // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
16 /*! \file */
17 
21 #include <jevois/DNN/Utils.H>
22 #include <jevois/Util/Utils.H>
24 #include <jevois/Core/Engine.H>
25 #include <jevois/Core/Module.H>
26 #include <jevois/GPU/GUIhelper.H>
27 
28 #include <opencv2/dnn.hpp>
29 
30 // ####################################################################################################
32 { }
33 
34 // ####################################################################################################
36 {
37  classes::freeze(doit);
38  detecttype::freeze(doit);
39  if (itsYOLO) itsYOLO->freeze(doit);
40 }
41 
42 // ####################################################################################################
43 void jevois::dnn::PostProcessorDetect::onParamChange(postprocessor::classes const & JEVOIS_UNUSED_PARAM(param),
44  std::string const & val)
45 {
46  if (val.empty()) { itsLabels.clear(); return; }
47 
48  // Get the dataroot of our network. We assume that there is a sub-component named "network" that is a sibling of us:
49  std::vector<std::string> dd = jevois::split(Component::descriptor(), ":");
50  dd.back() = "network"; dd.emplace_back("dataroot");
51  std::string const dataroot = engine()->getParamStringUnique(jevois::join(dd, ":"));
52 
53  itsLabels = jevois::dnn::readLabelsFile(jevois::absolutePath(dataroot, val));
54 }
55 // ####################################################################################################
56 void jevois::dnn::PostProcessorDetect::onParamChange(postprocessor::detecttype const & JEVOIS_UNUSED_PARAM(param),
57  postprocessor::DetectType const & val)
58 {
59  if (val == postprocessor::DetectType::RAWYOLO)
60  itsYOLO = addSubComponent<jevois::dnn::PostProcessorDetectYOLO>("yolo");
61  else
62  {
63  itsYOLO.reset();
64  removeSubComponent("yolo", false);
65  }
66 }
67 
68 // ####################################################################################################
69 void jevois::dnn::PostProcessorDetect::process(std::vector<cv::Mat> const & outs, jevois::dnn::PreProcessor * preproc)
70 {
71  if (outs.empty()) LFATAL("No outputs received, we need at least one.");
72  cv::Mat const & out = outs[0]; cv::MatSize const & msiz = out.size;
73 
74  float const confThreshold = cthresh::get() * 0.01F;
75  float const boxThreshold = dthresh::get() * 0.01F;
76  float const nmsThreshold = nms::get() * 0.01F;
77  int const fudge = classoffset::get();
78  itsImageSize = preproc->imagesize();
79 
80  // To draw boxes, we will need to:
81  // - scale from [0..1]x[0..1] to blobw x blobh
82  // - scale and center from blobw x blobh to input image w x h, provided by PreProcessor::b2i()
83  // - when using the GUI, we further scale and translate to OpenGL display coordinates using GUIhelper::i2d()
84  // Here we assume that the first blob sets the input size.
85  cv::Size const bsiz = preproc->blobsize(0);
86 
87  // We keep 3 vectors here instead of creating a class to hold all of the data because OpenCV will need that for
88  // non-maximum suppression:
89  std::vector<int> classIds;
90  std::vector<float> confidences;
91  std::vector<cv::Rect> boxes;
92  size_t const maxbox = maxnbox::get();
93 
94  // Here we just scale the coords from [0..1]x[0..1] to blobw x blobh:
95  try
96  {
97  switch(detecttype::get())
98  {
99  // ----------------------------------------------------------------------------------------------------
100  case jevois::dnn::postprocessor::DetectType::FasterRCNN:
101  {
102  // Network produces output blob with a shape 1x1xNx7 where N is a number of detections and an every detection is
103  // a vector of values [batchId, classId, confidence, left, top, right, bottom]
104  if (outs.size() != 1 || msiz.dims() != 4 || msiz[0] != 1 || msiz[1] != 1 || msiz[3] != 7)
105  LTHROW("Expected 1 output blob with shape 1x1xNx7 for N detections with values "
106  "[batchId, classId, confidence, left, top, right, bottom]");
107 
108  float const * data = (float const *)out.data;
109  for (size_t i = 0; i < out.total(); i += 7)
110  {
111  float confidence = data[i + 2];
112  if (confidence > confThreshold)
113  {
114  int left = (int)data[i + 3];
115  int top = (int)data[i + 4];
116  int right = (int)data[i + 5];
117  int bottom = (int)data[i + 6];
118  int width = right - left + 1;
119  int height = bottom - top + 1;
120  classIds.push_back((int)(data[i + 1]) + fudge); // Skip 0th background class id.
121  boxes.push_back(cv::Rect(left, top, width, height));
122  confidences.push_back(confidence);
123  if (classIds.size() > maxbox) break; // Stop if too many boxes
124  }
125  }
126  }
127  break;
128 
129  // ----------------------------------------------------------------------------------------------------
130  case jevois::dnn::postprocessor::DetectType::SSD:
131  {
132  // Network produces output blob with a shape 1x1xNx7 where N is a number of detections and an every detection is
133  // a vector of values [batchId, classId, confidence, left, top, right, bottom]
134  if (outs.size() != 1 || msiz.dims() != 4 || msiz[0] != 1 || msiz[1] != 1 || msiz[3] != 7)
135  LTHROW("Expected 1 output blob with shape 1x1xNx7 for N detections with values "
136  "[batchId, classId, confidence, left, top, right, bottom]");
137 
138  float const * data = (float const *)out.data;
139  for (size_t i = 0; i < out.total(); i += 7)
140  {
141  float confidence = data[i + 2];
142  if (confidence > confThreshold)
143  {
144  int left = (int)(data[i + 3] * bsiz.width);
145  int top = (int)(data[i + 4] * bsiz.height);
146  int right = (int)(data[i + 5] * bsiz.width);
147  int bottom = (int)(data[i + 6] * bsiz.height);
148  int width = right - left + 1;
149  int height = bottom - top + 1;
150  classIds.push_back((int)(data[i + 1]) + fudge); // Skip 0th background class id.
151  boxes.push_back(cv::Rect(left, top, width, height));
152  confidences.push_back(confidence);
153  if (classIds.size() > maxbox) break; // Stop if too many boxes
154  }
155  }
156  }
157  break;
158 
159  // ----------------------------------------------------------------------------------------------------
160  case jevois::dnn::postprocessor::DetectType::TPUSSD:
161  {
162  // Network produces 4 output blobs with shapes 4xN for boxes, N for IDs, N for scores, and 1x1 for count
163  // (see GetDetectionResults in detection/adapter.cc of libcoral):
164  if (outs.size() != 4)
165  LTHROW("Expected 4 output blobs with shapes 4xN for boxes, N for IDs, N for scores, and 1x1 for count");
166  cv::Mat const & bboxes = outs[0];
167  cv::Mat const & ids = outs[1];
168  cv::Mat const & scores = outs[2];
169  cv::Mat const & count = outs[3];
170  if (bboxes.total() != 4 * ids.total() || bboxes.total() != 4 * scores.total() || count.total() != 1)
171  LTHROW("Expected 4 output blobs with shapes 4xN for boxes, N for IDs, N for scores, and 1x1 for count");
172 
173  size_t num = count.at<float>(0);
174  if (num > ids.total()) LTHROW("Too many detections: " << num << " for only " << ids.total() << " ids");
175  float const * bb = (float const *)bboxes.data;
176 
177  for (size_t i = 0; i < num; ++i)
178  {
179  if (scores.at<float>(i) < confThreshold) continue;
180 
181  int top = (int)(bb[4 * i] * bsiz.height);
182  int left = (int)(bb[4 * i + 1] * bsiz.width);
183  int bottom = (int)(bb[4 * i + 2] * bsiz.height);
184  int right = (int)(bb[4 * i + 3] * bsiz.width);
185  int width = right - left + 1;
186  int height = bottom - top + 1;
187  classIds.push_back((int)(ids.at<float>(i)) + fudge); // Skip 0th background class id.
188  boxes.push_back(cv::Rect(left, top, width, height));
189  confidences.push_back(scores.at<float>(i));
190  if (classIds.size() > maxbox) break; // Stop if too many boxes
191  }
192  }
193  break;
194 
195  // ----------------------------------------------------------------------------------------------------
196  case jevois::dnn::postprocessor::DetectType::YOLO:
197  {
198  for (size_t i = 0; i < outs.size(); ++i)
199  {
200  // Network produces output blob(s) with shape Nx(5+C) where N is a number of detected objects and C is a number
201  // of classes + 5 where the first 5 numbers are [center_x, center_y, width, height, box score].
202  cv::Mat const & out = outs[i];
203  cv::MatSize const & ms = out.size; int const nd = ms.dims();
204  int nbox = -1, ndata = -1;
205 
206  if (nd >= 2)
207  {
208  nbox = ms[nd-2];
209  ndata = ms[nd-1];
210  for (int i = 0; i < nd-2; ++i) if (ms[i] != 1) nbox = -1; // reject if more than 2 effective dims
211  }
212 
213  if (nbox < 0 || ndata < 5)
214  LTHROW("Expected 1 or more output blobs with shape Nx(5+C) where N is the number of "
215  "detected objects, C is the number of classes, and the first 5 columns are "
216  "[center_x, center_y, width, height, box score]. // "
217  "Incorrect size " << jevois::dnn::shapestr(out) << " for output " << i <<
218  ": need Nx(5+C) or 1xNx(5+C)");
219 
220  // Some networks, like YOLOv5 or YOLOv7, output 3D 1xNx(5+C), so here we slice off the last 2 dims:
221  int sz2[] = { nbox, ndata };
222  cv::Mat const out2(2, sz2, out.type(), out.data);
223 
224  float const * data = (float const *)out2.data;
225  for (int j = 0; j < nbox; ++j, data += ndata)
226  {
227  if (data[4] < boxThreshold) continue; // skip if box score is too low
228 
229  cv::Mat scores = out2.row(j).colRange(5, ndata);
230  cv::Point classIdPoint; double confidence;
231  cv::minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
232 
233  if (confidence < confThreshold) continue; // skip if class score too low
234 
235  // YOLO<5 produces boxes in [0..1[x[0..1[ and 2D output blob:
236  int centerX, centerY, width, height;
237  if (nd == 2)
238  {
239  centerX = (int)(data[0] * bsiz.width);
240  centerY = (int)(data[1] * bsiz.height);
241  width = (int)(data[2] * bsiz.width);
242  height = (int)(data[3] * bsiz.height);
243  }
244  else
245  {
246  // YOLOv5, YOLOv7 produce boxes already scaled by input blob size, and 3D output blob:
247  centerX = (int)(data[0]);
248  centerY = (int)(data[1]);
249  width = (int)(data[2]);
250  height = (int)(data[3]);
251  }
252 
253  int left = centerX - width / 2;
254  int top = centerY - height / 2;
255  boxes.push_back(cv::Rect(left, top, width, height));
256  classIds.push_back(classIdPoint.x);
257  confidences.push_back((float)confidence);
258  if (classIds.size() > maxbox) break; // Stop if too many boxes
259  }
260  }
261  }
262  break;
263 
264  // ----------------------------------------------------------------------------------------------------
265  case jevois::dnn::postprocessor::DetectType::RAWYOLO:
266  {
267  if (itsYOLO) itsYOLO->yolo(outs, classIds, confidences, boxes, itsLabels.size(), boxThreshold, confThreshold,
268  bsiz, fudge, maxbox);
269  else LFATAL("Internal error -- no YOLO subcomponent");
270  }
271  break;
272 
273  default:
274  // Do not use strget() here as it will throw!
275  LTHROW("Unsupported Post-processor detecttype " << int(detecttype::get()));
276  }
277  }
278  // Abort here if the received outputs were malformed:
279  catch (std::exception const & e)
280  {
281  std::string err = "Selected detecttype is " + detecttype::strget() + " and network produced:\n\n";
282  for (cv::Mat const & m : outs) err += "- " + jevois::dnn::shapestr(m) + "\n";
283  err += "\nFATAL ERROR(s):\n\n";
284  err += e.what();
285  LFATAL(err);
286  }
287 
288  // Cleanup overlapping boxes:
289  std::vector<int> indices;
290  cv::dnn::NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);
291 
292  // Now clamp boxes to be within blob, and adjust the boxes from blob size to input image size:
293  for (cv::Rect & b : boxes)
294  {
295  jevois::dnn::clamp(b, bsiz.width, bsiz.height);
296 
297  cv::Point2f tl = b.tl(); preproc->b2i(tl.x, tl.y);
298  cv::Point2f br = b.br(); preproc->b2i(br.x, br.y);
299  b.x = tl.x; b.y = tl.y; b.width = br.x - tl.x; b.height = br.y - tl.y;
300  }
301 
302  // Store results:
303  itsDetections.clear();
304  for (size_t i = 0; i < indices.size(); ++i)
305  {
306  int idx = indices[i];
307  cv::Rect const & box = boxes[idx];
308  jevois::ObjReco o {confidences[idx] * 100.0f, jevois::dnn::getLabel(itsLabels, classIds[idx]) };
309  std::vector<jevois::ObjReco> ov;
310  ov.emplace_back(o);
311  jevois::ObjDetect od { box.x, box.y, box.x + box.width, box.y + box.height, ov };
312  itsDetections.emplace_back(od);
313  }
314 }
315 
316 // ####################################################################################################
318  jevois::OptGUIhelper * helper, bool overlay,
319  bool JEVOIS_UNUSED_PARAM(idle))
320 {
321  for (jevois::ObjDetect const & o : itsDetections)
322  {
323  std::string categ, label;
324 
325  if (o.reco.empty())
326  {
327  categ = "unknown";
328  label = "unknown";
329  }
330  else
331  {
332  categ = o.reco[0].category;
333  label = jevois::sformat("%s: %.2f", categ.c_str(), o.reco[0].score);
334  }
335 
336  // If desired, draw boxes in output image:
337  if (outimg && overlay)
338  {
339  jevois::rawimage::drawRect(*outimg, o.tlx, o.tly, o.brx - o.tlx, o.bry - o.tly, 2, jevois::yuyv::LightGreen);
340  jevois::rawimage::writeText(*outimg, label, o.tlx + 6, o.tly + 2, jevois::yuyv::LightGreen,
342  }
343 
344 #ifdef JEVOIS_PRO
345  // If desired, draw results on GUI:
346  if (helper)
347  {
348  int col = jevois::dnn::stringToRGBA(categ, 0xff);
349  helper->drawRect(o.tlx, o.tly, o.brx, o.bry, col, true);
350  helper->drawText(o.tlx + 3.0f, o.tly + 3.0f, label.c_str(), col);
351  }
352 #else
353  (void)helper; // keep compiler happy
354 #endif
355 
356  // If desired, send results to serial port:
357  if (mod) mod->sendSerialObjDetImg2D(itsImageSize.width, itsImageSize.height, o);
358  }
359 }
jevois::Component::descriptor
std::string descriptor() const
Get our full descriptor (including all parents) as [Instancename]:[...]:[...].
Definition: Component.C:276
jevois::imu::get
Data collection mode RAW means that the latest available raw data is returned each time get() is called
jevois::dnn::PreProcessor::blobsize
cv::Size blobsize(size_t num) const
Access the width and height of a given blob, accounting for NCHW or NHWC.
Definition: PreProcessor.C:43
jevois::dnn::PostProcessorDetect::freeze
void freeze(bool doit) override
Freeze/unfreeze parameters that users should not change while running.
Definition: PostProcessorDetect.C:35
jevois::dnn::clamp
void clamp(cv::Rect &r, int width, int height)
Clamp a rectangle to within given image width and height.
Definition: Utils.C:366
Module.H
PostProcessorDetect.H
PostProcessorDetectYOLO.H
RawImageOps.H
jevois::sformat
std::string sformat(char const *fmt,...) __attribute__((format(__printf__
Create a string using printf style arguments.
Definition: Utils.C:439
jevois::split
std::vector< std::string > split(std::string const &input, std::string const &regex="\\s+")
Split string into vector of tokens using a regex to specify what to split on; default regex splits by...
Definition: Utils.C:270
Utils.H
jevois::RawImage
A raw image as coming from a V4L2 Camera and/or being sent out to a USB Gadget.
Definition: RawImage.H:110
jevois::dnn::stringToRGBA
int stringToRGBA(std::string const &label, unsigned char alpha=128)
Compute a color from a label name.
Definition: Utils.C:75
o
#define o
Definition: Font10x20.C:6
jevois::dnn::readLabelsFile
std::map< int, std::string > readLabelsFile(std::string const &fname)
Read a label file.
Definition: Utils.C:24
jevois::GUIhelper::drawRect
void drawRect(float x1, float y1, float x2, float y2, ImU32 col=IM_COL32(128, 255, 128, 255), bool filled=true)
Draw rectangular box over an image.
Definition: GUIhelper.C:479
jevois::GUIhelper
Helper class to assist modules in creating graphical and GUI elements.
Definition: GUIhelper.H:128
LTHROW
#define LTHROW(msg)
Definition: Log.H:251
jevois::absolutePath
std::filesystem::path absolutePath(std::filesystem::path const &root, std::filesystem::path const &path)
Compute an absolute path from two paths.
Definition: Utils.C:385
jevois::dnn::shapestr
std::string shapestr(cv::Mat const &m)
Get a string of the form: "nD AxBxC... TYPE" from an n-dimensional cv::Mat with data type TYPE.
Definition: Utils.C:104
jevois::rawimage::writeText
void writeText(RawImage &img, std::string const &txt, int x, int y, unsigned int col, Font font=Font6x10)
Write some text in an image.
Definition: RawImageOps.C:689
Engine.H
jevois::dnn::getLabel
std::string getLabel(std::map< int, std::string > const &labels, int id)
Get a label from an id.
Definition: Utils.C:67
jevois::ObjReco
A trivial struct to store object recognition results.
Definition: ObjReco.H:23
jevois::dnn::PreProcessor
Pre-Processor for neural network pipeline.
Definition: PreProcessor.H:108
jevois::ObjDetect
A trivial struct to store object detection results.
Definition: ObjDetect.H:26
jevois::dnn::PostProcessorDetect::process
void process(std::vector< cv::Mat > const &outs, PreProcessor *preproc) override
Process outputs and draw/send some results.
Definition: PostProcessorDetect.C:69
jevois::join
std::string join(std::vector< std::string > const &strings, std::string const &delimiter)
Concatenate a vector of tokens into a string.
Definition: Utils.C:280
jevois::StdModule::sendSerialObjDetImg2D
void sendSerialObjDetImg2D(unsigned int camw, unsigned int camh, float x, float y, float w, float h, std::vector< ObjReco > const &res)
Send a standardized object detection + recognition message.
Definition: Module.C:573
LFATAL
#define LFATAL(msg)
Convenience macro for users to print out console or syslog messages, FATAL level.
jevois::GUIhelper::drawText
void drawText(float x, float y, char const *txt, ImU32 col=IM_COL32(128, 255, 128, 255))
Draw text over an image.
Definition: GUIhelper.C:541
PreProcessor.H
jevois::dnn::PostProcessorDetect::onParamChange
void onParamChange(postprocessor::detecttype const &param, postprocessor::DetectType const &val) override
jevois::rawimage::drawRect
void drawRect(RawImage &img, int x, int y, unsigned int w, unsigned int h, unsigned int thick, unsigned int col)
Draw a rectangle in a YUYV image.
Definition: RawImageOps.C:607
Utils.H
jevois::dnn::PreProcessor::imagesize
const cv::Size & imagesize() const
Access the last processed image size.
Definition: PreProcessor.C:39
jevois::dnn::PreProcessor::b2i
void b2i(float &x, float &y, size_t blobnum=0)
Convert coordinates from blob back to original image.
Definition: PreProcessor.C:50
jevois::StdModule
Base class for a module that supports standardized serial messages.
Definition: Module.H:232
jevois::rawimage::Font10x20
@ Font10x20
Definition: RawImageOps.H:159
GUIhelper.H
jevois::dnn::PostProcessorDetect::~PostProcessorDetect
virtual ~PostProcessorDetect()
Destructor.
Definition: PostProcessorDetect.C:31
jevois::dnn::PostProcessorDetect::report
void report(jevois::StdModule *mod, jevois::RawImage *outimg=nullptr, jevois::OptGUIhelper *helper=nullptr, bool overlay=true, bool idle=false) override
Report what happened in last process() to console/output video/GUI.
Definition: PostProcessorDetect.C:317