JeVois  1.18
JeVois Smart Embedded Machine Vision Toolkit
Share this page:
PostProcessorDetectYOLO.C
Go to the documentation of this file.
1 // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
2 //
3 // JeVois Smart Embedded Machine Vision Toolkit - Copyright (C) 2021 by Laurent Itti, the University of Southern
4 // California (USC), and iLab at USC. See http://iLab.usc.edu and http://jevois.org for information about this project.
5 //
6 // This file is part of the JeVois Smart Embedded Machine Vision Toolkit. This program is free software; you can
7 // redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software
8 // Foundation, version 2. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9 // without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
10 // License for more details. You should have received a copy of the GNU General Public License along with this program;
11 // if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
12 //
13 // Contact information: Laurent Itti - 3641 Watt Way, HNB-07A - Los Angeles, CA 90089-2520 - USA.
14 // Tel: +1 213 740 3527 - itti@pollux.usc.edu - http://iLab.usc.edu - http://jevois.org
15 // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
16 /*! \file */
17 
19 #include <jevois/Debug/Log.H>
20 #include <jevois/Util/Async.H>
21 #include <jevois/DNN/Utils.H>
22 #include <nn_detect_common.h>
23 
24 #include <stdlib.h>
25 #include <stdio.h>
26 #include <string.h>
27 #include <math.h>
28 #include <float.h>
29 #include <future>
30 
31 
32 // ####################################################################################################
33 void jevois::dnn::PostProcessorDetectYOLO::onParamChange(postprocessor::anchors const & JEVOIS_UNUSED_PARAM(param),
34  std::string const & val)
35 {
36  itsAnchors.clear();
37  if (val.empty()) return;
38 
39  auto tok = jevois::split(val, "\\s*;\\s*");
40  for (std::string const & t : tok)
41  {
42  auto atok = jevois::split(t, "\\s*,\\s*");
43  if (atok.size() & 1) LFATAL("Odd number of values not allowed in anchor spec [" << t << ']');
44  std::vector<float> a;
45  for (std::string const & at : atok) a.emplace_back(std::stof(at));
46  itsAnchors.emplace_back(std::move(a));
47  }
48 }
49 
50 // ####################################################################################################
52 { }
53 
54 // ####################################################################################################
56 {
57  anchors::freeze(doit);
58 }
59 
60 // ####################################################################################################
61 // Helper code from the detect_library of the NPU
namespace
{
  // Logistic (sigmoid) function: returns 1 / (1 + exp(-x)), computed in single precision.
  inline float logistic_activate(float x)
  {
    float const denom = 1.0F + expf(-x);
    return 1.0F / denom;
  }
}
67 
68 // ####################################################################################################
69 void jevois::dnn::PostProcessorDetectYOLO::yolo(std::vector<cv::Mat> const & outs, std::vector<int> & classIds,
70  std::vector<float> & confidences, std::vector<cv::Rect> & boxes,
71  size_t nclass, float boxThreshold, float confThreshold,
72  cv::Size const & bsiz, int fudge, size_t const maxbox)
73 {
74  if (nclass == 0) nclass = 1; // Assume 1 class if no list of classes was given
75  size_t const nouts = outs.size();
76  if (nouts == 0) LTHROW("No output tensors received");
77  if (itsAnchors.size() != nouts) LTHROW("Need " << nouts << " sets of anchors");
78 
79  // std::vector<std::pair<float const * /* biases */, size_t /* num floats */>> biases;
80  /*
81  if (biases.empty())
82  {
83  // Use default biases:
84  size_t const biases_per_scale = default_size / nouts;
85  for (size_t i = 0; i < nouts; ++i)
86  biases.emplace_back(std::make_pair(&default_biases[i * biases_per_scale], biases_per_scale));
87  }
88  else
89  {
90  // Use biases provided in model zoo file:
91  if (anchors.size() != nouts) LTHROW("Need " << nouts << " sets of anchors");
92 
93  for (size_t i = 0; i < nouts; ++i)
94  biases.emplace_back(std::make_pair(&anchors[i][0], anchors[i].size()));
95  }
96 */
97  // Various networks will yield their YOLO outputs in various orders. But our default anchors (and the doc for the
98  // anchors parameter) assumes order from large to small, e.g., first 52x52, then 26x26, then 13x13. So here we need to
99  // sort the outputs in decreasing size order to get the correct yolonum:
100  if (itsYoloNum.empty())
101  {
102  for (size_t i = 0; i < nouts; ++i) itsYoloNum.emplace_back(i);
103  std::sort(itsYoloNum.begin(), itsYoloNum.end(),
104  [&outs](int const & a, int const & b) { return outs[a].total() > outs[b].total(); });
105 
106  // Allow users to check our assignment:
107  for (size_t i = 0; i < nouts; ++i)
108  {
109  int const yn = itsYoloNum[i];
110  std::vector<float> const & anc = itsAnchors[yn];
111  std::string vstr;
112  for (size_t a = 0; a < anc.size(); a += 2) vstr += jevois::sformat("%.2f,%.2f ", anc[a], anc[a+1]);
113  LINFO("Out " << i << ": " << jevois::dnn::shapestr(outs[i]) << ", scale=1/" << (8<<yn) <<
114  ", anchors=[ " << vstr <<']');
115  }
116  }
117 
118  // Run each scale in a thread:
119  bool sigmo = sigmoid::get();
120  float scale_xy = scalexy::get();
121  std::vector<std::future<void>> fvec;
122 
123  for (size_t i = 0; i < nouts; ++i)
124  fvec.emplace_back(jevois::async([&](size_t i)
125  { yolo_one(outs[i], classIds, confidences, boxes, nclass, itsYoloNum[i], boxThreshold, confThreshold,
126  bsiz, fudge, maxbox, sigmo, scale_xy); }, i));
127 
128  // Use joinall() to get() all futures and throw a single consolidated exception if any thread threw:
129  jevois::joinall(fvec);
130 }
131 
132 // ####################################################################################################
133 void jevois::dnn::PostProcessorDetectYOLO::yolo_one(cv::Mat const & out, std::vector<int> & classIds,
134  std::vector<float> & confidences, std::vector<cv::Rect> & boxes,
135  size_t nclass, int yolonum, float boxThreshold,
136  float confThreshold, cv::Size const & bsiz, int fudge,
137  size_t maxbox, bool sigmo, float scale_xy)
138 {
139  if (out.type() != CV_32F) LTHROW("Need FLOAT32 data");
140  cv::MatSize const & msiz = out.size;
141  if (msiz.dims() != 4 || msiz[0] != 1)
142  LTHROW("Incorrect tensor size: need 1xCxHxW or 1xHxWxC, got " << jevois::dnn::shapestr(out));
143 
144  // C=(dim[1] or dims[3]) is (coords = 4 + 1 for box score + classes) * n_anchors:
145  // n_anchors = 5 for yoloface, yolov2
146  // n_anchors = 3 for yolov3/v4/v5/v7 and those have 3 separate output tensors for 3 scales
147 
148  // Try NCHW first (e.g., from NPU):
149  bool nchw = true;
150  int w = msiz[3];
151  int h = msiz[2];
152  int constexpr coords = 4;
153  int const bbsize = coords + 1 + nclass;
154  int n = msiz[1] / bbsize;
155  if (msiz[1] % bbsize)
156  {
157  // Ok, try NHWC (e.g., YOLOv5 on Hailo):
158  nchw = false;
159  w = msiz[2];
160  h = msiz[1];
161  n = msiz[3] / bbsize;
162 
163  if (msiz[3] % bbsize)
164  LTHROW("Incorrect tensor size: need 1xCxHxW or 1xHxWxC where "
165  "C=num_anchors*(4 coords + 1 box_score + nclass object_scores), got " << jevois::dnn::shapestr(out) <<
166  ", nclass=" << nclass << ", num_anchors=" << itsAnchors[yolonum].size()/2);
167  }
168 
169  float const bfac = 1.0F / (8 << yolonum);
170  size_t const total = h * w * n * bbsize;
171  if (total != out.total()) LTHROW("Ooops");
172  std::vector<float> const & biases = itsAnchors[yolonum];
173  if (int(biases.size()) != n*2)
174  LTHROW(n << " boxes received but only " << biases.size()/2 << " boxw,boxh anchors provided");
175 
176  // Stride from one box field (coords, score, class) to the next:
177  size_t const stride = nchw ? h * w : 1;
178  size_t const nextloc = nchw ? 1 : n * bbsize;
179  float const * locptr = (float const *)out.data;
180  size_t const ncs = nclass * stride;
181 
182  // Loop over all locations:
183  for (int row = 0; row < h; ++row)
184  for (int col = 0; col < w; ++col)
185  {
186  // locptr points to the set of boxes at the current location. Initialize ptr to the first box:
187  float const * ptr = locptr;
188 
189  // Loop over all boxes per location:
190  for (int nn = 0; nn < n; ++nn)
191  {
192  // Apply logistic activation to box score:
193  float box_score = ptr[coords * stride];
194  if (sigmo) box_score = logistic_activate(box_score);
195 
196  if (box_score > boxThreshold)
197  {
198  // Get index of highest-scoring class and its score:
199  size_t const class_index = (coords + 1) * stride;
200  size_t maxidx = 0; float prob = 0.0F;
201  for (size_t k = 0; k < ncs; k += stride)
202  if (ptr[class_index + k] > prob) { prob = ptr[class_index + k]; maxidx = k; }
203  if (sigmo) prob = logistic_activate(prob);
204 
205  // Combine box and class scores:
206  prob *= box_score;
207 
208  // If best class was above threshold, keep that box:
209  if (prob > confThreshold)
210  {
211  // Decode the box and scale it to input blob dims:
212  cv::Rect b;
213 
214  if (scale_xy)
215  {
216  // New coordinates style, as in YOLOv5/7:
217  float bx = ptr[0 * stride], by = ptr[1 * stride], bw = ptr[2 * stride], bh = ptr[3 * stride];
218  if (sigmo)
219  {
220  bx = logistic_activate(bx);
221  by = logistic_activate(by);
222  bw = logistic_activate(bw);
223  bh = logistic_activate(bh);
224  }
225 
226  b.width = bw * bw * 4.0f * biases[2*nn] * bfac * bsiz.width / w + 0.499F;
227  b.height = bh * bh * 4.0F * biases[2*nn+1] * bfac * bsiz.height / h + 0.499F;
228  b.x = (bx * scale_xy - 0.5F + col) * bsiz.width / w + 0.499F - b.width / 2;
229  b.y = (by * scale_xy - 0.5F + row) * bsiz.height / h + 0.499F - b.height / 2;
230  }
231  else
232  {
233  // Old-style coordinates, as in YOLOv2/3/4:
234  b.width = expf(ptr[2 * stride]) * biases[2*nn] * bfac * bsiz.width / w + 0.499F;
235  b.height = expf(ptr[3 * stride]) * biases[2*nn+1] * bfac * bsiz.height / h + 0.499F;
236  b.x = (col + logistic_activate(ptr[0 * stride])) * bsiz.width / w + 0.499F - b.width / 2;
237  b.y = (row + logistic_activate(ptr[1 * stride])) * bsiz.height / h + 0.499F - b.height / 2;
238  }
239 
240  std::lock_guard<std::mutex> _(itsOutMtx);
241  boxes.emplace_back(b);
242  classIds.emplace_back(maxidx / stride + fudge);
243  confidences.emplace_back(prob);
244  if (classIds.size() > maxbox) return; // Stop if too many boxes
245  }
246  }
247 
248  // Next box within the current location:
249  ptr += bbsize * stride;
250  }
251  // Next location:
252  locptr += nextloc;
253  }
254 }
jevois::imu::get
Data collection mode RAW means that the latest available raw data is returned each time get() is called
jevois::async
std::future< std::invoke_result_t< std::decay_t< Function >, std::decay_t< Args >... > > async(Function &&f, Args &&... args)
Async execution using a thread pool.
Async.H
PostProcessorDetectYOLO.H
jevois::sformat
std::string sformat(char const *fmt,...) __attribute__((format(__printf__
Create a string using printf style arguments.
Definition: Utils.C:419
jevois::split
std::vector< std::string > split(std::string const &input, std::string const &regex="\\s+")
Split string into vector of tokens using a regex to specify what to split on; default regex splits by...
Definition: Utils.C:258
Utils.H
jevois::dnn::PostProcessorDetectYOLO::~PostProcessorDetectYOLO
virtual ~PostProcessorDetectYOLO()
Destructor.
Definition: PostProcessorDetectYOLO.C:51
LTHROW
#define LTHROW(msg)
Definition: Log.H:251
jevois::dnn::shapestr
std::string shapestr(cv::Mat const &m)
Get a string of the form: "nD AxBxC... TYPE" from an n-dimensional cv::Mat with data type TYPE.
Definition: Utils.C:104
jevois::joinall
std::vector< T > joinall(std::vector< std::future< T >> &fvec, bool multiline=true)
Collect results from several async threads that are all returning a T result.
Log.H
jevois::dnn::PostProcessorDetectYOLO::freeze
void freeze(bool doit)
Freeze/unfreeze parameters that users should not change while running.
Definition: PostProcessorDetectYOLO.C:55
LFATAL
#define LFATAL(msg)
Convenience macro for users to print out console or syslog messages, FATAL level.
h
int h
Definition: GUIhelper.C:1968
jevois::dnn::PostProcessorDetectYOLO::yolo
void yolo(std::vector< cv::Mat > const &outs, std::vector< int > &classIds, std::vector< float > &confidences, std::vector< cv::Rect > &boxes, size_t nclass, float boxThreshold, float confThreshold, cv::Size const &bsiz, int fudge, size_t const maxbox)
Generic raw YOLO processing.
Definition: PostProcessorDetectYOLO.C:69
LINFO
#define LINFO(msg)
Convenience macro for users to print out console or syslog messages, INFO level.
Definition: Log.H:194