JeVois  1.22
JeVois Smart Embedded Machine Vision Toolkit
Share this page:
Loading...
Searching...
No Matches
PostProcessorDetectYOLO.C
Go to the documentation of this file.
1// ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
2//
3// JeVois Smart Embedded Machine Vision Toolkit - Copyright (C) 2021 by Laurent Itti, the University of Southern
4// California (USC), and iLab at USC. See http://iLab.usc.edu and http://jevois.org for information about this project.
5//
6// This file is part of the JeVois Smart Embedded Machine Vision Toolkit. This program is free software; you can
7// redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software
8// Foundation, version 2. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9// without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
10// License for more details. You should have received a copy of the GNU General Public License along with this program;
11// if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
12//
13// Contact information: Laurent Itti - 3641 Watt Way, HNB-07A - Los Angeles, CA 90089-2520 - USA.
14// Tel: +1 213 740 3527 - itti@pollux.usc.edu - http://iLab.usc.edu - http://jevois.org
15// ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
16/*! \file */
17
19#include <jevois/Debug/Log.H>
20#include <jevois/Util/Async.H>
21#include <jevois/DNN/Utils.H>
22#include <nn_detect_common.h>
23
24#include <stdlib.h>
25#include <stdio.h>
26#include <string.h>
27#include <math.h>
28#include <float.h>
29#include <future>
30
31
32// ####################################################################################################
33void jevois::dnn::PostProcessorDetectYOLO::onParamChange(postprocessor::anchors const &, std::string const & val)
34{
35 itsAnchors.clear();
36 if (val.empty()) return;
37
38 auto tok = jevois::split(val, "\\s*;\\s*");
39 for (std::string const & t : tok)
40 {
41 auto atok = jevois::split(t, "\\s*,\\s*");
42 if (atok.size() & 1) LFATAL("Odd number of values not allowed in anchor spec [" << t << ']');
43 std::vector<float> a;
44 for (std::string const & at : atok) a.emplace_back(std::stof(at));
45 itsAnchors.emplace_back(std::move(a));
46 }
47}
48
49// ####################################################################################################
52
53// ####################################################################################################
55{
56 anchors::freeze(doit);
57}
58
59// ####################################################################################################
60void jevois::dnn::PostProcessorDetectYOLO::yolo(std::vector<cv::Mat> const & outs, std::vector<int> & classIds,
61 std::vector<float> & confidences, std::vector<cv::Rect> & boxes,
62 size_t nclass, float boxThreshold, float confThreshold,
63 cv::Size const & bsiz, int fudge, size_t const maxbox, bool sigmo)
64{
65 if (nclass == 0) nclass = 1; // Assume 1 class if no list of classes was given
66 size_t const nouts = outs.size();
67 if (nouts == 0) LTHROW("No output tensors received");
68 if (itsAnchors.size() != nouts) LTHROW("Need " << nouts << " sets of anchors");
69
70 // Various networks will yield their YOLO outputs in various orders. But our default anchors (and the doc for the
71 // anchors parameter) assumes order from large to small, e.g., first 52x52, then 26x26, then 13x13. So here we need to
72 // sort the outputs in decreasing size order to get the correct yolonum:
73 if (itsYoloNum.empty())
74 {
75 for (size_t i = 0; i < nouts; ++i) itsYoloNum.emplace_back(i);
76 std::sort(itsYoloNum.begin(), itsYoloNum.end(),
77 [&outs](int const & a, int const & b) { return outs[a].total() > outs[b].total(); });
78
79 // Allow users to check our assignment:
80 for (size_t i = 0; i < nouts; ++i)
81 {
82 int const yn = itsYoloNum[i];
83 std::vector<float> const & anc = itsAnchors[yn];
84 std::string vstr;
85 for (size_t a = 0; a < anc.size(); a += 2) vstr += jevois::sformat("%.2f,%.2f ", anc[a], anc[a+1]);
86 LINFO("Out " << i << ": " << jevois::dnn::shapestr(outs[i]) << ", scale=1/" << (8<<yn) <<
87 ", anchors=[ " << vstr <<']');
88 }
89 }
90
91 // Run each scale in a thread:
92 float scale_xy = scalexy::get();
93 std::vector<std::future<void>> fvec;
94
95 for (size_t i = 0; i < nouts; ++i)
96 fvec.emplace_back(jevois::async([&](size_t i)
97 { yolo_one(outs[i], classIds, confidences, boxes, nclass, itsYoloNum[i], boxThreshold, confThreshold,
98 bsiz, fudge, maxbox, sigmo, scale_xy); }, i));
99
100 // Use joinall() to get() all futures and throw a single consolidated exception if any thread threw:
101 jevois::joinall(fvec);
102}
103
104// ####################################################################################################
105void jevois::dnn::PostProcessorDetectYOLO::yolo_one(cv::Mat const & out, std::vector<int> & classIds,
106 std::vector<float> & confidences, std::vector<cv::Rect> & boxes,
107 size_t nclass, int yolonum, float boxThreshold,
108 float confThreshold, cv::Size const & bsiz, int fudge,
109 size_t maxbox, bool sigmo, float scale_xy)
110{
111 if (out.type() != CV_32F) LTHROW("Need FLOAT32 data");
112 cv::MatSize const & msiz = out.size;
113 if (msiz.dims() != 4 || msiz[0] != 1)
114 LTHROW("Incorrect tensor size: need 1xCxHxW or 1xHxWxC, got " << jevois::dnn::shapestr(out));
115
116 // C=(dim[1] or dims[3]) is (coords = 4 + 1 for box score + classes) * n_anchors:
117 // n_anchors = 5 for yoloface, yolov2
118 // n_anchors = 3 for yolov3/v4/v5/v7 and those have 3 separate output tensors for 3 scales
119
120 // Try NCHW first (e.g., from NPU):
121 bool nchw = true;
122 int w = msiz[3];
123 int h = msiz[2];
124 int constexpr coords = 4;
125 int const bbsize = coords + 1 + nclass;
126 int n = msiz[1] / bbsize;
127 if (msiz[1] % bbsize)
128 {
129 // Ok, try NHWC (e.g., YOLOv5 on Hailo):
130 nchw = false;
131 w = msiz[2];
132 h = msiz[1];
133 n = msiz[3] / bbsize;
134
135 if (msiz[3] % bbsize)
136 LTHROW("Incorrect tensor size: need 1xCxHxW or 1xHxWxC where "
137 "C=num_anchors*(4 coords + 1 box_score + nclass object_scores), got " << jevois::dnn::shapestr(out) <<
138 ", nclass=" << nclass << ", num_anchors=" << itsAnchors[yolonum].size()/2);
139 }
140
141 float const bfac = 1.0F / (8 << yolonum);
142 size_t const total = h * w * n * bbsize;
143 if (total != out.total()) LTHROW("Ooops");
144 std::vector<float> const & biases = itsAnchors[yolonum];
145 if (int(biases.size()) != n*2)
146 LTHROW(n << " boxes received but only " << biases.size()/2 << " boxw,boxh anchors provided");
147
148 // Stride from one box field (coords, score, class) to the next:
149 size_t const stride = nchw ? h * w : 1;
150 size_t const nextloc = nchw ? 1 : n * bbsize;
151 float const * locptr = (float const *)out.data;
152 size_t const ncs = nclass * stride;
153
154 // Loop over all locations:
155 for (int row = 0; row < h; ++row)
156 for (int col = 0; col < w; ++col)
157 {
158 // locptr points to the set of boxes at the current location. Initialize ptr to the first box:
159 float const * ptr = locptr;
160
161 // Loop over all boxes per location:
162 for (int nn = 0; nn < n; ++nn)
163 {
164 // Apply logistic activation to box score:
165 float box_score = ptr[coords * stride];
166 if (sigmo) box_score = jevois::dnn::sigmoid(box_score);
167
168 if (box_score > boxThreshold)
169 {
170 // Get index of highest-scoring class and its score:
171 size_t const class_index = (coords + 1) * stride;
172 size_t maxidx = 0; float prob = 0.0F;
173 for (size_t k = 0; k < ncs; k += stride)
174 if (ptr[class_index + k] > prob) { prob = ptr[class_index + k]; maxidx = k; }
175 if (sigmo) prob = jevois::dnn::sigmoid(prob);
176
177 // Combine box and class scores:
178 prob *= box_score;
179
180 // If best class was above threshold, keep that box:
181 if (prob > confThreshold)
182 {
183 // Decode the box and scale it to input blob dims:
184 cv::Rect b;
185
186 if (scale_xy)
187 {
188 // New coordinates style, as in YOLOv5/7:
189 float bx = ptr[0 * stride], by = ptr[1 * stride], bw = ptr[2 * stride], bh = ptr[3 * stride];
190 if (sigmo)
191 {
192 bx = jevois::dnn::sigmoid(bx);
193 by = jevois::dnn::sigmoid(by);
194 bw = jevois::dnn::sigmoid(bw);
195 bh = jevois::dnn::sigmoid(bh);
196 }
197
198 b.width = bw * bw * 4.0f * biases[2*nn] * bfac * bsiz.width / w + 0.499F;
199 b.height = bh * bh * 4.0F * biases[2*nn+1] * bfac * bsiz.height / h + 0.499F;
200 b.x = (bx * scale_xy - 0.5F + col) * bsiz.width / w + 0.499F - b.width / 2;
201 b.y = (by * scale_xy - 0.5F + row) * bsiz.height / h + 0.499F - b.height / 2;
202 }
203 else
204 {
205 // Old-style coordinates, as in YOLOv2/3/4:
206 b.width = expf(ptr[2 * stride]) * biases[2*nn] * bfac * bsiz.width / w + 0.499F;
207 b.height = expf(ptr[3 * stride]) * biases[2*nn+1] * bfac * bsiz.height / h + 0.499F;
208 b.x = (col + jevois::dnn::sigmoid(ptr[0 * stride])) * bsiz.width / w + 0.499F - b.width / 2;
209 b.y = (row + jevois::dnn::sigmoid(ptr[1 * stride])) * bsiz.height / h + 0.499F - b.height / 2;
210 }
211
212 std::lock_guard<std::mutex> _(itsOutMtx);
213 boxes.emplace_back(b);
214 classIds.emplace_back(maxidx / stride + fudge);
215 confidences.emplace_back(prob);
216 if (classIds.size() > maxbox) return; // Stop if too many boxes
217 }
218 }
219
220 // Next box within the current location:
221 ptr += bbsize * stride;
222 }
223 // Next location:
224 locptr += nextloc;
225 }
226}
int h
Definition GUIhelper.C:2520
#define LTHROW(msg)
Definition Log.H:251
void yolo(std::vector< cv::Mat > const &outs, std::vector< int > &classIds, std::vector< float > &confidences, std::vector< cv::Rect > &boxes, size_t nclass, float boxThreshold, float confThreshold, cv::Size const &bsiz, int fudge, size_t const maxbox, bool sigmo)
Generic raw YOLO processing.
void freeze(bool doit)
Freeze/unfreeze parameters that users should not change while running.
#define LFATAL(msg)
Convenience macro for users to print out console or syslog messages, FATAL level.
Definition Log.H:230
#define LINFO(msg)
Convenience macro for users to print out console or syslog messages, INFO level.
Definition Log.H:194
float sigmoid(float x)
Compute sigmoid using fastexp.
std::string shapestr(cv::Mat const &m)
Get a string of the form: "nD AxBxC... TYPE" from an n-dimensional cv::Mat with data type TYPE.
Definition Utils.C:109
std::vector< T > joinall(std::vector< std::future< T > > &fvec, bool multiline=true)
Collect results from several async threads that are all returning a T result.
std::future< std::invoke_result_t< std::decay_t< Function >, std::decay_t< Args >... > > async(Function &&f, Args &&... args)
Async execution using a thread pool.
std::string sformat(char const *fmt,...) __attribute__((format(__printf__
Create a string using printf style arguments.
Definition Utils.C:440
std::vector< std::string > split(std::string const &input, std::string const &regex="\\s+")
Split string into vector of tokens using a regex to specify what to split on; default regex splits by...
Definition Utils.C:270