JeVois  1.22
JeVois Smart Embedded Machine Vision Toolkit
Share this page:
Loading...
Searching...
No Matches
PostProcessorDetectOBB.C
Go to the documentation of this file.
1// ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
2//
3// JeVois Smart Embedded Machine Vision Toolkit - Copyright (C) 2021 by Laurent Itti, the University of Southern
4// California (USC), and iLab at USC. See http://iLab.usc.edu and http://jevois.org for information about this project.
5//
6// This file is part of the JeVois Smart Embedded Machine Vision Toolkit. This program is free software; you can
7// redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software
8// Foundation, version 2. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9// without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
10// License for more details. You should have received a copy of the GNU General Public License along with this program;
11// if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
12//
13// Contact information: Laurent Itti - 3641 Watt Way, HNB-07A - Los Angeles, CA 90089-2520 - USA.
14// Tel: +1 213 740 3527 - itti@pollux.usc.edu - http://iLab.usc.edu - http://jevois.org
15// ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
16/*! \file */
17
20#include <jevois/DNN/Utils.H>
21#include <jevois/Util/Utils.H>
23#include <jevois/Core/Engine.H>
24#include <jevois/Core/Module.H>
26
27#include <opencv2/dnn.hpp>
28#include <cmath>
29
30// ####################################################################################################
33
34// ####################################################################################################
36{
37 classes::freeze(doit);
38 detecttypeobb::freeze(doit);
39}
40
41// ####################################################################################################
42void jevois::dnn::PostProcessorDetectOBB::onParamChange(postprocessor::classes const &, std::string const & val)
43{
44 if (val.empty()) { itsLabels.clear(); return; }
46}
47
48// ####################################################################################################
49void jevois::dnn::PostProcessorDetectOBB::onParamChange(postprocessor::detecttypeobb const &,
50 postprocessor::DetectTypeOBB const & val)
51{
52 // Nothing so far
53 (void)val;
54}
55
56// ####################################################################################################
57void jevois::dnn::PostProcessorDetectOBB::process(std::vector<cv::Mat> const & outs, jevois::dnn::PreProcessor * preproc)
58{
59 if (outs.empty()) LFATAL("No outputs received, we need at least one.");
60 cv::Mat const & out = outs[0]; cv::MatSize const & msiz = out.size;
61
62 float const confThreshold = cthresh::get() * 0.01F;
63 //float const boxThreshold = dthresh::get() * 0.01F;
64 float const nmsThreshold = nms::get() * 0.01F;
65 bool const sigmo = sigmoid::get();
66 int const fudge = classoffset::get();
67 itsImageSize = preproc->imagesize();
68
69 // To draw boxes, we will need to:
70 // - scale from [0..1]x[0..1] to blobw x blobh
71 // - scale and center from blobw x blobh to input image w x h, provided by PreProcessor::b2i()
72 // - when using the GUI, we further scale and translate to OpenGL display coordinates using GUIhelper::i2d()
73 // Here we assume that the first blob sets the input size.
74 //cv::Size const bsiz = preproc->blobsize(0);
75
76 // We keep 3 vectors here instead of creating a class to hold all of the data because OpenCV will need that for
77 // non-maximum suppression:
78 std::vector<int> classIds;
79 std::vector<float> confidences;
80 std::vector<cv::RotatedRect> boxes;
81
82 // Here we just scale the coords from [0..1]x[0..1] to blobw x blobh:
83 try
84 {
85 switch(detecttypeobb::get())
86 {
87 // ----------------------------------------------------------------------------------------------------
88 case jevois::dnn::postprocessor::DetectTypeOBB::YOLOv8:
89 {
90 // Network produces several (usually 3, for 3 strides) sets of 3 blobs: 1x64xHxW (raw boxes) and 1xCxHxW (class
91 // scores), 1x1xHxW (box angles):
92 if ((outs.size() % 3) != 0 || msiz.dims() != 4 || msiz[0] != 1)
93 LTHROW("Expected several (usually 3, for 3 strides) sets of 2 blobs: 1x64xHxW (raw boxes), "
94 "1xCxHxW (class scores), and 1x1xHxW (box angles)");
95
96 int stride = 8;
97 int constexpr reg_max = 16;
98
99 for (size_t idx = 0; idx < outs.size(); idx += 3)
100 {
101 cv::Mat const & bx = outs[idx]; cv::MatSize const & bx_siz = bx.size;
102 if (bx_siz.dims() != 4 || bx_siz[1] != 4 * reg_max) LTHROW("Output " << idx << " is not 4D 1x64xHxW");
103 float const * bx_data = (float const *)bx.data;
104
105 cv::Mat const & cls = outs[idx + 1]; cv::MatSize const & cls_siz = cls.size;
106 if (cls_siz.dims() != 4) LTHROW("Output " << idx << " is not 4D 1xCxHxW");
107 float const * cls_data = (float const *)cls.data;
108 size_t const nclass = cls_siz[1];
109
110 cv::Mat const & ang = outs[idx + 2]; cv::MatSize const & ang_siz = ang.size;
111 if (cls_siz.dims() != 4 || ang_siz[1] != 1) LTHROW("Output " << idx << " is not 4D 1x1xHxW");
112 float const * ang_data = (float const *)ang.data;
113
114 for (int i = 2; i < 4; ++i)
115 if (cls_siz[i] != bx_siz[i]) LTHROW("Mismatched HxW sizes for outputs " << idx << " .. " << idx + 1);
116
117 size_t const step = cls_siz[2] * cls_siz[3]; // HxW
118
119 // Loop over all locations:
120 for (int y = 0; y < cls_siz[2]; ++y)
121 for (int x = 0; x < cls_siz[3]; ++x)
122 {
123 // Get the top class score:
124 size_t best_idx = 0; float confidence = cls_data[0];
125 for (size_t i = 1; i < nclass; ++i)
126 if (cls_data[i * step] > confidence) { confidence = cls_data[i * step]; best_idx = i; }
127
128 // Apply sigmoid to it, if needed (output layer did not already have sigmoid activations):
129 if (sigmo) confidence = jevois::dnn::sigmoid(confidence);
130
131 if (confidence >= confThreshold)
132 {
133 // Decode a rotated box from 64 received values and one angle:
134 // See Netron and https://github.com/ultralytics/ultralytics/issues/624
135
136 // Raw angle is in [0..1] with an offset, such that a value 0.25 means 0.0:
137 // See in Netron how after the last conv layer for angles, 0.25 is subtracted then mul by pi:
138 float angle = (jevois::dnn::sigmoid(*ang_data) - 0.25F) * M_PI;
139
140 // Angle in [-pi/4,3/4 pi) -> [-pi/2,pi/2)
141 if (angle >= 0.5F * M_PI && angle <= 0.75F * M_PI) angle -= M_PI;
142
143 float const cosa = std::cos(angle);
144 float const sina = std::sin(angle);
145
146 // Now the rotated box:
147 float dst[reg_max];
148 float const ltx = softmax_dfl(bx_data, dst, reg_max, step);
149 float const lty = softmax_dfl(bx_data + reg_max * step, dst, reg_max, step);
150 float const rbx = softmax_dfl(bx_data + 2 * reg_max * step, dst, reg_max, step);
151 float const rby = softmax_dfl(bx_data + 3 * reg_max * step, dst, reg_max, step);
152
153 float const xf = 0.5F * (rbx - ltx);
154 float const yf = 0.5F * (rby - lty);
155
156 float const cx = (x + 0.5F + xf * cosa - yf * sina) * stride;
157 float const cy = (y + 0.5F + xf * sina + yf * cosa) * stride;
158 float const width = (ltx + rbx) * stride;
159 float const height = (lty + rby) * stride;
160
161 // Store this detection:
162 boxes.push_back(cv::RotatedRect(cv::Point2f(cx, cy), cv::Size2f(width, height), angle * 180.0F / M_PI));
163 classIds.push_back(int(best_idx) + fudge);
164 confidences.push_back(confidence);
165 }
166
167 // Move to the next location:
168 ++cls_data; ++bx_data; ++ang_data;
169 }
170
171 // Move to the next scale:
172 stride *= 2;
173 }
174 }
175 break;
176
177 // ----------------------------------------------------------------------------------------------------
178 default:
179 // Do not use strget() here as it will throw!
180 LTHROW("Unsupported Post-processor detecttype " << int(detecttypeobb::get()));
181 }
182 }
183 // Abort here if the received outputs were malformed:
184 catch (std::exception const & e)
185 {
186 std::string err = "Selected detecttypeobb is " + detecttypeobb::strget() + " and network produced:\n\n";
187 for (cv::Mat const & m : outs) err += "- " + jevois::dnn::shapestr(m) + "\n";
188 err += "\nFATAL ERROR(s):\n\n";
189 err += e.what();
190 LFATAL(err);
191 }
192
193 // Cleanup overlapping boxes, either globally or per class, and possibly limit number of reported boxes:
194 std::vector<int> indices;
195 /* not supported yet by opencv...
196 if (nmsperclass::get())
197 cv::dnn::NMSBoxesBatched(boxes, confidences, classIds, confThreshold, nmsThreshold, indices, 1.0F, maxnbox::get());
198 else */
199 cv::dnn::NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices, 1.0F, maxnbox::get());
200
201 // Now adjust the boxes from blob size to input image size:
202 for (cv::RotatedRect & b : boxes)
203 {
204 preproc->b2i(b.center.x, b.center.y);
205 preproc->b2is(b.size.width, b.size.height);
206 }
207
208 // Store results:
209 itsDetections.clear(); bool namonly = namedonly::get();
210 for (size_t i = 0; i < indices.size(); ++i)
211 {
212 int idx = indices[i];
213 cv::RotatedRect const & box = boxes[idx];
214 std::string const label = jevois::dnn::getLabel(itsLabels, classIds[idx], namonly);
215 if (namonly == false || label.empty() == false)
216 {
217 jevois::ObjReco o { confidences[idx] * 100.0f, label };
218 std::vector<jevois::ObjReco> ov;
219 ov.emplace_back(o);
220 jevois::ObjDetectOBB od { box, ov };
221 itsDetections.emplace_back(od);
222 }
223 }
224}
225
226// ####################################################################################################
228 jevois::OptGUIhelper * helper, bool overlay,
229 bool /*idle*/)
230{
231 bool const serreport = serialreport::get();
232
233 for (jevois::ObjDetectOBB const & o : itsDetections)
234 {
235 std::string categ, label;
236
237 if (o.reco.empty())
238 {
239 categ = "unknown";
240 label = "unknown";
241 }
242 else
243 {
244 categ = o.reco[0].category;
245 label = jevois::sformat("%s: %.2f", categ.c_str(), o.reco[0].score);
246 }
247
248 // If desired, draw boxes in output image:
249 if (outimg && overlay)
250 {
251 std::vector<cv::Point2f> pts; o.rect.points(pts);
252 for (size_t i = 1; i < pts.size(); ++i)
253 jevois::rawimage::drawLine(*outimg, pts[i-1].x, pts[i-1].y, pts[i].x, pts[i].y, 2, jevois::yuyv::LightGreen);
254 jevois::rawimage::drawLine(*outimg, pts.back().x, pts.back().y, pts[0].x, pts[0].y, 2, jevois::yuyv::LightGreen);
255
256 jevois::rawimage::writeText(*outimg, label, pts[0].x + 6, pts[0].y + 2, jevois::yuyv::LightGreen,
258 }
259
260#ifdef JEVOIS_PRO
261 // If desired, draw results on GUI:
262 if (helper)
263 {
264 int col = jevois::dnn::stringToRGBA(categ, 0xff);
265 std::vector<cv::Point2f> corners; o.rect.points(corners);
266 helper->drawPoly(corners, col, true);
267 helper->drawText(corners[1].x + 3.0f, corners[1].y + 3.0f, label.c_str(), col);
268 }
269#else
270 (void)helper; // keep compiler happy
271#endif
272
273 // If desired, send results to serial port:
274 if (mod && serreport) mod->sendSerialObjDetImg2D(itsImageSize.width, itsImageSize.height, o);
275 }
276}
277
278// ####################################################################################################
279std::vector<jevois::ObjDetectOBB> const & jevois::dnn::PostProcessorDetectOBB::latestDetectionsOBB() const
280{ return itsDetections; }
#define JEVOIS_SHARE_PATH
Base path for shared files (e.g., neural network weights, etc)
Definition Config.H:82
#define o
Definition Font10x20.C:6
#define LTHROW(msg)
Definition Log.H:251
Helper class to assist modules in creating graphical and GUI elements.
Definition GUIhelper.H:133
void drawText(float x, float y, char const *txt, ImU32 col=IM_COL32(128, 255, 128, 255))
Draw text over an image.
Definition GUIhelper.C:624
void drawPoly(std::vector< cv::Point > const &pts, ImU32 col=IM_COL32(128, 255, 128, 255), bool filled=true)
Draw polygon over an image.
Definition GUIhelper.C:514
A raw image as coming from a V4L2 Camera and/or being sent out to a USB Gadget.
Definition RawImage.H:111
Base class for a module that supports standardized serial messages.
Definition Module.H:234
void sendSerialObjDetImg2D(unsigned int camw, unsigned int camh, float x, float y, float w, float h, std::vector< ObjReco > const &res)
Send a standardized object detection + recognition message.
Definition Module.C:572
void report(jevois::StdModule *mod, jevois::RawImage *outimg=nullptr, jevois::OptGUIhelper *helper=nullptr, bool overlay=true, bool idle=false) override
Report what happened in last process() to console/output video/GUI.
std::vector< ObjDetectOBB > const & latestDetectionsOBB() const
Get the latest detections, use with caution, not thread-safe.
void onParamChange(postprocessor::detecttypeobb const &param, postprocessor::DetectTypeOBB const &val) override
void freeze(bool doit) override
Freeze/unfreeze parameters that users should not change while running.
void process(std::vector< cv::Mat > const &outs, PreProcessor *preproc) override
Process outputs and draw/send some results.
Pre-Processor for neural network pipeline.
cv::Size const & imagesize() const
Access the last processed image size.
void b2is(float &sx, float &sy, size_t blobnum=0)
Convert box size from blob back to original image.
void b2i(float &x, float &y, size_t blobnum=0)
Convert coordinates from blob back to original image.
#define LFATAL(msg)
Convenience macro for users to print out console or syslog messages, FATAL level.
Definition Log.H:230
std::string getLabel(std::map< int, std::string > const &labels, int id, bool namedonly=false)
Get a label from an id.
Definition Utils.C:68
std::map< int, std::string > readLabelsFile(std::string const &fname)
Read a label file.
Definition Utils.C:25
float sigmoid(float x)
Compute sigmoid using fastexp.
float softmax_dfl(float const *src, float *dst, size_t const n, size_t const stride=1)
Compute softmax and return DFL distance.
Definition Utils.C:752
int stringToRGBA(std::string const &label, unsigned char alpha=128)
Compute a color from a label name.
Definition Utils.C:80
std::string shapestr(cv::Mat const &m)
Get a string of the form: "nD AxBxC... TYPE" from an n-dimensional cv::Mat with data type TYPE.
Definition Utils.C:109
void writeText(RawImage &img, std::string const &txt, int x, int y, unsigned int col, Font font=Font6x10)
Write some text in an image.
void drawLine(RawImage &img, int x1, int y1, int x2, int y2, unsigned int thick, unsigned int col)
Draw a line in a YUYV image.
std::string sformat(char const *fmt,...) __attribute__((format(__printf__
Create a string using printf style arguments.
Definition Utils.C:440
std::filesystem::path absolutePath(std::filesystem::path const &root, std::filesystem::path const &path)
Compute an absolute path from two paths.
Definition Utils.C:386
unsigned short constexpr LightGreen
YUYV color value.
Definition RawImage.H:63
A trivial struct to store object detection results, for oriented bounding boxes (OBB)
Definition ObjDetect.H:38
A trivial struct to store object recognition results.
Definition ObjReco.H:25