JeVois  1.23
JeVois Smart Embedded Machine Vision Toolkit
Share this page:
Loading...
Searching...
No Matches
PostProcessorDetect.C
Go to the documentation of this file.
1// ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
2//
3// JeVois Smart Embedded Machine Vision Toolkit - Copyright (C) 2021 by Laurent Itti, the University of Southern
4// California (USC), and iLab at USC. See http://iLab.usc.edu and http://jevois.org for information about this project.
5//
6// This file is part of the JeVois Smart Embedded Machine Vision Toolkit. This program is free software; you can
7// redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software
8// Foundation, version 2. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9// without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
10// License for more details. You should have received a copy of the GNU General Public License along with this program;
11// if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
12//
13// Contact information: Laurent Itti - 3641 Watt Way, HNB-07A - Los Angeles, CA 90089-2520 - USA.
14// Tel: +1 213 740 3527 - itti@pollux.usc.edu - http://iLab.usc.edu - http://jevois.org
15// ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
16/*! \file */
17
18#define IMGUI_DEFINE_MATH_OPERATORS // Access to math operators
19
23#include <jevois/DNN/Pipeline.H>
25#include <jevois/DNN/Network.H>
26#include <jevois/DNN/Utils.H>
27#include <jevois/Util/Utils.H>
29#include <jevois/Core/Engine.H>
30#include <jevois/Core/Module.H>
32
33#include <opencv2/opencv.hpp>
34#include <opencv2/dnn.hpp>
35#include <opencv2/imgproc/imgproc.hpp> // for findContours()
36#include <opencv2/imgcodecs.hpp> // for cv::imread()
37
38#ifdef JEVOIS_PRO
39#include <imgui.h>
40#include <imgui_internal.h>
41#endif
42
43// ####################################################################################################
46
47// ####################################################################################################
49{
// NOTE(review): the signature line for this function is missing from this listing; 'doit' presumably
// selects freeze (true) vs unfreeze (false), as passed through to the freeze() calls below -- confirm
// against PostProcessorDetect.H.
// Propagate the freeze state to our optional helper sub-components, when present:
50 if (itsYOLO) itsYOLO->freeze(doit);
51#ifdef JEVOIS_PRO
52 if (itsYOLOjevois) itsYOLOjevois->freeze(doit);
53#endif
54
// Freeze/unfreeze our own parameters as well:
55 classes::freeze(doit);
56 detecttype::freeze(doit);
57 auto dtyp = detecttype::get();
58
// masksmooth is only consulted by the YOLOv8 instance-segmentation decoders; keep it frozen for all
// other detection types:
59 if (dtyp != postprocessor::DetectType::YOLOv8seg && dtyp != postprocessor::DetectType::YOLOv8segt)
60 masksmooth::freeze(doit);
61
// dthresh (box/objectness score threshold) is only used by the YOLOX, YOLO, and RAWYOLO decoders in
// process(); keep it frozen for the other types:
62 if (dtyp != postprocessor::DetectType::YOLOX && dtyp != postprocessor::DetectType::YOLO &&
63 dtyp != postprocessor::DetectType::RAWYOLO)
64 dthresh::freeze(doit);
65}
66
67// ####################################################################################################
68void jevois::dnn::PostProcessorDetect::onParamChange(postprocessor::classes const &, std::string const & val)
69{
70 if (val.empty()) { itsLabels.clear(); return; }
71 itsLabels = jevois::dnn::getClassLabels(val);
72}
73
74// ####################################################################################################
75void jevois::dnn::PostProcessorDetect::onParamChange(postprocessor::perclassthresh const &, std::string const & val)
76{
77 itsPerClassThreshs.clear();
78 if (val.empty()) { cthresh::freeze(false); return; }
79 auto tok = jevois::split(val, "\\s*[, ]\\s*");
80
81 for (std::string const & t : tok)
82 itsPerClassThreshs.emplace_back(std::max(0.0001F, jevois::from_string<float>(t) * 0.01F));
83
84 cthresh::freeze(true);
85 dthresh::freeze(true);
86 // We will check for correct number of classes in process() once we have some input tensors
87}
88
89// ####################################################################################################
90void jevois::dnn::PostProcessorDetect::onParamChange(postprocessor::detecttype const &,
91 postprocessor::DetectType const & val)
92{
93 if (itsYOLO) { itsYOLO.reset(); removeSubComponent("yolo", false); }
94#ifdef JEVOIS_PRO
95 if (itsYOLOjevois) { itsYOLOjevois.reset(); removeSubComponent("yolojevois", false); }
96#endif
97
98 if (val == postprocessor::DetectType::RAWYOLO)
99 itsYOLO = addSubComponent<jevois::dnn::PostProcessorDetectYOLO>("yolo");
100
101#ifdef JEVOIS_PRO
102 if (val == postprocessor::DetectType::YOLOjevois || val == postprocessor::DetectType::YOLOjevoist)
103 {
104 itsYOLOjevois = addSubComponent<jevois::dnn::YOLOjevois>("yolojevois", itsLabels);
105 itsYOLOjevoisIsSetup = false;
106 }
107#endif
108}
109
110// ####################################################################################################
111void jevois::dnn::PostProcessorDetect::process(std::vector<cv::Mat> const & outs, jevois::dnn::PreProcessor * preproc)
112{
113 if (outs.empty()) LFATAL("No outputs received, we need at least one.");
114 cv::Mat const & out = outs[0]; cv::MatSize const & msiz = out.size;
115
116 float confThreshold = cthresh::get() * 0.01F;
117 float const boxThreshold = dthresh::get() * 0.01F;
118 float const nmsThreshold = nms::get() * 0.01F;
119 bool const sigmo = sigmoid::get();
120 bool const clampbox = boxclamp::get();
121 int const fudge = classoffset::get();
122 bool const smoothmsk = masksmooth::get();
123 itsImageSize = preproc->imagesize();
124
125 // To draw boxes, we will need to:
126 // - scale from [0..1]x[0..1] to blobw x blobh
127 // - scale and center from blobw x blobh to input image w x h, provided by PreProcessor::b2i()
128 // - when using the GUI, we further scale and translate to OpenGL display coordinates using GUIhelper::i2d()
129 // Here we assume that the first blob sets the input size.
130 cv::Size const bsiz = preproc->blobsize(0);
131
132 // We keep 3 vectors here instead of creating a class to hold all of the data because OpenCV will need that for
133 // non-maximum suppression:
134 std::vector<int> classIds;
135 std::vector<float> confidences;
136 std::vector<cv::Rect> boxes;
137 std::vector<cv::Mat> mask_coeffs; // mask coefficients when doing instance segmentation
138 cv::Mat mask_proto; // The output containing the mask prototypes (usually the last one)
139 int mask_proto_h = 1; // number of rows in the mask prototypes tensor, will be updated
140
141 // Here we just scale the coords from [0..1]x[0..1] to blobw x blobh:
142 try
143 {
144 switch(detecttype::get())
145 {
146 // ----------------------------------------------------------------------------------------------------
147 case jevois::dnn::postprocessor::DetectType::FasterRCNN:
148 {
149 if (outs.size() != 1 || msiz.dims() != 4 || msiz[0] != 1 || msiz[1] != 1 || msiz[3] != 7)
150 LTHROW("Expected 1 output blob with shape 1x1xNx7 for N detections with values "
151 "[batchId, classId, confidence, left, top, right, bottom]");
152
153 float const * data = (float const *)out.data;
154 for (size_t i = 0; i < out.total(); i += 7)
155 {
156 float confidence = data[i + 2];
157 if (confidence > confThreshold)
158 {
159 int left = (int)data[i + 3];
160 int top = (int)data[i + 4];
161 int right = (int)data[i + 5];
162 int bottom = (int)data[i + 6];
163 int width = right - left + 1;
164 int height = bottom - top + 1;
165 classIds.push_back((int)(data[i + 1]) + fudge); // Skip 0th background class id.
166 boxes.push_back(cv::Rect(left, top, width, height));
167 confidences.push_back(confidence);
168 }
169 }
170 }
171 break;
172
173 // ----------------------------------------------------------------------------------------------------
174 case jevois::dnn::postprocessor::DetectType::SSD:
175 {
176 if (outs.size() != 1 || msiz.dims() != 4 || msiz[0] != 1 || msiz[1] != 1 || msiz[3] != 7)
177 LTHROW("Expected 1 output blob with shape 1x1xNx7 for N detections with values "
178 "[batchId, classId, confidence, left, top, right, bottom]");
179
180 float const * data = (float const *)out.data;
181 for (size_t i = 0; i < out.total(); i += 7)
182 {
183 float confidence = data[i + 2];
184 if (confidence > confThreshold)
185 {
186 int left = (int)(data[i + 3] * bsiz.width);
187 int top = (int)(data[i + 4] * bsiz.height);
188 int right = (int)(data[i + 5] * bsiz.width);
189 int bottom = (int)(data[i + 6] * bsiz.height);
190 int width = right - left + 1;
191 int height = bottom - top + 1;
192 classIds.push_back((int)(data[i + 1]) + fudge); // Skip 0th background class id.
193 boxes.push_back(cv::Rect(left, top, width, height));
194 confidences.push_back(confidence);
195 }
196 }
197 }
198 break;
199
200 // ----------------------------------------------------------------------------------------------------
201 case jevois::dnn::postprocessor::DetectType::TPUSSD:
202 {
203 if (outs.size() != 4)
204 LTHROW("Expected 4 output blobs with shapes 4xN for boxes, N for IDs, N for scores, and 1x1 for count");
205 cv::Mat const & bboxes = outs[0];
206 cv::Mat const & ids = outs[1];
207 cv::Mat const & scores = outs[2];
208 cv::Mat const & count = outs[3];
209 if (bboxes.total() != 4 * ids.total() || bboxes.total() != 4 * scores.total() || count.total() != 1)
210 LTHROW("Expected 4 output blobs with shapes 4xN for boxes, N for IDs, N for scores, and 1x1 for count");
211
212 size_t num = count.at<float>(0);
213 if (num > ids.total()) LTHROW("Too many detections: " << num << " for only " << ids.total() << " ids");
214 float const * bb = (float const *)bboxes.data;
215
216 for (size_t i = 0; i < num; ++i)
217 {
218 if (scores.at<float>(i) < confThreshold) continue;
219
220 int top = (int)(bb[4 * i] * bsiz.height);
221 int left = (int)(bb[4 * i + 1] * bsiz.width);
222 int bottom = (int)(bb[4 * i + 2] * bsiz.height);
223 int right = (int)(bb[4 * i + 3] * bsiz.width);
224 int width = right - left + 1;
225 int height = bottom - top + 1;
226 classIds.push_back((int)(ids.at<float>(i)) + fudge); // Skip 0th background class id.
227 boxes.push_back(cv::Rect(left, top, width, height));
228 confidences.push_back(scores.at<float>(i));
229 }
230 }
231 break;
232
233 // ----------------------------------------------------------------------------------------------------
234 case jevois::dnn::postprocessor::DetectType::YOLO:
235 {
236 for (size_t i = 0; i < outs.size(); ++i)
237 {
238 // Network produces output blob(s) with shape Nx(5+C) where N is a number of detected objects and C is a number
239 // of classes + 5 where the first 5 numbers are [center_x, center_y, width, height, box score].
240 cv::Mat const & out = outs[i];
241 cv::MatSize const & ms = out.size; int const nd = ms.dims();
242 int nbox = -1, ndata = -1;
243
244 if (nd >= 2)
245 {
246 nbox = ms[nd-2];
247 ndata = ms[nd-1];
248 for (int i = 0; i < nd-2; ++i) if (ms[i] != 1) nbox = -1; // reject if more than 2 effective dims
249 }
250
251 if (nbox < 0 || ndata < 5)
252 LTHROW("Expected 1 or more output blobs with shape Nx(5+C) where N is the number of "
253 "detected objects, C is the number of classes, and the first 5 columns are "
254 "[center_x, center_y, width, height, box score]. // "
255 "Incorrect size " << jevois::dnn::shapestr(out) << " for output " << i <<
256 ": need Nx(5+C) or 1xNx(5+C)");
257
258 // Some networks, like YOLOv5 or YOLOv7, output 3D 1xNx(5+C), so here we slice off the last 2 dims:
259 int sz2[] = { nbox, ndata };
260 cv::Mat const out2(2, sz2, out.type(), out.data);
261
262 float const * data = (float const *)out2.data;
263 for (int j = 0; j < nbox; ++j, data += ndata)
264 {
265 if (data[4] < boxThreshold) continue; // skip if box score is too low
266
267 cv::Mat scores = out2.row(j).colRange(5, ndata);
268 cv::Point classIdPoint; double confidence;
269 cv::minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
270
271 if (confidence < confThreshold) continue; // skip if class score too low
272
273 // YOLO<5 produces boxes in [0..1[x[0..1[ and 2D output blob:
274 int centerX, centerY, width, height;
275 if (nd == 2)
276 {
277 centerX = (int)(data[0] * bsiz.width);
278 centerY = (int)(data[1] * bsiz.height);
279 width = (int)(data[2] * bsiz.width);
280 height = (int)(data[3] * bsiz.height);
281 }
282 else
283 {
284 // YOLOv5, YOLOv7 produce boxes already scaled by input blob size, and 3D output blob:
285 centerX = (int)(data[0]);
286 centerY = (int)(data[1]);
287 width = (int)(data[2]);
288 height = (int)(data[3]);
289 }
290
291 int left = centerX - width / 2;
292 int top = centerY - height / 2;
293 boxes.push_back(cv::Rect(left, top, width, height));
294 classIds.push_back(classIdPoint.x);
295 confidences.push_back((float)confidence);
296 }
297 }
298 }
299 break;
300
301 // ----------------------------------------------------------------------------------------------------
302 case jevois::dnn::postprocessor::DetectType::YOLOv10:
303 {
304 for (size_t i = 0; i < outs.size(); ++i)
305 {
306 cv::Mat const & out = outs[i];
307 cv::MatSize const & ms = out.size; int const nd = ms.dims();
308
309 if (jevois::dnn::effectiveDims(out) != 2 || ms[nd-1] < 5)
310 LTHROW("Expected 1 or more output blobs with shape Nx(4+C) where N is the number of "
311 "detected objects, C is the number of classes, and the first 4 columns are "
312 "[x1, y1, x2, y2]. // "
313 "Incorrect size " << jevois::dnn::shapestr(out) << " for output " << i <<
314 ": need Nx(4+C)");
315
316 // Some networks may produce 3D, slice off the last 2 dims:
317 int const nbox = ms[nd-2];
318 int const ndata = ms[nd-1];
319 int sz2[] = { nbox, ndata };
320 cv::Mat const out2(2, sz2, out.type(), out.data);
321
322 // Ok, we are ready with Nx(4+C):
323 float const * data = (float const *)out2.data;
324 for (int j = 0; j < nbox; ++j, data += ndata)
325 {
326 cv::Mat scores = out2.row(j).colRange(4, ndata);
327 cv::Point classIdPoint; double confidence;
328 cv::minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
329
330 if (confidence < confThreshold) continue; // skip if class score too low
331
332 // Boxes are already scaled by input blob size, and are x1, y1, x2, y2:
333 boxes.push_back(cv::Rect(data[0], data[1], data[2]-data[0]+1, data[3]-data[1]+1));
334 classIds.push_back(classIdPoint.x);
335 confidences.push_back((float)confidence);
336 }
337 }
338 }
339 break;
340
341 // ----------------------------------------------------------------------------------------------------
342 case jevois::dnn::postprocessor::DetectType::YOLOv10pp:
343 {
344 if (outs.size() != 1 || msiz.dims() != 3 || msiz[0] != 1 || msiz[2] != 6)
345 LTHROW("Expected 1 output blob with shape 1xNx6 for N detections with values "
346 "[left, top, right, bottom, confidence, classId]");
347
348 float const * data = (float const *)out.data;
349 for (size_t i = 0; i < out.total(); i += 6)
350 {
351 float confidence = data[i + 4];
352 if (confidence > confThreshold)
353 {
354 // Boxes are already scaled by input blob size, and are x1, y1, x2, y2:
355 int left = (int)data[i + 0];
356 int top = (int)data[i + 1];
357 int right = (int)data[i + 2];
358 int bottom = (int)data[i + 3];
359 int width = right - left + 1;
360 int height = bottom - top + 1;
361 classIds.push_back((int)(data[i + 5]) + fudge); // Skip 0th background class id.
362 boxes.push_back(cv::Rect(left, top, width, height));
363 confidences.push_back(confidence);
364 }
365 }
366 }
367 break;
368
369 // ----------------------------------------------------------------------------------------------------
370 case jevois::dnn::postprocessor::DetectType::RAWYOLO:
371 {
372 if (itsYOLO) itsYOLO->yolo(outs, classIds, confidences, boxes, itsLabels.size(), boxThreshold, confThreshold,
373 bsiz, fudge, maxnbox::get(), sigmo);
374 else LFATAL("Internal error -- no YOLO subcomponent");
375 }
376 break;
377
378 // ----------------------------------------------------------------------------------------------------
379 case jevois::dnn::postprocessor::DetectType::YOLOX:
380 {
381 if ((outs.size() % 3) != 0 || msiz.dims() != 4 || msiz[0] != 1)
382 LTHROW("Expected several (usually 3, for 3 strides) sets of 3 blobs: 1xHxWxC (class scores), 1xHxWx4 (boxes), "
383 "1xHxWx1 (objectness scores)");
384
385 int stride = 8;
386
387 for (size_t idx = 0; idx < outs.size(); idx += 3)
388 {
389 cv::Mat const & cls = outs[idx]; cv::MatSize const & cls_siz = cls.size;
390 if (cls_siz.dims() != 4) LTHROW("Output " << idx << " is not 4D 1xHxWxC");
391 float const * cls_data = (float const *)cls.data;
392
393 cv::Mat const & bx = outs[idx + 1]; cv::MatSize const & bx_siz = bx.size;
394 if (bx_siz.dims() != 4 || bx_siz[3] != 4) LTHROW("Output " << idx << " is not 1xHxWx4");
395 float const * bx_data = (float const *)bx.data;
396
397 cv::Mat const & obj = outs[idx + 2]; cv::MatSize const & obj_siz = obj.size;
398 if (obj_siz.dims() != 4 || obj_siz[3] != 1) LTHROW("Output " << idx << " is not 1xHxWx1");
399 float const * obj_data = (float const *)obj.data;
400
401 for (int i = 1; i < 3; ++i)
402 if (cls_siz[i] != bx_siz[i] || cls_siz[i] != obj_siz[i])
403 LTHROW("Mismatched HxW sizes for outputs " << idx << " .. " << idx + 2);
404
405 size_t const nclass = cls_siz[3];
406
407 // Loop over all locations:
408 for (int y = 0; y < cls_siz[1]; ++y)
409 for (int x = 0; x < cls_siz[2]; ++x)
410 {
411 // Only consider if objectness score is high enough:
412 float objectness = obj_data[0];
413 if (objectness >= boxThreshold)
414 {
415 // Get the top class score:
416 size_t best_idx = 0; float confidence = cls_data[0];
417 for (size_t i = 1; i < nclass; ++i)
418 if (cls_data[i] > confidence) { confidence = cls_data[i]; best_idx = i; }
419
420 confidence *= objectness;
421
422 if (confidence >= confThreshold)
423 {
424 // Decode the box:
425 float cx = (x /*+ 0.5F*/ + bx_data[0]) * stride;
426 float cy = (y /*+ 0.5F*/ + bx_data[1]) * stride;
427 float width = std::exp(bx_data[2]) * stride;
428 float height = std::exp(bx_data[3]) * stride;
429 float left = cx - 0.5F * width;
430 float top = cy - 0.5F * height;
431
432 // Store this detection:
433 boxes.push_back(cv::Rect(left, top, width, height));
434 classIds.push_back(int(best_idx) + fudge);
435 confidences.push_back(confidence);
436 }
437 }
438
439 // Move to the next location:
440 cls_data += nclass;
441 bx_data += 4;
442 obj_data += 1;
443 }
444
445 // Move to the next scale:
446 stride *= 2;
447 }
448 }
449 break;
450
451 // ----------------------------------------------------------------------------------------------------
452 case jevois::dnn::postprocessor::DetectType::YOLOv8t:
453 case jevois::dnn::postprocessor::DetectType::YOLOjevoist:
454 {
455 if ((outs.size() % 2) != 0 || msiz.dims() != 4 || msiz[0] != 1)
456 LTHROW("Expected several (usually 3, for 3 strides) sets of 2 blobs: 1xHxWx64 (raw boxes) "
457 "and 1xHxWxC (class scores)");
458
459 int stride = 8;
460 int constexpr reg_max = 16;
461
462 for (size_t idx = 0; idx < outs.size(); idx += 2)
463 {
464 cv::Mat const & bx = outs[idx]; cv::MatSize const & bx_siz = bx.size;
465 if (bx_siz.dims() != 4 || bx_siz[3] != 4 * reg_max) LTHROW("Output " << idx << " is not 4D 1xHxWx64");
466 float const * bx_data = (float const *)bx.data;
467
468 cv::Mat const & cls = outs[idx + 1]; cv::MatSize const & cls_siz = cls.size;
469 if (cls_siz.dims() != 4) LTHROW("Output " << idx << " is not 4D 1xHxWxC");
470 float const * cls_data = (float const *)cls.data;
471 size_t const nclass = cls_siz[3];
472
473 if (itsPerClassThreshs.empty() == false && itsPerClassThreshs.size() != nclass)
474 LTHROW("Output tensor has " << nclass << " classes but " << itsPerClassThreshs.size() <<
475 " values given in perclassthresh -- both must match");
476
477 for (int i = 1; i < 3; ++i)
478 if (cls_siz[i] != bx_siz[i]) LTHROW("Mismatched HxW sizes for outputs " << idx << " .. " << idx + 1);
479
480 // Loop over all locations:
481 for (int y = 0; y < cls_siz[1]; ++y)
482 for (int x = 0; x < cls_siz[2]; ++x)
483 {
484 // Get the top class score:
485 size_t best_idx = 0; float confidence = cls_data[0];
486
487 if (itsPerClassThreshs.empty())
488 {
489 // Standard processing with a single cthresh for all classes:
490 for (size_t i = 1; i < nclass; ++i)
491 if (cls_data[i] > confidence) { confidence = cls_data[i]; best_idx = i; }
492
493 // Apply sigmoid to it, if needed (output layer did not already have sigmoid activations):
494 if (sigmo) confidence = jevois::dnn::sigmoid(confidence);
495 }
496 else
497 {
498 // Per-class thresholds: pick the class with the highest confidence relative to its thresh:
499 if (sigmo) confidence = jevois::dnn::sigmoid(confidence);
500 for (size_t i = 1; i < nclass; ++i)
501 {
502 float c = cls_data[i];
503 if (sigmo) c = jevois::dnn::sigmoid(c);
504 if (c / itsPerClassThreshs[i] > confidence / itsPerClassThreshs[best_idx])
505 { confidence = c; best_idx = i; }
506 }
507 confThreshold = itsPerClassThreshs[best_idx];
508 }
509
510 if (confidence >= confThreshold)
511 {
512 // Decode a 4-coord box from 64 received values:
513 // Code here inspired from https://github.com/trinhtuanvubk/yolo-ncnn-cpp/blob/main/yolov8/yolov8.cpp
514 float dst[reg_max];
515
516 float xmin = (x + 0.5f - softmax_dfl(bx_data, dst, reg_max)) * stride;
517 float ymin = (y + 0.5f - softmax_dfl(bx_data + reg_max, dst, reg_max)) * stride;
518 float xmax = (x + 0.5f + softmax_dfl(bx_data + 2 * reg_max, dst, reg_max)) * stride;
519 float ymax = (y + 0.5f + softmax_dfl(bx_data + 3 * reg_max, dst, reg_max)) * stride;
520
521 // Store this detection:
522 boxes.push_back(cv::Rect(xmin, ymin, xmax - xmin, ymax - ymin));
523 classIds.push_back(int(best_idx) + fudge);
524 confidences.push_back(confidence);
525 }
526
527 // Move to the next location:
528 cls_data += nclass;
529 bx_data += 4 * reg_max;
530 }
531
532 // Move to the next scale:
533 stride *= 2;
534 }
535 }
536 break;
537
538 // ----------------------------------------------------------------------------------------------------
539 case jevois::dnn::postprocessor::DetectType::YOLOv8:
540 case jevois::dnn::postprocessor::DetectType::YOLOjevois:
541 {
542 if ((outs.size() % 2) != 0 || msiz.dims() != 4 || msiz[0] != 1)
543 LTHROW("Expected several (usually 3, for 3 strides) sets of 2 blobs: 1x64xHxW (raw boxes) "
544 "and 1xCxHxW (class scores)");
545
546 int stride = 8;
547 int constexpr reg_max = 16;
548
549 for (size_t idx = 0; idx < outs.size(); idx += 2)
550 {
551 cv::Mat const & bx = outs[idx]; cv::MatSize const & bx_siz = bx.size;
552 if (bx_siz.dims() != 4 || bx_siz[1] != 4 * reg_max) LTHROW("Output " << idx << " is not 4D 1x64xHxW");
553 float const * bx_data = (float const *)bx.data;
554
555 cv::Mat const & cls = outs[idx + 1]; cv::MatSize const & cls_siz = cls.size;
556 if (cls_siz.dims() != 4) LTHROW("Output " << idx << " is not 4D 1xCxHxW");
557 float const * cls_data = (float const *)cls.data;
558 size_t const nclass = cls_siz[1];
559
560 if (itsPerClassThreshs.empty() == false && itsPerClassThreshs.size() != nclass)
561 LTHROW("Output tensor has " << nclass << " classes but " << itsPerClassThreshs.size() <<
562 " values given in perclassthresh -- both must match");
563
564 for (int i = 2; i < 4; ++i)
565 if (cls_siz[i] != bx_siz[i]) LTHROW("Mismatched HxW sizes for outputs " << idx << " .. " << idx + 1);
566
567 size_t const step = cls_siz[2] * cls_siz[3]; // HxW
568
569 // Loop over all locations:
570 for (int y = 0; y < cls_siz[2]; ++y)
571 for (int x = 0; x < cls_siz[3]; ++x)
572 {
573 // Get the top class score:
574 size_t best_idx = 0; float confidence = cls_data[0];
575
576 if (itsPerClassThreshs.empty())
577 {
578 // Standard processing with a single cthresh for all classes:
579 for (size_t i = 1; i < nclass; ++i)
580 if (cls_data[i * step] > confidence) { confidence = cls_data[i * step]; best_idx = i; }
581
582 // Apply sigmoid to it, if needed (output layer did not already have sigmoid activations):
583 if (sigmo) confidence = jevois::dnn::sigmoid(confidence);
584 }
585 else
586 {
587 // Per-class thresholds: pick the class with the highest confidence relative to its thresh:
588 if (sigmo) confidence = jevois::dnn::sigmoid(confidence);
589 for (size_t i = 1; i < nclass; ++i)
590 {
591 float c = cls_data[i * step];
592 if (sigmo) c = jevois::dnn::sigmoid(c);
593 if (c / itsPerClassThreshs[i] > confidence / itsPerClassThreshs[best_idx])
594 { confidence = c; best_idx = i; }
595 }
596 confThreshold = itsPerClassThreshs[best_idx];
597 }
598
599 if (confidence >= confThreshold)
600 {
601 // Decode a 4-coord box from 64 received values:
602 // Code here inspired from https://github.com/trinhtuanvubk/yolo-ncnn-cpp/blob/main/yolov8/yolov8.cpp
603 float dst[reg_max];
604
605 float xmin = (x + 0.5f - softmax_dfl(bx_data, dst, reg_max, step)) * stride;
606 float ymin = (y + 0.5f - softmax_dfl(bx_data + reg_max * step, dst, reg_max, step)) * stride;
607 float xmax = (x + 0.5f + softmax_dfl(bx_data + 2 * reg_max * step, dst, reg_max, step)) * stride;
608 float ymax = (y + 0.5f + softmax_dfl(bx_data + 3 * reg_max * step, dst, reg_max, step)) * stride;
609
610 // Store this detection:
611 boxes.push_back(cv::Rect(xmin, ymin, xmax - xmin, ymax - ymin));
612 classIds.push_back(int(best_idx) + fudge);
613 confidences.push_back(confidence);
614 }
615
616 // Move to the next location:
617 ++cls_data;
618 ++bx_data;
619 }
620
621 // Move to the next scale:
622 stride *= 2;
623 }
624 }
625 break;
626
627 // ----------------------------------------------------------------------------------------------------
628 case jevois::dnn::postprocessor::DetectType::YOLOv8seg:
629 {
630 if (outs.size() % 3 != 1 || msiz.dims() != 4 || msiz[0] != 1)
631 LTHROW("Expected several (usually 3, for 3 strides) sets of 3 tensors: 1x64xHxW (raw boxes), "
632 "1xCxHxW (class scores), and 1xMxHxW (mask coeffs for M masks); then one 1xMxHxW for M mask prototypes");
633
634 int stride = 8;
635 int constexpr reg_max = 16;
636
637 // Get the mask prototypes as 2D 32xHW:
638 cv::MatSize const & mps = outs.back().size;
639 if (mps.dims() != 4) LTHROW("Mask prototypes not 4D 1xMxHxW");
640 mask_proto = cv::Mat(std::vector<int>{ mps[1], mps[2] * mps[3] }, CV_32F, outs.back().data);
641 int const mask_num = mps[1];
642 mask_proto_h = mps[2]; // will be needed later to unpack from HW to HxW
643
644 // Process each scale (aka stride):
645 for (size_t idx = 0; idx < outs.size() - 1; idx += 3)
646 {
647 cv::Mat const & bx = outs[idx]; cv::MatSize const & bx_siz = bx.size;
648 if (bx_siz.dims() != 4 || bx_siz[1] != 4 * reg_max) LTHROW("Output " << idx << " is not 4D 1x64xHxW");
649 float const * bx_data = (float const *)bx.data;
650
651 cv::Mat const & cls = outs[idx + 1]; cv::MatSize const & cls_siz = cls.size;
652 if (cls_siz.dims() != 4) LTHROW("Output " << idx << " is not 4D 1xCxHxW");
653 float const * cls_data = (float const *)cls.data;
654 size_t const nclass = cls_siz[1];
655
656 cv::Mat const & msk = outs[idx + 2]; cv::MatSize const & msk_siz = msk.size;
657 if (msk_siz.dims() != 4 || msk_siz[1] != mask_num) LTHROW("Output " << idx << " is not 4D 1xMxHxW");
658 float const * msk_data = (float const *)msk.data;
659
660 for (int i = 2; i < 4; ++i)
661 if (cls_siz[i] != bx_siz[i] || cls_siz[i] != msk_siz[i])
662 LTHROW("Mismatched HxW sizes for outputs " << idx << " .. " << idx + 1);
663
664 size_t const step = cls_siz[2] * cls_siz[3]; // HxW
665
666 // Loop over all locations:
667 for (int y = 0; y < cls_siz[2]; ++y)
668 for (int x = 0; x < cls_siz[3]; ++x)
669 {
670 // Get the top class score:
671 size_t best_idx = 0; float confidence = cls_data[0];
672 for (size_t i = 1; i < nclass; ++i)
673 if (cls_data[i * step] > confidence) { confidence = cls_data[i * step]; best_idx = i; }
674
675 // Apply sigmoid to it, if needed (output layer did not already have sigmoid activations):
676 if (sigmo) confidence = jevois::dnn::sigmoid(confidence);
677
678 if (confidence >= confThreshold)
679 {
680 // Decode a 4-coord box from 64 received values:
681 float dst[reg_max];
682
683 float xmin = (x + 0.5f - softmax_dfl(bx_data, dst, reg_max, step)) * stride;
684 float ymin = (y + 0.5f - softmax_dfl(bx_data + reg_max * step, dst, reg_max, step)) * stride;
685 float xmax = (x + 0.5f + softmax_dfl(bx_data + 2 * reg_max * step, dst, reg_max, step)) * stride;
686 float ymax = (y + 0.5f + softmax_dfl(bx_data + 3 * reg_max * step, dst, reg_max, step)) * stride;
687
688 // Store this detection:
689 boxes.push_back(cv::Rect(xmin, ymin, xmax - xmin, ymax - ymin));
690 classIds.push_back(int(best_idx) + fudge);
691 confidences.push_back(confidence);
692
693 // Also store raw mask coefficients data, will decode the masks after NMS to save time:
694 cv::Mat coeffs(1, mask_num, CV_32F); float * cptr = (float *)coeffs.data;
695 for (int i = 0; i < mask_num; ++i) *cptr++ = msk_data[i * step];
696 mask_coeffs.emplace_back(coeffs);
697 }
698
699 // Move to the next location:
700 ++cls_data; ++bx_data; ++msk_data;
701 }
702
703 // Move to the next scale:
704 stride *= 2;
705 }
706 }
707 break;
708
709 // ----------------------------------------------------------------------------------------------------
710 case jevois::dnn::postprocessor::DetectType::YOLOv8segt:
711 {
712 if (outs.size() % 3 != 1 || msiz.dims() != 4 || msiz[0] != 1)
713 LTHROW("Expected several (usually 3, for 3 strides) sets of 3 tensors: 1xHxWx64 (raw boxes), "
714 "1xHxWxC (class scores), and 1xHxWxM (mask coeffs for M masks); then one 1xHxWxM for M mask prototypes");
715
716 int stride = 8;
717 int constexpr reg_max = 16;
718
719 // Get the mask prototypes as 2D HWx32:
720 cv::MatSize const & mps = outs.back().size;
721 if (mps.dims() != 4) LTHROW("Mask prototypes not 4D 1xHxWxM");
722 mask_proto = cv::Mat(std::vector<int>{ mps[1] * mps[2], mps[3] }, CV_32F, outs.back().data);
723 int const mask_num = mps[3];
724 mask_proto_h = mps[1]; // will be needed later to unpack from HW to HxW
725
726 // Process each scale (aka stride):
727 for (size_t idx = 0; idx < outs.size() - 1; idx += 3)
728 {
729 cv::Mat const & bx = outs[idx]; cv::MatSize const & bx_siz = bx.size;
730 if (bx_siz.dims() != 4 || bx_siz[3] != 4 * reg_max) LTHROW("Output " << idx << " is not 4D 1xHxWx64");
// (review) Continuation of PostProcessorDetect::process() -- the start of this function lies outside
// this chunk. This section appears to decode split-output YOLOv8-seg-style heads: for each scale,
// one raw box output (DFL logits), one class-score output, and one mask-coefficient output.
// Confirm the enclosing switch/case label against the full file.
731 float const * bx_data = (float const *)bx.data;
732
733 cv::Mat const & cls = outs[idx + 1]; cv::MatSize const & cls_siz = cls.size;
734 if (cls_siz.dims() != 4) LTHROW("Output " << idx << " is not 4D 1xHxWxC");
735 float const * cls_data = (float const *)cls.data;
736 size_t const nclass = cls_siz[3];
737
738 cv::Mat const & msk = outs[idx + 2]; cv::MatSize const & msk_siz = msk.size;
739 if (msk_siz.dims() != 4 || msk_siz[3] != mask_num) LTHROW("Output " << idx << " is not 4D 1xHxWxM");
740 float const * msk_data = (float const *)msk.data;
741
// All three outputs of this scale must share the same HxW grid dimensions:
742 for (int i = 1; i < 3; ++i)
743 if (cls_siz[i] != bx_siz[i] || cls_siz[i] != msk_siz[i])
744 LTHROW("Mismatched HxW sizes for outputs " << idx << " .. " << idx + 1);
745
746 // Loop over all locations:
747 for (int y = 0; y < cls_siz[1]; ++y)
748 for (int x = 0; x < cls_siz[2]; ++x)
749 {
750 // Get the top class score:
751 size_t best_idx = 0; float confidence = cls_data[0];
752 for (size_t i = 1; i < nclass; ++i)
753 if (cls_data[i] > confidence) { confidence = cls_data[i]; best_idx = i; }
754
755 // Apply sigmoid to it, if needed (output layer did not already have sigmoid activations):
756 if (sigmo) confidence = jevois::dnn::sigmoid(confidence);
757
758 if (confidence >= confThreshold)
759 {
760 // Decode a 4-coord box from 64 received values:
// Each box side is encoded as reg_max DFL logits; softmax_dfl() returns the expected
// distance (in grid cells) from the cell center, which is then scaled by the stride.
// NOTE(review): float dst[reg_max] is a variable-length array (GCC/Clang extension,
// not standard C++) since reg_max is presumably a runtime value -- confirm upstream.
761 float dst[reg_max];
762
763 float xmin = (x + 0.5f - softmax_dfl(bx_data, dst, reg_max)) * stride;
764 float ymin = (y + 0.5f - softmax_dfl(bx_data + reg_max, dst, reg_max)) * stride;
765 float xmax = (x + 0.5f + softmax_dfl(bx_data + 2 * reg_max, dst, reg_max)) * stride;
766 float ymax = (y + 0.5f + softmax_dfl(bx_data + 3 * reg_max, dst, reg_max)) * stride;
767
768 // Store this detection:
// NOTE(review): cv::Rect holds ints, so the float coordinates are truncated here.
769 boxes.push_back(cv::Rect(xmin, ymin, xmax - xmin, ymax - ymin));
770 classIds.push_back(int(best_idx) + fudge);
771 confidences.push_back(confidence);
772
773 // Also store raw mask coefficients data, will decode the masks after NMS to save time:
774 cv::Mat coeffs(mask_num, 1, CV_32F);
775 std::memcpy(coeffs.data, msk_data, mask_num * sizeof(float));
776 mask_coeffs.emplace_back(coeffs);
777 }
778
779 // Move to the next location:
780 cls_data += nclass;
781 bx_data += 4 * reg_max;
782 msk_data += mask_num;
783 }
784
785 // Move to the next scale:
// Strides are assumed to double at each successive scale (e.g., 8, 16, 32).
786 stride *= 2;
787 }
788 }
789 break;
790
791 // ----------------------------------------------------------------------------------------------------
792 default:
793 // Do not use strget() here as it will throw!
794 LTHROW("Unsupported Post-processor detecttype " << int(detecttype::get()));
795 }
796 }
797 // Abort here if the received outputs were malformed:
// Build a detailed diagnostic listing the shapes the network actually produced, then abort.
798 catch (std::exception const & e)
799 {
800 std::string err = "Selected detecttype is " + detecttype::strget() + " and network produced:\n\n";
801 for (cv::Mat const & m : outs) err += "- " + jevois::dnn::shapestr(m) + "\n";
802 err += "\nFATAL ERROR(s):\n\n";
803 err += e.what();
804 LFATAL(err);
805 }
806
807 // Cleanup overlapping boxes, either globally or per class, and possibly limit number of reported boxes:
808 std::vector<int> indices;
809 if (nmsperclass::get())
810 cv::dnn::NMSBoxesBatched(boxes, confidences, classIds, confThreshold, nmsThreshold, indices, 1.0F, maxnbox::get())
;
811 else
812 cv::dnn::NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices, 1.0F, maxnbox::get());
813
814 // Store results:
815 itsDetections.clear(); bool namonly = namedonly::get();
816 std::vector<cv::Vec4i> contour_hierarchy;
817
818 for (size_t i = 0; i < indices.size(); ++i)
819 {
820 int idx = indices[i];
821 std::string const label = jevois::dnn::getLabel(itsLabels, classIds[idx], namonly);
// When namedonly is set, detections whose class has no name in itsLabels are dropped:
822 if (namonly == false || label.empty() == false)
823 {
824 cv::Rect & b = boxes[idx];
825
826 // Now clamp box to be within blob:
827 if (clampbox) jevois::dnn::clamp(b, bsiz.width, bsiz.height);
828
829 // Decode the mask if doing instance segmentation:
830 std::vector<cv::Point> poly;
831 if (mask_coeffs.empty() == false)
832 {
833 // Multiply the 1x32 mask coeffs by the 32xHW mask prototypes to get a 1xHW weighted mask (YOLOv8seg), or
834 // multiply the HWx32 mask prototypes by the 32x1 mask coeffs to get a HWx1 weighted mask (YOLOv8segt):
835 cv::Mat weighted_mask;
836 if (mask_coeffs[idx].rows == 1) weighted_mask = mask_coeffs[idx] * mask_proto;
837 else weighted_mask = mask_proto * mask_coeffs[idx];
838
839 // Reshape to HxW:
840 weighted_mask = weighted_mask.reshape(0, mask_proto_h);
841
842 // Apply sigmoid to all mask elements:
843 jevois::dnn::sigmoid(weighted_mask);
844
845 // Typically, mask prototypes are 4x smaller than input blob; we want to detect contours inside the obj rect. We
846 // have two approaches here: 1) detect contours on the original masks at low resolution (faster but contours are
847 // not very smooth), 2) scale the mask 4x with bilinear interpolation and then detect the contours (slower but
848 // smoother contours):
// NOTE(review): integer division -- assumes blob height is an exact multiple of the prototype height.
849 int mask_scale = bsiz.height / mask_proto_h;
850 if (smoothmsk)
851 {
852 cv::Mat src = weighted_mask;
853 cv::resize(src, weighted_mask, cv::Size(), mask_scale, mask_scale, cv::INTER_LINEAR);
854 mask_scale = 1;
855 }
856
857 cv::Rect scaled_rect(b.tl() / mask_scale, b.br() / mask_scale);
858 scaled_rect &= cv::Rect(cv::Point(0, 0), weighted_mask.size()); // constrain roi to within mask image
859
860 // Binarize the mask roi:
861 cv::Mat roi_mask; cv::threshold(weighted_mask(scaled_rect), roi_mask, 0.5, 255.0, cv::THRESH_BINARY);
862 cv::Mat roi_u8; roi_mask.convertTo(roi_u8, CV_8U);
863
864 // Detect object contours that are inside the scaled rect:
865 std::vector<std::vector<cv::Point>> polys;
866 cv::findContours(roi_u8, polys, contour_hierarchy, cv::RETR_EXTERNAL,
867 cv::CHAIN_APPROX_SIMPLE, scaled_rect.tl()); // or CHAIN_APPROX_NONE
868
869 // Pick the largest poly:
// "Largest" here means most contour points, not largest enclosed area.
870 size_t polyidx = 0; size_t largest_poly_size = 0; size_t j = 0;
871 for (auto const & p : polys)
872 {
873 if (p.size() > largest_poly_size) { largest_poly_size = p.size(); polyidx = j; }
874 ++j;
875 }
876
877 // Scale from mask to blob to image:
878 if (polys.empty() == false)
879 for (cv::Point & pt : polys[polyidx])
880 {
881 float x = pt.x * mask_scale, y = pt.y * mask_scale;
882 preproc->b2i(x, y);
883 poly.emplace_back(cv::Point(x, y));
884 }
885 }
886
887 // Rescale the box from blob to (processing) image:
888 cv::Point2f tl = b.tl(); preproc->b2i(tl.x, tl.y);
889 cv::Point2f br = b.br(); preproc->b2i(br.x, br.y);
890 b.x = tl.x; b.y = tl.y; b.width = br.x - tl.x; b.height = br.y - tl.y;
891
892 // Store this detection for later report:
// Score is converted from [0..1] to percent here; ObjDetect stores corner coords.
893 jevois::ObjReco o { confidences[idx] * 100.0f, label };
894 std::vector<jevois::ObjReco> ov;
895 ov.emplace_back(o);
896 jevois::ObjDetect od { b.x, b.y, b.x + b.width, b.y + b.height, ov, poly };
897 itsDetections.emplace_back(od);
898 }
899 }
900
901#ifdef JEVOIS_PRO
902 // Increment a counter each time we run, used during start-up of YOLOjevois:
903 ++itsLastProcessedNum;
904#endif
905}
906
907// ####################################################################################################
// (review) The signature line (orig line 908) was dropped by the doxygen extraction. Per the member
// index, this is: void PostProcessorDetect::report(jevois::StdModule * mod, jevois::RawImage * outimg,
// jevois::OptGUIhelper * helper, bool overlay, bool idle). It reports the detections stored by the
// last process() call to video overlay, GUI, and/or serial port.
 909 jevois::OptGUIhelper * helper, bool overlay,
 910 bool idle)
 911{
 912 // If running YOLOjevois, do not display garbage while the aux models are loading; and trigger the loading if needed:
 913#ifdef JEVOIS_PRO
 914 if (itsYOLOjevois)
 915 {
 916 if (itsYOLOjevoisIsSetup == false)
 917 {
 918 // Find our main network so that YOLOjevois can update its extra inputs later. We assume that there is a
 919 // sub-component named "network" that is a sibling of us:
 920 std::vector<std::string> dd = jevois::split(Component::descriptor(), ":"); dd.pop_back();
 921 std::shared_ptr<jevois::Component> comp = engine()->getComponent(dd[0]); dd.erase(dd.begin());
 922 for (std::string const & c : dd) { comp = comp->getSubComponent(c); if (!comp) LFATAL("Internal error"); }
 923 auto net = comp->getSubComponent<jevois::dnn::Network>("network");
 924
 925 itsYOLOjevois->setup(itsPerClassThreshs.size(), helper, net);
 926 itsYOLOjevoisIsSetup = true;
 927 }
// Not ready yet: remember the current frame counter and skip reporting this frame:
 928 if (itsYOLOjevois->ready() == false) { itsWaitingForYOLOjevoisNum = itsLastProcessedNum; return; }
 929
 930 // Just after YOLOjevois is ready, the net might not have updated its outputs yet. So we need to wait until the main
 931 // network and our process() have run one more time before we can display valid boxes:
 932 if (itsLastProcessedNum < itsWaitingForYOLOjevoisNum + 2) return;
 933 }
 934#endif
 935
 936 bool const serreport = serialreport::get();
 937
 938 for (jevois::ObjDetect const & o : itsDetections)
 939 {
 940 std::string categ, label;
 941
// Fall back to "unknown" when the detection carries no recognition entries:
 942 if (o.reco.empty())
 943 {
 944 categ = "unknown";
 945 label = "unknown";
 946 }
 947 else
 948 {
 949 categ = o.reco[0].category;
 950 label = jevois::sformat("%s: %.2f", categ.c_str(), o.reco[0].score);
 951 }
 952
 953 // If desired, draw boxes in output image:
 954 if (outimg && overlay)
 955 {
 956 jevois::rawimage::drawRect(*outimg, o.tlx, o.tly, o.brx - o.tlx, o.bry - o.tly, 2, jevois::yuyv::LightGreen);
 957 if (o.contour.empty() == false) LERROR("Need to implement drawPoly() for RawImage");
// NOTE(review): the continuation of this writeText() call (orig line 959, presumably the font
// argument) was dropped by the doxygen extraction -- see the original source file.
 958 jevois::rawimage::writeText(*outimg, label, o.tlx + 6, o.tly + 2, jevois::yuyv::LightGreen,
 960 }
 961
 962#ifdef JEVOIS_PRO
 963 // If desired, draw results on GUI:
 964 if (helper)
 965 {
// Color is derived deterministically from the category name so each class keeps a stable color:
 966 int col = jevois::dnn::stringToRGBA(categ, 0xff);
 967 helper->drawRect(o.tlx, o.tly, o.brx, o.bry, col, true);
 968 if (o.contour.empty() == false) helper->drawPoly(o.contour, col, false);
 969 helper->drawText(o.tlx + 3.0f, o.tly + 3.0f, label.c_str(), col);
 970 }
 971#endif
 972
 973 // If desired, send results to serial port:
 974 if (mod && serreport) mod->sendSerialObjDetImg2D(itsImageSize.width, itsImageSize.height, o);
 975 }
 976
 977 // Possibly draw additional open-world settings window:
 978#ifdef JEVOIS_PRO
 979 if (helper && itsPerClassThreshs.empty() == false) drawWorldGUI(helper, idle);
 980#else
 981 (void)helper; (void)idle; // keep compiler happy
 982#endif
 983}
984
 985// ####################################################################################################
// Return a const reference to the detections stored by the most recent process() call. As noted in
// the class declaration, this is not thread-safe: the referenced vector is cleared and refilled on
// every process(), so callers must not hold the reference across frames.
 986std::vector<jevois::ObjDetect> const & jevois::dnn::PostProcessorDetect::latestDetections() const
 987{ return itsDetections; }
988
989#ifdef JEVOIS_PRO
990
 991// ####################################################################################################
// (review) The signature line (orig line 992) was dropped by the doxygen extraction. Per the member
// index, this is: void PostProcessorDetect::drawWorldGUI(jevois::GUIhelper * helper, bool idle).
// Draws an ImGui window to edit per-class thresholds and (when YOLOjevois text/image encoders are
// available) class names or live-captured class images, and to save the setup as a custom pipeline.
 993{
 994 // Caller must guarantee that, if YOLOjevois is used, it has been setup and is ready.
 995 // If YOLOjevois is not used, class names will not be editable (e.g., reparameterized YOLO-World)
 996
 997 size_t const nclass = itsPerClassThreshs.size();
 998 bool has_text_encoder = false, has_image_encoder = false;
 999
 1000 if (itsYOLOjevois)
 1001 {
 1002 has_text_encoder = (itsYOLOjevois->textEmbeddingSize() != 0);
 1003 has_image_encoder = (itsYOLOjevois->imageEmbeddingSize() != 0);
 1004 }
 1005
// Live-selection state machine: -1 = inactive, otherwise driven by helper->selectImageBox().
// NOTE(review): function-local statics are shared across all PostProcessorDetect instances.
 1006 static int livestate = -1; static ImVec2 livetl { 0.0F, 0.0F}; static ImVec2 livebr { 0.0F, 0.0F };
 1007 static size_t liveclsid = 0;
 1008
 1009 // Present an interactive window if GUI is not idle:
 1010 if (idle == false)
 1011 {
 1012 ImGui::PushStyleColor(ImGuiCol_WindowBg, 0xf0e0ffff);
 1013 ImGui::SetNextWindowSize(ImVec2(1300, 500), ImGuiCond_FirstUseEver);
 1014 if (ImGui::Begin("Open-World Detection settings", nullptr /* no closing */))
 1015 {
 1016 for (size_t i = 0; i < nclass; ++i)
 1017 {
 1018 ImGui::AlignTextToFramePadding();
 1019 ImGui::Text("Class %2zu:", i);
 1020 ImGui::SameLine();
 1021
 1022 // Grey out the item if it is disabled:
 1023 int textflags = ImGuiInputTextFlags_EnterReturnsTrue;
 1024 if (has_text_encoder == false)
 1025 {
 1026 ImGui::PushItemFlag(ImGuiItemFlags_Disabled, true);
 1027 ImGui::PushStyleVar(ImGuiStyleVar_Alpha, ImGui::GetStyle().Alpha * 0.8f);
 1028 textflags |= ImGuiInputTextFlags_ReadOnly;
 1029 }
 1030
 1031 // We need a unique ID for each ImGui widget, and we will use no visible widget name:
 1032 static char wname[32]; snprintf(wname, 32, "##ywl%zu", i);
 1033
 1034 // Create a text box for class name:
 1035 std::string const label = jevois::dnn::getLabel(itsLabels, i, false);
// NOTE(review): buf is not zero-initialized and strncpy() does not NUL-terminate on truncation;
// a label of 255+ chars would leave buf unterminated -- confirm labels are always short.
 1036 char buf[256]; strncpy(buf, label.c_str(), sizeof(buf)-1);
 1037 ImGui::PushItemWidth(400);
 1038 if (ImGui::InputText(wname, buf, sizeof(buf), textflags))
 1039 try { itsYOLOjevois->update(i, buf); }
 1040 catch (...) { helper->reportAndIgnoreException(); }
 1041 ImGui::PopItemWidth();
 1042
 1043 // Show image if that class is hovered and was set by image:
 1044 if (ImGui::IsItemHovered())
 1045 try
 1046 {
 1047 cv::Mat icon = itsYOLOjevois->image(i);
 1048 if (icon.empty() == false)
 1049 {
 1050 itsHoverImage.load(icon, false);
 1051 ImVec2 const pos = ImGui::GetMousePos() + ImVec2(20, 20);
 1052 ImVec2 const siz(128, 128);
 1053 auto fdl = ImGui::GetForegroundDrawList();
 1054 itsHoverImage.draw(pos, siz, fdl);
 1055 fdl->AddRect(pos, pos+siz, 0xFF808080, 0.0F, ImDrawFlags_None, 2);
 1056 }
 1057 }
 1058 catch (...) { helper->reportAndIgnoreException(); }
 1059
 1060 // Restore any grey out:
 1061 if (has_text_encoder == false)
 1062 {
 1063 ImGui::PopItemFlag();
 1064 ImGui::PopStyleVar();
 1065 }
 1066
 1067 // Create a slider for the confidence threshold:
// Slider shows percent; itsPerClassThreshs stores the [0..1] fraction.
 1068 ImGui::SameLine();
 1069 snprintf(wname, 32, "##yws%zu", i);
 1070 float v = itsPerClassThreshs[i] * 100.0F;
 1071 ImGui::PushItemWidth(250);
 1072 if (ImGui::SliderFloat(wname, &v, 0.01F, 100.0F)) itsPerClassThreshs[i] = v * 0.01F;
 1073 ImGui::PopItemWidth();
 1074
 1075 // Possibly create a button for live image capture:
 1076 if (has_image_encoder)
 1077 {
 1078 ImGui::SameLine();
 1079 snprintf(wname, 32, "Live select##%d", int(i));
 1080 if (ImGui::Button(wname) && livestate == -1) { livestate = 0; liveclsid = i; }
 1081 }
 1082 }
 1083
 1084 // Allow users to save as a custom pipeline:
 1085 ImGui::Separator();
 1086 ImGui::AlignTextToFramePadding();
 1087 static char customname[256] = "yolo-jevois-custom";
 1088 ImGui::TextUnformatted("Custom pipeline name: ");
 1089 ImGui::SameLine();
 1090 ImGui::PushItemWidth(450);
 1091 ImGui::InputText("##scp", customname, sizeof(customname));
 1092 ImGui::PopItemWidth();
 1093 ImGui::SameLine();
 1094
 1095 if (ImGui::Button("Save"))
 1096 try
 1097 {
 1098 // Get our parent Pipeline so we can get params from it:
 1099 std::vector<std::string> dd = jevois::split(Component::descriptor(), ":"); dd.pop_back();
 1100 std::string pipeinst = dd.back(); dd.pop_back();
 1101 std::shared_ptr<jevois::Component> comp = engine()->getComponent(dd[0]); dd.erase(dd.begin());
 1102 for (std::string const & c : dd) { comp = comp->getSubComponent(c); if (!comp) LFATAL("Internal error"); }
 1103 auto pipe = comp->getSubComponent<jevois::dnn::Pipeline>(pipeinst);
 1104
 1105 std::vector<std::pair<std::string /*name*/, std::string /*value*/>> const & settings = pipe->zooSettings();
 1106
 1107 std::string basename = jevois::absolutePath(JEVOIS_CUSTOM_DNN_PATH, customname);
 1108
 1109 cv::FileStorage fs(basename + ".yml", cv::FileStorage::WRITE | cv::FileStorage::FORMAT_YAML);
 1110 if (fs.isOpened() == false) LFATAL("Failed to write " << basename << ".yml");
 1111
// The user-supplied pipeline name becomes the top-level YAML map key:
 1112 fs << customname << "{";
 1113
// Copy all zoo settings except "classes" (remembered for later) and "perclassthresh" (re-written below):
 1114 std::string classes;
 1115 for (auto const & s : settings)
 1116 if (s.first == "classes") classes = s.second;
 1117 else if (s.first != "perclassthresh") fs << s.first << s.second;
 1118
 1119 // Save the current per-class thresholds:
 1120 std::vector<std::string> pcth;
 1121 for (float t : itsPerClassThreshs) pcth.emplace_back(jevois::sformat("%.2f", t * 100.0F));
 1122 fs << "perclassthresh" << ('"' + jevois::join(pcth, " ") + '"');
 1123
 1124 // If using CLIP, save the current class names or images:
 1125 if (itsYOLOjevois)
 1126 {
 1127 std::vector<std::string> cls;
 1128 for (size_t i = 0; i < nclass; ++i)
 1129 if (itsYOLOjevois->image(i).empty())
 1130 {
 1131 // Save the text class description:
 1132 cls.push_back(itsLabels[i]);
 1133 }
 1134 else
 1135 {
 1136 // Save the grabbed image for that class:
 1137 std::string const imgname = basename + "-cls" + std::to_string(i) + ".png";
 1138 cv::Mat img_bgr; cv::cvtColor(itsYOLOjevois->image(i), img_bgr, cv::COLOR_RGB2BGR);
 1139 cv::imwrite(imgname, img_bgr);
 1140 cls.emplace_back("imagefile:" + imgname);
 1141 }
 1142 fs << "classes" << ('"' + jevois::join(cls, ", ") + '"');
 1143 }
 1144 else if (classes.empty() == false)
 1145 fs << "classes" << classes;
 1146
 1147 fs << "}";
 1148 helper->reportInfo("Custom model definition saved to " + basename + ".yml");
 1149 }
 1150 catch (...) { helper->reportAndIgnoreException(); }
 1151 }
 1152 ImGui::End();
 1153 ImGui::PopStyleColor();
 1154 }
 1155
 1156 // Are we doing a live selection?
 1157 if (livestate != -1 && helper->selectImageBox(livestate, livetl, livebr))
 1158 {
 1159 // Selection complete. First convert coords from display to image:
 1160 ImVec2 tl = helper->d2i(livetl, "c");
 1161 ImVec2 br = helper->d2i(livebr, "c");
 1162
 1163 // Extract ROI from our high-res input frame:
// NOTE(review): getInputFrame() may return a frame whose bounds do not contain r if the selection
// extends outside the image; hdimg(r) would then throw -- confirm selectImageBox() clamps coords.
 1164 jevois::InputFrame const * inframe = helper->getInputFrame();
 1165 cv::Mat hdimg = inframe->getCvRGB();
 1166 cv::Rect r(cv::Point(tl.x, tl.y), cv::Point(br.x, br.y));
 1167 cv::Mat roi = hdimg(r).clone();
 1168
 1169 // Compute CLIP image embedding and main network:
 1170 itsYOLOjevois->update(liveclsid, roi);
 1171 }
 1172}
1173
1174#endif
#define JEVOIS_CUSTOM_DNN_PATH
Directory where custom DNN models are stored:
Definition Config.H:85
#define o
Definition Font10x20.C:6
#define LTHROW(msg)
Definition Log.H:251
std::string descriptor() const
Get our full descriptor (including all parents) as [Instancename]:[...]:[...].
Definition Component.C:276
Helper class to assist modules in creating graphical and GUI elements.
Definition GUIhelper.H:133
void reportAndIgnoreException(std::string const &prefix="")
Report current exception in a modal dialog, then ignore it.
Definition GUIhelper.C:2708
void drawText(float x, float y, char const *txt, ImU32 col=IM_COL32(128, 255, 128, 255))
Draw text over an image.
Definition GUIhelper.C:634
void drawRect(float x1, float y1, float x2, float y2, ImU32 col=IM_COL32(128, 255, 128, 255), bool filled=true)
Draw rectangular box over an image.
Definition GUIhelper.C:488
bool selectImageBox(int &state, ImVec2 &tl, ImVec2 &br, ImU32 col=IM_COL32(128, 255, 128, 255))
Helper to select a rectangular box by dragging the mouse over the display.
Definition GUIhelper.C:1736
InputFrame const * getInputFrame() const
Get access to the InputFrame last drawn with drawInputFrame()
Definition GUIhelper.C:390
void drawPoly(std::vector< cv::Point > const &pts, ImU32 col=IM_COL32(128, 255, 128, 255), bool filled=true)
Draw polygon over an image.
Definition GUIhelper.C:524
void reportInfo(std::string const &inf)
Report a transient info message in an overlay window.
Definition GUIhelper.C:2669
ImVec2 d2i(ImVec2 p, char const *name=nullptr)
Convert coordinates of a point from on-screen to within a rendered image.
Definition GUIhelper.C:712
Exception-safe wrapper around a raw camera input frame.
Definition InputFrame.H:51
cv::Mat getCvRGB(bool casync=false) const
Shorthand to get the input image as a RGB cv::Mat and release the raw buffer.
Definition InputFrame.C:134
std::shared_ptr< Comp > getComponent(std::string const &instanceName) const
Get a top-level component by instance name.
A raw image as coming from a V4L2 Camera and/or being sent out to a USB Gadget.
Definition RawImage.H:111
Base class for a module that supports standardized serial messages.
Definition Module.H:234
void sendSerialObjDetImg2D(unsigned int camw, unsigned int camh, float x, float y, float w, float h, std::vector< ObjReco > const &res)
Send a standardized object detection + recognition message.
Definition Module.C:572
Abstract class to represent a neural network.
Definition Network.H:208
Neural processing pipeline.
Definition Pipeline.H:148
std::vector< std::pair< std::string, std::string > > const & zooSettings() const
Get access to the settings that were loaded from the zoo.
Definition Pipeline.C:562
void drawWorldGUI(jevois::GUIhelper *helper, bool idle)
Draw a GUI window to allow one to modify per-class thresholds (YOLO-World) and class names (YOLO-JeVo...
void onParamChange(postprocessor::detecttype const &param, postprocessor::DetectType const &val) override
void report(jevois::StdModule *mod, jevois::RawImage *outimg=nullptr, jevois::OptGUIhelper *helper=nullptr, bool overlay=true, bool idle=false) override
Report what happened in last process() to console/output video/GUI.
void process(std::vector< cv::Mat > const &outs, PreProcessor *preproc) override
Process outputs and draw/send some results.
void freeze(bool doit) override
Freeze/unfreeze parameters that users should not change while running.
virtual ~PostProcessorDetect()
Destructor.
std::vector< ObjDetect > const & latestDetections() const
Get the latest detections, use with caution, not thread-safe.
Pre-Processor for neural network pipeline.
cv::Size const & imagesize() const
Access the last processed image size.
void b2i(float &x, float &y, size_t blobnum=0)
Convert coordinates from blob back to original image.
cv::Size blobsize(size_t num) const
Access the width and height of a given blob, accounting for NCHW or NHWC.
#define LFATAL(msg)
Convenience macro for users to print out console or syslog messages, FATAL level.
Definition Log.H:230
#define LERROR(msg)
Convenience macro for users to print out console or syslog messages, ERROR level.
Definition Log.H:211
std::map< int, std::string > getClassLabels(std::string const &arg)
Get class labels from either a list or a file.
Definition Utils.C:25
std::string getLabel(std::map< int, std::string > const &labels, int id, bool namedonly=false)
Get a label from an id.
Definition Utils.C:85
float sigmoid(float x)
Compute sigmoid using fastexp.
void clamp(cv::Rect &r, int width, int height)
Clamp a rectangle to within given image width and height.
Definition Utils.C:408
float softmax_dfl(float const *src, float *dst, size_t const n, size_t const stride=1)
Compute softmax and return DFL distance.
Definition Utils.C:769
size_t effectiveDims(cv::Mat const &m)
Returns the number of non-unit dims in a cv::Mat.
Definition Utils.C:927
int stringToRGBA(std::string const &label, unsigned char alpha=128)
Compute a color from a label name.
Definition Utils.C:97
std::string shapestr(cv::Mat const &m)
Get a string of the form: "nD AxBxC... TYPE" from an n-dimensional cv::Mat with data type TYPE.
Definition Utils.C:126
void writeText(RawImage &img, std::string const &txt, int x, int y, unsigned int col, Font font=Font6x10)
Write some text in an image.
void drawRect(RawImage &img, int x, int y, unsigned int w, unsigned int h, unsigned int thick, unsigned int col)
Draw a rectangle in a YUYV image.
std::string join(std::vector< T > const &tokens, std::string const &delimiter)
Concatenate a vector of tokens into a string.
std::string sformat(char const *fmt,...) __attribute__((format(__printf__
Create a string using printf style arguments.
Definition Utils.C:440
std::filesystem::path absolutePath(std::filesystem::path const &root, std::filesystem::path const &path)
Compute an absolute path from two paths.
Definition Utils.C:386
std::vector< std::string > split(std::string const &input, std::string const &regex="\\s+")
Split string into vector of tokens using a regex to specify what to split on; default regex splits by...
Definition Utils.C:270
unsigned short constexpr LightGreen
YUYV color value.
Definition RawImage.H:63
A trivial struct to store object detection results, for standard (straight up) bounding boxes.
Definition ObjDetect.H:29
A trivial struct to store object recognition results.
Definition ObjReco.H:25