// Refuse to continue without at least one output tensor: outs[0] is dereferenced right below.
113 if (outs.empty())
LFATAL(
"No outputs received, we need at least one.");
// Shorthand for the first output tensor and its shape; the layout checks of the decoding branches use msiz.
114 cv::Mat
const & out = outs[0]; cv::MatSize
const & msiz = out.size;
// Threshold parameters are scaled by 0.01 (presumably percent -> fraction; confirm against the parameter docs).
// confThreshold is intentionally not const: the per-class-threshold decoding paths assign to it.
116 float confThreshold = cthresh::get() * 0.01F;
117 float const boxThreshold = dthresh::get() * 0.01F;
118 float const nmsThreshold = nms::get() * 0.01F;
119 bool const sigmo = sigmoid::get();
120 bool const clampbox = boxclamp::get();
// fudge is added to the class id decoded from the network in most decoding branches.
121 int const fudge = classoffset::get();
122 bool const smoothmsk = masksmooth::get();
// Size of blob 0 as reported by the pre-processor; used to scale normalized box coordinates and masks.
130 cv::Size
const bsiz = preproc->
blobsize(0);
// Per-detection accumulators filled in parallel by the decoding branches and later handed to NMS.
134 std::vector<int> classIds;
135 std::vector<float> confidences;
136 std::vector<cv::Rect> boxes;
// One coefficient matrix per detection; only the segmentation branches push into it.
137 std::vector<cv::Mat> mask_coeffs;
// Height of the mask prototype tensor; overwritten by the segmentation branches, 1 otherwise.
139 int mask_proto_h = 1;
144 switch(detecttype::get())
147 case jevois::dnn::postprocessor::DetectType::FasterRCNN:
149 if (outs.size() != 1 || msiz.dims() != 4 || msiz[0] != 1 || msiz[1] != 1 || msiz[3] != 7)
150 LTHROW(
"Expected 1 output blob with shape 1x1xNx7 for N detections with values "
151 "[batchId, classId, confidence, left, top, right, bottom]");
153 float const * data = (
float const *)out.data;
154 for (
size_t i = 0; i < out.total(); i += 7)
156 float confidence = data[i + 2];
157 if (confidence > confThreshold)
159 int left = (int)data[i + 3];
160 int top = (int)data[i + 4];
161 int right = (int)data[i + 5];
162 int bottom = (int)data[i + 6];
163 int width = right - left + 1;
164 int height = bottom - top + 1;
165 classIds.push_back((
int)(data[i + 1]) + fudge);
166 boxes.push_back(cv::Rect(left, top, width, height));
167 confidences.push_back(confidence);
174 case jevois::dnn::postprocessor::DetectType::SSD:
176 if (outs.size() != 1 || msiz.dims() != 4 || msiz[0] != 1 || msiz[1] != 1 || msiz[3] != 7)
177 LTHROW(
"Expected 1 output blob with shape 1x1xNx7 for N detections with values "
178 "[batchId, classId, confidence, left, top, right, bottom]");
180 float const * data = (
float const *)out.data;
181 for (
size_t i = 0; i < out.total(); i += 7)
183 float confidence = data[i + 2];
184 if (confidence > confThreshold)
186 int left = (int)(data[i + 3] * bsiz.width);
187 int top = (int)(data[i + 4] * bsiz.height);
188 int right = (int)(data[i + 5] * bsiz.width);
189 int bottom = (int)(data[i + 6] * bsiz.height);
190 int width = right - left + 1;
191 int height = bottom - top + 1;
192 classIds.push_back((
int)(data[i + 1]) + fudge);
193 boxes.push_back(cv::Rect(left, top, width, height));
194 confidences.push_back(confidence);
201 case jevois::dnn::postprocessor::DetectType::TPUSSD:
203 if (outs.size() != 4)
204 LTHROW(
"Expected 4 output blobs with shapes 4xN for boxes, N for IDs, N for scores, and 1x1 for count");
205 cv::Mat
const & bboxes = outs[0];
206 cv::Mat
const & ids = outs[1];
207 cv::Mat
const & scores = outs[2];
208 cv::Mat
const & count = outs[3];
209 if (bboxes.total() != 4 * ids.total() || bboxes.total() != 4 * scores.total() || count.total() != 1)
210 LTHROW(
"Expected 4 output blobs with shapes 4xN for boxes, N for IDs, N for scores, and 1x1 for count");
212 size_t num = count.at<
float>(0);
213 if (num > ids.total())
LTHROW(
"Too many detections: " << num <<
" for only " << ids.total() <<
" ids");
214 float const * bb = (
float const *)bboxes.data;
216 for (
size_t i = 0; i < num; ++i)
218 if (scores.at<
float>(i) < confThreshold)
continue;
220 int top = (int)(bb[4 * i] * bsiz.height);
221 int left = (int)(bb[4 * i + 1] * bsiz.width);
222 int bottom = (int)(bb[4 * i + 2] * bsiz.height);
223 int right = (int)(bb[4 * i + 3] * bsiz.width);
224 int width = right - left + 1;
225 int height = bottom - top + 1;
226 classIds.push_back((
int)(ids.at<
float>(i)) + fudge);
227 boxes.push_back(cv::Rect(left, top, width, height));
228 confidences.push_back(scores.at<
float>(i));
234 case jevois::dnn::postprocessor::DetectType::YOLO:
236 for (
size_t i = 0; i < outs.size(); ++i)
240 cv::Mat
const & out = outs[i];
241 cv::MatSize
const & ms = out.size;
int const nd = ms.dims();
242 int nbox = -1, ndata = -1;
248 for (
int i = 0; i < nd-2; ++i)
if (ms[i] != 1) nbox = -1;
251 if (nbox < 0 || ndata < 5)
252 LTHROW(
"Expected 1 or more output blobs with shape Nx(5+C) where N is the number of "
253 "detected objects, C is the number of classes, and the first 5 columns are "
254 "[center_x, center_y, width, height, box score]. // "
256 ": need Nx(5+C) or 1xNx(5+C)");
259 int sz2[] = { nbox, ndata };
260 cv::Mat
const out2(2, sz2, out.type(), out.data);
262 float const * data = (
float const *)out2.data;
263 for (
int j = 0; j < nbox; ++j, data += ndata)
265 if (data[4] < boxThreshold)
continue;
267 cv::Mat scores = out2.row(j).colRange(5, ndata);
268 cv::Point classIdPoint;
double confidence;
269 cv::minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
271 if (confidence < confThreshold)
continue;
274 int centerX, centerY, width, height;
277 centerX = (int)(data[0] * bsiz.width);
278 centerY = (int)(data[1] * bsiz.height);
279 width = (int)(data[2] * bsiz.width);
280 height = (int)(data[3] * bsiz.height);
285 centerX = (int)(data[0]);
286 centerY = (int)(data[1]);
287 width = (int)(data[2]);
288 height = (int)(data[3]);
291 int left = centerX - width / 2;
292 int top = centerY - height / 2;
293 boxes.push_back(cv::Rect(left, top, width, height));
294 classIds.push_back(classIdPoint.x);
295 confidences.push_back((
float)confidence);
302 case jevois::dnn::postprocessor::DetectType::YOLOv10:
304 for (
size_t i = 0; i < outs.size(); ++i)
306 cv::Mat
const & out = outs[i];
307 cv::MatSize
const & ms = out.size;
int const nd = ms.dims();
310 LTHROW(
"Expected 1 or more output blobs with shape Nx(4+C) where N is the number of "
311 "detected objects, C is the number of classes, and the first 4 columns are "
312 "[x1, y1, x2, y2]. // "
317 int const nbox = ms[nd-2];
318 int const ndata = ms[nd-1];
319 int sz2[] = { nbox, ndata };
320 cv::Mat
const out2(2, sz2, out.type(), out.data);
323 float const * data = (
float const *)out2.data;
324 for (
int j = 0; j < nbox; ++j, data += ndata)
326 cv::Mat scores = out2.row(j).colRange(4, ndata);
327 cv::Point classIdPoint;
double confidence;
328 cv::minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
330 if (confidence < confThreshold)
continue;
333 boxes.push_back(cv::Rect(data[0], data[1], data[2]-data[0]+1, data[3]-data[1]+1));
334 classIds.push_back(classIdPoint.x);
335 confidences.push_back((
float)confidence);
342 case jevois::dnn::postprocessor::DetectType::YOLOv10pp:
344 if (outs.size() != 1 || msiz.dims() != 3 || msiz[0] != 1 || msiz[2] != 6)
345 LTHROW(
"Expected 1 output blob with shape 1xNx6 for N detections with values "
346 "[left, top, right, bottom, confidence, classId]");
348 float const * data = (
float const *)out.data;
349 for (
size_t i = 0; i < out.total(); i += 6)
351 float confidence = data[i + 4];
352 if (confidence > confThreshold)
355 int left = (int)data[i + 0];
356 int top = (int)data[i + 1];
357 int right = (int)data[i + 2];
358 int bottom = (int)data[i + 3];
359 int width = right - left + 1;
360 int height = bottom - top + 1;
361 classIds.push_back((
int)(data[i + 5]) + fudge);
362 boxes.push_back(cv::Rect(left, top, width, height));
363 confidences.push_back(confidence);
370 case jevois::dnn::postprocessor::DetectType::RAWYOLO:
372 if (itsYOLO) itsYOLO->yolo(outs, classIds, confidences, boxes, itsLabels.size(), boxThreshold, confThreshold,
373 bsiz, fudge, maxnbox::get(), sigmo);
374 else LFATAL(
"Internal error -- no YOLO subcomponent");
379 case jevois::dnn::postprocessor::DetectType::YOLOX:
381 if ((outs.size() % 3) != 0 || msiz.dims() != 4 || msiz[0] != 1)
382 LTHROW(
"Expected several (usually 3, for 3 strides) sets of 3 blobs: 1xHxWxC (class scores), 1xHxWx4 (boxes), "
383 "1xHxWx1 (objectness scores)");
387 for (
size_t idx = 0; idx < outs.size(); idx += 3)
389 cv::Mat
const & cls = outs[idx]; cv::MatSize
const & cls_siz = cls.size;
390 if (cls_siz.dims() != 4)
LTHROW(
"Output " << idx <<
" is not 4D 1xHxWxC");
391 float const * cls_data = (
float const *)cls.data;
// Box tensor of this triplet is outs[idx + 1]; report that index (not idx) when its shape is wrong.
393 cv::Mat
const & bx = outs[idx + 1]; cv::MatSize
const & bx_siz = bx.size;
394 if (bx_siz.dims() != 4 || bx_siz[3] != 4)
LTHROW(
"Output " << idx + 1 <<
" is not 1xHxWx4");
395 float const * bx_data = (
float const *)bx.data;
// Objectness tensor of this triplet is outs[idx + 2]; same remark for the reported index.
397 cv::Mat
const & obj = outs[idx + 2]; cv::MatSize
const & obj_siz = obj.size;
398 if (obj_siz.dims() != 4 || obj_siz[3] != 1)
LTHROW(
"Output " << idx + 2 <<
" is not 1xHxWx1");
399 float const * obj_data = (
float const *)obj.data;
401 for (
int i = 1; i < 3; ++i)
402 if (cls_siz[i] != bx_siz[i] || cls_siz[i] != obj_siz[i])
403 LTHROW(
"Mismatched HxW sizes for outputs " << idx <<
" .. " << idx + 2);
405 size_t const nclass = cls_siz[3];
408 for (
int y = 0; y < cls_siz[1]; ++y)
409 for (
int x = 0; x < cls_siz[2]; ++x)
412 float objectness = obj_data[0];
413 if (objectness >= boxThreshold)
416 size_t best_idx = 0;
float confidence = cls_data[0];
417 for (
size_t i = 1; i < nclass; ++i)
418 if (cls_data[i] > confidence) { confidence = cls_data[i]; best_idx = i; }
420 confidence *= objectness;
422 if (confidence >= confThreshold)
425 float cx = (x + bx_data[0]) * stride;
426 float cy = (y + bx_data[1]) * stride;
427 float width = std::exp(bx_data[2]) * stride;
428 float height = std::exp(bx_data[3]) * stride;
429 float left = cx - 0.5F * width;
430 float top = cy - 0.5F * height;
433 boxes.push_back(cv::Rect(left, top, width, height));
434 classIds.push_back(
int(best_idx) + fudge);
435 confidences.push_back(confidence);
452 case jevois::dnn::postprocessor::DetectType::YOLOv8t:
453 case jevois::dnn::postprocessor::DetectType::YOLOjevoist:
455 if ((outs.size() % 2) != 0 || msiz.dims() != 4 || msiz[0] != 1)
456 LTHROW(
"Expected several (usually 3, for 3 strides) sets of 2 blobs: 1xHxWx64 (raw boxes) "
457 "and 1xHxWxC (class scores)");
460 int constexpr reg_max = 16;
462 for (
size_t idx = 0; idx < outs.size(); idx += 2)
464 cv::Mat
const & bx = outs[idx]; cv::MatSize
const & bx_siz = bx.size;
465 if (bx_siz.dims() != 4 || bx_siz[3] != 4 * reg_max)
LTHROW(
"Output " << idx <<
" is not 4D 1xHxWx64");
466 float const * bx_data = (
float const *)bx.data;
// Class-score tensor of this pair is outs[idx + 1]; report that index (not idx) when it is not 4D.
468 cv::Mat
const & cls = outs[idx + 1]; cv::MatSize
const & cls_siz = cls.size;
469 if (cls_siz.dims() != 4)
LTHROW(
"Output " << idx + 1 <<
" is not 4D 1xHxWxC");
470 float const * cls_data = (
float const *)cls.data;
471 size_t const nclass = cls_siz[3];
473 if (itsPerClassThreshs.empty() ==
false && itsPerClassThreshs.size() != nclass)
474 LTHROW(
"Output tensor has " << nclass <<
" classes but " << itsPerClassThreshs.size() <<
475 " values given in perclassthresh -- both must match");
477 for (
int i = 1; i < 3; ++i)
478 if (cls_siz[i] != bx_siz[i])
LTHROW(
"Mismatched HxW sizes for outputs " << idx <<
" .. " << idx + 1);
481 for (
int y = 0; y < cls_siz[1]; ++y)
482 for (
int x = 0; x < cls_siz[2]; ++x)
485 size_t best_idx = 0;
float confidence = cls_data[0];
487 if (itsPerClassThreshs.empty())
490 for (
size_t i = 1; i < nclass; ++i)
491 if (cls_data[i] > confidence) { confidence = cls_data[i]; best_idx = i; }
500 for (
size_t i = 1; i < nclass; ++i)
502 float c = cls_data[i];
504 if (c / itsPerClassThreshs[i] > confidence / itsPerClassThreshs[best_idx])
505 { confidence = c; best_idx = i; }
507 confThreshold = itsPerClassThreshs[best_idx];
510 if (confidence >= confThreshold)
516 float xmin = (x + 0.5f -
softmax_dfl(bx_data, dst, reg_max)) * stride;
517 float ymin = (y + 0.5f -
softmax_dfl(bx_data + reg_max, dst, reg_max)) * stride;
518 float xmax = (x + 0.5f +
softmax_dfl(bx_data + 2 * reg_max, dst, reg_max)) * stride;
519 float ymax = (y + 0.5f +
softmax_dfl(bx_data + 3 * reg_max, dst, reg_max)) * stride;
522 boxes.push_back(cv::Rect(xmin, ymin, xmax - xmin, ymax - ymin));
523 classIds.push_back(
int(best_idx) + fudge);
524 confidences.push_back(confidence);
529 bx_data += 4 * reg_max;
539 case jevois::dnn::postprocessor::DetectType::YOLOv8:
540 case jevois::dnn::postprocessor::DetectType::YOLOjevois:
542 if ((outs.size() % 2) != 0 || msiz.dims() != 4 || msiz[0] != 1)
543 LTHROW(
"Expected several (usually 3, for 3 strides) sets of 2 blobs: 1x64xHxW (raw boxes) "
544 "and 1xCxHxW (class scores)");
547 int constexpr reg_max = 16;
549 for (
size_t idx = 0; idx < outs.size(); idx += 2)
551 cv::Mat
const & bx = outs[idx]; cv::MatSize
const & bx_siz = bx.size;
552 if (bx_siz.dims() != 4 || bx_siz[1] != 4 * reg_max)
LTHROW(
"Output " << idx <<
" is not 4D 1x64xHxW");
553 float const * bx_data = (
float const *)bx.data;
// Class-score tensor of this pair is outs[idx + 1]; report that index (not idx) when it is not 4D.
555 cv::Mat
const & cls = outs[idx + 1]; cv::MatSize
const & cls_siz = cls.size;
556 if (cls_siz.dims() != 4)
LTHROW(
"Output " << idx + 1 <<
" is not 4D 1xCxHxW");
557 float const * cls_data = (
float const *)cls.data;
558 size_t const nclass = cls_siz[1];
560 if (itsPerClassThreshs.empty() ==
false && itsPerClassThreshs.size() != nclass)
561 LTHROW(
"Output tensor has " << nclass <<
" classes but " << itsPerClassThreshs.size() <<
562 " values given in perclassthresh -- both must match");
564 for (
int i = 2; i < 4; ++i)
565 if (cls_siz[i] != bx_siz[i])
LTHROW(
"Mismatched HxW sizes for outputs " << idx <<
" .. " << idx + 1);
567 size_t const step = cls_siz[2] * cls_siz[3];
570 for (
int y = 0; y < cls_siz[2]; ++y)
571 for (
int x = 0; x < cls_siz[3]; ++x)
574 size_t best_idx = 0;
float confidence = cls_data[0];
576 if (itsPerClassThreshs.empty())
579 for (
size_t i = 1; i < nclass; ++i)
580 if (cls_data[i * step] > confidence) { confidence = cls_data[i * step]; best_idx = i; }
589 for (
size_t i = 1; i < nclass; ++i)
591 float c = cls_data[i * step];
593 if (c / itsPerClassThreshs[i] > confidence / itsPerClassThreshs[best_idx])
594 { confidence = c; best_idx = i; }
596 confThreshold = itsPerClassThreshs[best_idx];
599 if (confidence >= confThreshold)
605 float xmin = (x + 0.5f -
softmax_dfl(bx_data, dst, reg_max, step)) * stride;
606 float ymin = (y + 0.5f -
softmax_dfl(bx_data + reg_max * step, dst, reg_max, step)) * stride;
607 float xmax = (x + 0.5f +
softmax_dfl(bx_data + 2 * reg_max * step, dst, reg_max, step)) * stride;
608 float ymax = (y + 0.5f +
softmax_dfl(bx_data + 3 * reg_max * step, dst, reg_max, step)) * stride;
611 boxes.push_back(cv::Rect(xmin, ymin, xmax - xmin, ymax - ymin));
612 classIds.push_back(
int(best_idx) + fudge);
613 confidences.push_back(confidence);
628 case jevois::dnn::postprocessor::DetectType::YOLOv8seg:
630 if (outs.size() % 3 != 1 || msiz.dims() != 4 || msiz[0] != 1)
631 LTHROW(
"Expected several (usually 3, for 3 strides) sets of 3 tensors: 1x64xHxW (raw boxes), "
632 "1xCxHxW (class scores), and 1xMxHxW (mask coeffs for M masks); then one 1xMxHxW for M mask prototypes");
635 int constexpr reg_max = 16;
638 cv::MatSize
const & mps = outs.back().size;
639 if (mps.dims() != 4)
LTHROW(
"Mask prototypes not 4D 1xMxHxW");
640 mask_proto = cv::Mat(std::vector<int>{ mps[1], mps[2] * mps[3] }, CV_32F, outs.back().data);
641 int const mask_num = mps[1];
642 mask_proto_h = mps[2];
645 for (
size_t idx = 0; idx < outs.size() - 1; idx += 3)
647 cv::Mat
const & bx = outs[idx]; cv::MatSize
const & bx_siz = bx.size;
648 if (bx_siz.dims() != 4 || bx_siz[1] != 4 * reg_max)
LTHROW(
"Output " << idx <<
" is not 4D 1x64xHxW");
649 float const * bx_data = (
float const *)bx.data;
// Class-score tensor of this triplet is outs[idx + 1]; report that index (not idx) when it is not 4D.
651 cv::Mat
const & cls = outs[idx + 1]; cv::MatSize
const & cls_siz = cls.size;
652 if (cls_siz.dims() != 4)
LTHROW(
"Output " << idx + 1 <<
" is not 4D 1xCxHxW");
653 float const * cls_data = (
float const *)cls.data;
654 size_t const nclass = cls_siz[1];
// Mask-coefficient tensor is outs[idx + 2]; its channel count must match the mask prototypes (mask_num).
656 cv::Mat
const & msk = outs[idx + 2]; cv::MatSize
const & msk_siz = msk.size;
657 if (msk_siz.dims() != 4 || msk_siz[1] != mask_num)
LTHROW(
"Output " << idx + 2 <<
" is not 4D 1xMxHxW");
658 float const * msk_data = (
float const *)msk.data;
// H (dim 2) and W (dim 3) must agree across all three tensors idx .. idx + 2 of the triplet.
660 for (
int i = 2; i < 4; ++i)
661 if (cls_siz[i] != bx_siz[i] || cls_siz[i] != msk_siz[i])
662 LTHROW(
"Mismatched HxW sizes for outputs " << idx <<
" .. " << idx + 2);
664 size_t const step = cls_siz[2] * cls_siz[3];
667 for (
int y = 0; y < cls_siz[2]; ++y)
668 for (
int x = 0; x < cls_siz[3]; ++x)
671 size_t best_idx = 0;
float confidence = cls_data[0];
672 for (
size_t i = 1; i < nclass; ++i)
673 if (cls_data[i * step] > confidence) { confidence = cls_data[i * step]; best_idx = i; }
678 if (confidence >= confThreshold)
683 float xmin = (x + 0.5f -
softmax_dfl(bx_data, dst, reg_max, step)) * stride;
684 float ymin = (y + 0.5f -
softmax_dfl(bx_data + reg_max * step, dst, reg_max, step)) * stride;
685 float xmax = (x + 0.5f +
softmax_dfl(bx_data + 2 * reg_max * step, dst, reg_max, step)) * stride;
686 float ymax = (y + 0.5f +
softmax_dfl(bx_data + 3 * reg_max * step, dst, reg_max, step)) * stride;
689 boxes.push_back(cv::Rect(xmin, ymin, xmax - xmin, ymax - ymin));
690 classIds.push_back(
int(best_idx) + fudge);
691 confidences.push_back(confidence);
694 cv::Mat coeffs(1, mask_num, CV_32F);
float * cptr = (
float *)coeffs.data;
695 for (
int i = 0; i < mask_num; ++i) *cptr++ = msk_data[i * step];
696 mask_coeffs.emplace_back(coeffs);
700 ++cls_data; ++bx_data; ++msk_data;
710 case jevois::dnn::postprocessor::DetectType::YOLOv8segt:
712 if (outs.size() % 3 != 1 || msiz.dims() != 4 || msiz[0] != 1)
713 LTHROW(
"Expected several (usually 3, for 3 strides) sets of 3 tensors: 1xHxWx64 (raw boxes), "
714 "1xHxWxC (class scores), and 1xHxWxM (mask coeffs for M masks); then one 1xHxWxM for M mask prototypes");
717 int constexpr reg_max = 16;
720 cv::MatSize
const & mps = outs.back().size;
721 if (mps.dims() != 4)
LTHROW(
"Mask prototypes not 4D 1xHxWxM");
722 mask_proto = cv::Mat(std::vector<int>{ mps[1] * mps[2], mps[3] }, CV_32F, outs.back().data);
723 int const mask_num = mps[3];
724 mask_proto_h = mps[1];
727 for (
size_t idx = 0; idx < outs.size() - 1; idx += 3)
729 cv::Mat
const & bx = outs[idx]; cv::MatSize
const & bx_siz = bx.size;
730 if (bx_siz.dims() != 4 || bx_siz[3] != 4 * reg_max)
LTHROW(
"Output " << idx <<
" is not 4D 1xHxWx64");
731 float const * bx_data = (
float const *)bx.data;
// Class-score tensor of this triplet is outs[idx + 1]; report that index (not idx) when it is not 4D.
733 cv::Mat
const & cls = outs[idx + 1]; cv::MatSize
const & cls_siz = cls.size;
734 if (cls_siz.dims() != 4)
LTHROW(
"Output " << idx + 1 <<
" is not 4D 1xHxWxC");
735 float const * cls_data = (
float const *)cls.data;
736 size_t const nclass = cls_siz[3];
// Mask-coefficient tensor is outs[idx + 2]; its last dimension must match the mask prototypes (mask_num).
738 cv::Mat
const & msk = outs[idx + 2]; cv::MatSize
const & msk_siz = msk.size;
739 if (msk_siz.dims() != 4 || msk_siz[3] != mask_num)
LTHROW(
"Output " << idx + 2 <<
" is not 4D 1xHxWxM");
740 float const * msk_data = (
float const *)msk.data;
// H (dim 1) and W (dim 2) must agree across all three tensors idx .. idx + 2 of the triplet.
742 for (
int i = 1; i < 3; ++i)
743 if (cls_siz[i] != bx_siz[i] || cls_siz[i] != msk_siz[i])
744 LTHROW(
"Mismatched HxW sizes for outputs " << idx <<
" .. " << idx + 2);
747 for (
int y = 0; y < cls_siz[1]; ++y)
748 for (
int x = 0; x < cls_siz[2]; ++x)
751 size_t best_idx = 0;
float confidence = cls_data[0];
752 for (
size_t i = 1; i < nclass; ++i)
753 if (cls_data[i] > confidence) { confidence = cls_data[i]; best_idx = i; }
758 if (confidence >= confThreshold)
763 float xmin = (x + 0.5f -
softmax_dfl(bx_data, dst, reg_max)) * stride;
764 float ymin = (y + 0.5f -
softmax_dfl(bx_data + reg_max, dst, reg_max)) * stride;
765 float xmax = (x + 0.5f +
softmax_dfl(bx_data + 2 * reg_max, dst, reg_max)) * stride;
766 float ymax = (y + 0.5f +
softmax_dfl(bx_data + 3 * reg_max, dst, reg_max)) * stride;
769 boxes.push_back(cv::Rect(xmin, ymin, xmax - xmin, ymax - ymin));
770 classIds.push_back(
int(best_idx) + fudge);
771 confidences.push_back(confidence);
774 cv::Mat coeffs(mask_num, 1, CV_32F);
775 std::memcpy(coeffs.data, msk_data, mask_num *
sizeof(
float));
776 mask_coeffs.emplace_back(coeffs);
781 bx_data += 4 * reg_max;
782 msk_data += mask_num;
794 LTHROW(
"Unsupported Post-processor detecttype " <<
int(detecttype::get()));
798 catch (std::exception
const & e)
800 std::string err =
"Selected detecttype is " + detecttype::strget() +
" and network produced:\n\n";
802 err +=
"\nFATAL ERROR(s):\n\n";
// Non-maximum suppression over the boxes gathered by the decoding branches; indices receives the kept entries.
808 std::vector<int> indices;
// When nmsperclass is set, use the batched variant, which also takes classIds.
809 if (nmsperclass::get())
810 cv::dnn::NMSBoxesBatched(boxes, confidences, classIds, confThreshold, nmsThreshold, indices, 1.0F, maxnbox::get());
// NOTE(review): confThreshold is not const and is overwritten with itsPerClassThreshs[best_idx] by the
// per-class-threshold decoding paths, so the score threshold passed to NMS may be whichever value was assigned
// last there rather than the cthresh setting -- confirm this is intended.
812 cv::dnn::NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices, 1.0F, maxnbox::get());
815 itsDetections.clear();
bool namonly = namedonly::get();
816 std::vector<cv::Vec4i> contour_hierarchy;
818 for (
size_t i = 0; i < indices.size(); ++i)
820 int idx = indices[i];
822 if (namonly ==
false || label.empty() ==
false)
824 cv::Rect & b = boxes[idx];
830 std::vector<cv::Point> poly;
831 if (mask_coeffs.empty() ==
false)
835 cv::Mat weighted_mask;
836 if (mask_coeffs[idx].rows == 1) weighted_mask = mask_coeffs[idx] * mask_proto;
837 else weighted_mask = mask_proto * mask_coeffs[idx];
840 weighted_mask = weighted_mask.reshape(0, mask_proto_h);
849 int mask_scale = bsiz.height / mask_proto_h;
852 cv::Mat src = weighted_mask;
853 cv::resize(src, weighted_mask, cv::Size(), mask_scale, mask_scale, cv::INTER_LINEAR);
857 cv::Rect scaled_rect(b.tl() / mask_scale, b.br() / mask_scale);
858 scaled_rect &= cv::Rect(cv::Point(0, 0), weighted_mask.size());
861 cv::Mat roi_mask; cv::threshold(weighted_mask(scaled_rect), roi_mask, 0.5, 255.0, cv::THRESH_BINARY);
862 cv::Mat roi_u8; roi_mask.convertTo(roi_u8, CV_8U);
865 std::vector<std::vector<cv::Point>> polys;
866 cv::findContours(roi_u8, polys, contour_hierarchy, cv::RETR_EXTERNAL,
867 cv::CHAIN_APPROX_SIMPLE, scaled_rect.tl());
870 size_t polyidx = 0;
size_t largest_poly_size = 0;
size_t j = 0;
871 for (
auto const & p : polys)
873 if (p.size() > largest_poly_size) { largest_poly_size = p.size(); polyidx = j; }
878 if (polys.empty() ==
false)
879 for (cv::Point & pt : polys[polyidx])
881 float x = pt.x * mask_scale, y = pt.y * mask_scale;
883 poly.emplace_back(cv::Point(x, y));
888 cv::Point2f tl = b.tl(); preproc->
b2i(tl.x, tl.y);
889 cv::Point2f br = b.br(); preproc->
b2i(br.x, br.y);
890 b.x = tl.x; b.y = tl.y; b.width = br.x - tl.x; b.height = br.y - tl.y;
894 std::vector<jevois::ObjReco> ov;
897 itsDetections.emplace_back(od);
903 ++itsLastProcessedNum;
997 size_t const nclass = itsPerClassThreshs.size();
998 bool has_text_encoder =
false, has_image_encoder =
false;
1002 has_text_encoder = (itsYOLOjevois->textEmbeddingSize() != 0);
1003 has_image_encoder = (itsYOLOjevois->imageEmbeddingSize() != 0);
1006 static int livestate = -1;
static ImVec2 livetl { 0.0F, 0.0F};
static ImVec2 livebr { 0.0F, 0.0F };
1007 static size_t liveclsid = 0;
1012 ImGui::PushStyleColor(ImGuiCol_WindowBg, 0xf0e0ffff);
1013 ImGui::SetNextWindowSize(ImVec2(1300, 500), ImGuiCond_FirstUseEver);
1014 if (ImGui::Begin(
"Open-World Detection settings",
nullptr ))
1016 for (
size_t i = 0; i < nclass; ++i)
1018 ImGui::AlignTextToFramePadding();
1019 ImGui::Text(
"Class %2zu:", i);
1023 int textflags = ImGuiInputTextFlags_EnterReturnsTrue;
1024 if (has_text_encoder ==
false)
1026 ImGui::PushItemFlag(ImGuiItemFlags_Disabled,
true);
1027 ImGui::PushStyleVar(ImGuiStyleVar_Alpha, ImGui::GetStyle().Alpha * 0.8f);
1028 textflags |= ImGuiInputTextFlags_ReadOnly;
1032 static char wname[32]; snprintf(wname, 32,
"##ywl%zu", i);
1036 char buf[256]; strncpy(buf, label.c_str(),
sizeof(buf)-1);
1037 ImGui::PushItemWidth(400);
1038 if (ImGui::InputText(wname, buf,
sizeof(buf), textflags))
1039 try { itsYOLOjevois->update(i, buf); }
1041 ImGui::PopItemWidth();
1044 if (ImGui::IsItemHovered())
1047 cv::Mat icon = itsYOLOjevois->image(i);
1048 if (icon.empty() ==
false)
1050 itsHoverImage.load(icon,
false);
1051 ImVec2
const pos = ImGui::GetMousePos() + ImVec2(20, 20);
1052 ImVec2
const siz(128, 128);
1053 auto fdl = ImGui::GetForegroundDrawList();
1054 itsHoverImage.draw(pos, siz, fdl);
1055 fdl->AddRect(pos, pos+siz, 0xFF808080, 0.0F, ImDrawFlags_None, 2);
1061 if (has_text_encoder ==
false)
1063 ImGui::PopItemFlag();
1064 ImGui::PopStyleVar();
1069 snprintf(wname, 32,
"##yws%zu", i);
1070 float v = itsPerClassThreshs[i] * 100.0F;
1071 ImGui::PushItemWidth(250);
1072 if (ImGui::SliderFloat(wname, &v, 0.01F, 100.0F)) itsPerClassThreshs[i] = v * 0.01F;
1073 ImGui::PopItemWidth();
1076 if (has_image_encoder)
1079 snprintf(wname, 32,
"Live select##%d",
int(i));
1080 if (ImGui::Button(wname) && livestate == -1) { livestate = 0; liveclsid = i; }
1086 ImGui::AlignTextToFramePadding();
1087 static char customname[256] =
"yolo-jevois-custom";
1088 ImGui::TextUnformatted(
"Custom pipeline name: ");
1090 ImGui::PushItemWidth(450);
1091 ImGui::InputText(
"##scp", customname,
sizeof(customname));
1092 ImGui::PopItemWidth();
1095 if (ImGui::Button(
"Save"))
1100 std::string pipeinst = dd.back(); dd.pop_back();
1101 std::shared_ptr<jevois::Component> comp = engine()->
getComponent(dd[0]); dd.erase(dd.begin());
1102 for (std::string
const & c : dd) { comp = comp->getSubComponent(c);
if (!comp)
LFATAL(
"Internal error"); }
1105 std::vector<std::pair<std::string , std::string >>
const & settings = pipe->
zooSettings();
1109 cv::FileStorage fs(basename +
".yml", cv::FileStorage::WRITE | cv::FileStorage::FORMAT_YAML);
1110 if (fs.isOpened() ==
false)
LFATAL(
"Failed to write " << basename <<
".yml");
1112 fs << customname <<
"{";
1114 std::string classes;
1115 for (
auto const & s : settings)
1116 if (s.first ==
"classes") classes = s.second;
1117 else if (s.first !=
"perclassthresh") fs << s.first << s.second;
1120 std::vector<std::string> pcth;
1121 for (
float t : itsPerClassThreshs) pcth.emplace_back(
jevois::sformat(
"%.2f", t * 100.0F));
1122 fs <<
"perclassthresh" << (
'"' +
jevois::join(pcth,
" ") +
'"');
1127 std::vector<std::string> cls;
1128 for (
size_t i = 0; i < nclass; ++i)
1129 if (itsYOLOjevois->image(i).empty())
1132 cls.push_back(itsLabels[i]);
1137 std::string
const imgname = basename +
"-cls" + std::to_string(i) +
".png";
1138 cv::Mat img_bgr; cv::cvtColor(itsYOLOjevois->image(i), img_bgr, cv::COLOR_RGB2BGR);
1139 cv::imwrite(imgname, img_bgr);
1140 cls.emplace_back(
"imagefile:" + imgname);
1142 fs <<
"classes" << (
'"' +
jevois::join(cls,
", ") +
'"');
1144 else if (classes.empty() ==
false)
1145 fs <<
"classes" << classes;
1148 helper->
reportInfo(
"Custom model definition saved to " + basename +
".yml");
1153 ImGui::PopStyleColor();
1157 if (livestate != -1 && helper->
selectImageBox(livestate, livetl, livebr))
1160 ImVec2 tl = helper->
d2i(livetl,
"c");
1161 ImVec2 br = helper->
d2i(livebr,
"c");
1165 cv::Mat hdimg = inframe->
getCvRGB();
1166 cv::Rect r(cv::Point(tl.x, tl.y), cv::Point(br.x, br.y));
1167 cv::Mat roi = hdimg(r).clone();
1170 itsYOLOjevois->update(liveclsid, roi);