JeVois  1.22
JeVois Smart Embedded Machine Vision Toolkit
Share this page:
Loading...
Searching...
No Matches
PreProcessorBlob.C
Go to the documentation of this file.
1// ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
2//
3// JeVois Smart Embedded Machine Vision Toolkit - Copyright (C) 2021 by Laurent Itti, the University of Southern
4// California (USC), and iLab at USC. See http://iLab.usc.edu and http://jevois.org for information about this project.
5//
6// This file is part of the JeVois Smart Embedded Machine Vision Toolkit. This program is free software; you can
7// redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software
8// Foundation, version 2. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9// without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
10// License for more details. You should have received a copy of the GNU General Public License along with this program;
11// if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
12//
13// Contact information: Laurent Itti - 3641 Watt Way, HNB-07A - Los Angeles, CA 90089-2520 - USA.
14// Tel: +1 213 740 3527 - itti@pollux.usc.edu - http://iLab.usc.edu - http://jevois.org
15// ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
16/*! \file */
17
19#include <jevois/DNN/Utils.H>
21
22#include <opencv2/dnn.hpp>
23#include <opencv2/imgproc/imgproc.hpp>
24
// Append one formatted line (built by jevois::sformat from fmt and the variadic args, with the local string `prefix`
// prepended) to itsInfo, but only when the local boolean `detail` is true. The names `detail`, `prefix` and `itsInfo`
// are not macro arguments: they must be in scope wherever the macro is expanded. The do { } while(0) wrapper makes
// the expansion behave as a single statement.
25#define DETAILS(fmt, ...) \
26 do { if (detail) itsInfo.emplace_back(prefix + jevois::sformat(fmt, ## __VA_ARGS__)); } while(0)
27
// Same as DETAILS but without the `detail` test: always appends to itsInfo. Intended for call sites that have
// already checked `detail` themselves. Still requires `prefix` and `itsInfo` to be in scope.
28#define DETAILS2(fmt, ...) \
29 do { itsInfo.emplace_back(prefix + jevois::sformat(fmt, ## __VA_ARGS__)); } while(0)
30
31// ####################################################################################################
34
35// ####################################################################################################
37{
38 numin::freeze(doit);
39}
40
41// ####################################################################################################
42std::vector<cv::Mat> jevois::dnn::PreProcessorBlob::process(cv::Mat const & img, bool swaprb,
43 std::vector<vsi_nn_tensor_attr_t> const & attrs,
44 std::vector<cv::Rect> & crops)
45{
46 bool const detail = details::get();
47 itsInfo.clear();
48 cv::Scalar m = mean::get();
49 cv::Scalar sd = stdev::get();
50 if (sd[0] == 0.0 || sd[1] == 0.0 || sd[2] == 0.0) LFATAL("stdev cannot be zero");
51 float sc = scale::get();
52 if (sc == 0.0F) LFATAL("Scale cannot be zero");
53
54 std::vector<cv::Mat> blobs; size_t bnum = 0;
55 for (vsi_nn_tensor_attr_t const & attr : attrs)
56 {
57 // --------------------------------------------------------------------------------
58 // Get the blob:
59 cv::Mat blob;
60 cv::Size bsiz = jevois::dnn::attrsize(attr);
61 cv::Rect crop;
62 std::string prefix; if (detail) prefix = "Blob " + std::to_string(bnum) + ": ";
63
64 // Start with an unscaled crop:
65 unsigned int bw = bsiz.width, bh = bsiz.height;
66 if (bw == 1 || bw == 3 || bh == 1 || bh == 3)
67 LFATAL("Incorrect input tensor " << jevois::dnn::shapestr(attr) <<
68 "; did you swap NHWC vs NCHW in your intensors specification?");
69
70 // --------------------------------------------------------------------------------
71 // Compute crop rectangle:
72 if (letterbox::get())
73 {
74 jevois::applyLetterBox(bw, bh, img.cols, img.rows, false);
75
76 cv::Rect roi;
77 roi.x = (img.cols - bw) / 2;
78 roi.y = (img.rows - bh) / 2;
79 roi.width = bw;
80 roi.height = bh;
81 blob = img(roi);
82
83 crop.x = (img.cols - bw) / 2;
84 crop.y = (img.rows - bh) / 2;
85 crop.width = bw;
86 crop.height = bh;
87 DETAILS("Letterbox %dx%d @ %d,%d", bw, bh, crop.x, crop.y);
88 }
89 else
90 {
91 blob = img;
92
93 crop.x = 0;
94 crop.y = 0;
95 crop.width = img.cols;
96 crop.height = img.rows;
97 }
98
99 // --------------------------------------------------------------------------------
100 // Crop and resize to desired network input dims:
101 cv::InterpolationFlags interpflags;
102 switch (interp::get())
103 {
104 case jevois::dnn::preprocessor::InterpMode::Linear: interpflags = cv::INTER_LINEAR; break;
105 case jevois::dnn::preprocessor::InterpMode::Cubic: interpflags = cv::INTER_CUBIC; break;
106 case jevois::dnn::preprocessor::InterpMode::Area: interpflags = cv::INTER_AREA; break;
107 case jevois::dnn::preprocessor::InterpMode::Lanczos4: interpflags = cv::INTER_LANCZOS4; break;
108 default: interpflags = cv::INTER_NEAREST;
109 }
110
111 cv::resize(blob, blob, bsiz, 0.0, 0.0, interpflags);
112 DETAILS("Resize to %dx%d%s", blob.cols, blob.rows, letterbox::get() ? "" : " (stretch)");
113
114 // --------------------------------------------------------------------------------
115 // Swap red/blue byte order if we have color and will not do planar; would be better below except that cvtColor
116 // always outputs 8U pixels so we have to do this here before possible conversion to 8S or others:
117 bool swapped = false;
118 if (swaprb && attr.dtype.fmt == VSI_NN_DIM_FMT_NHWC)
119 {
120 switch (blob.channels())
121 {
122 case 3: cv::cvtColor(blob, blob, cv::COLOR_RGB2BGR); swapped = true; break;
123 case 4: cv::cvtColor(blob, blob, cv::COLOR_RGBA2BGRA); swapped = true; break;
124 default: break; // Ignore swaprb value if not 3 or 4 channels
125 }
126 DETAILS("Swap Red <-> Blue");
127 }
128
129 // If we need to swap but will do it later, swap mean and std red/blue now:
130 if (swaprb && swapped == false) { std::swap(m[0], m[2]); std::swap(sd[0], sd[2]); }
131
132 // --------------------------------------------------------------------------------
133 // Convert and quantize if needed: First try some fast paths:
134 unsigned int const tt = jevois::dnn::vsi2cv(attr.dtype.vx_type);
135 unsigned int const bt = blob.depth();
136 bool const uniformsd = (sd[0] == sd[1] && sd[1] == sd[2]);
137 bool const uniformmean = (m[0] == m[1] && m[1] == m[2]);
138 bool const unitsd = (uniformsd && sd[0] > 0.99 && sd[0] < 1.01);
139 bool notdone = true;
140
141 if (bt == CV_8U && tt == CV_8U && attr.dtype.qnt_type == VSI_NN_QNT_TYPE_NONE)
142 {
143 DETAILS("8U to 8U direct no quantization");
144 DETAILS("(ignoring mean, scale, stdev)");
145 notdone = false;
146 }
147
148 else if (unitsd && attr.dtype.qnt_type == VSI_NN_QNT_TYPE_DFP)
149 {
150 if (bt == CV_8U && tt == CV_8S)
151 {
152 // --------------------
153 // Convert from 8U to 8S with DFP quantization:
154 cv::Mat newblob(bsiz, CV_MAKETYPE(tt, blob.channels()));
155
156 uint8_t const * bdata = (uint8_t const *)blob.data;
157 uint32_t const sz = blob.total() * blob.channels();
158 int8_t * data = (int8_t *)newblob.data;
159 if (attr.dtype.fl > 7) LFATAL("Invalid DFP fl value " << attr.dtype.fl << ": must be in [0..7]");
160 int const shift = 8 - attr.dtype.fl;
161 for (uint32_t i = 0; i < sz; ++i) *data++ = *bdata++ >> shift;
162
163 DETAILS("8U to 8S DFP:%d: bit-shift >> %d", attr.dtype.fl, shift);
164 blob = newblob;
165
166 if (m[0] > 1.0 || m[1] > 1.0 || m[2] > 1.0)
167 {
168 blob -= m;
169 DETAILS("Subtract mean [%.2f %.2f %.2f]", m[0], m[1], m[2]);
170 }
171 notdone = false;
172 }
173 else if (bt == CV_8U && tt == CV_16S)
174 {
175 // --------------------
176 // Convert from 8U to 16S with DFP quantization:
177 int const fl = attr.dtype.fl;
178 uint8_t const * bdata = (uint8_t const *)blob.data;
179 uint32_t const sz = blob.total() * blob.channels();
180 if (fl > 15) LFATAL("Invalid DFP fl value " << fl << ": must be in [0..15]");
181 if (fl > 8)
182 {
183 cv::Mat newblob(bsiz, CV_MAKETYPE(tt, blob.channels()));
184 int16_t * data = (int16_t *)newblob.data;
185 int const shift = fl - 8;
186 for (uint32_t i = 0; i < sz; ++i) *data++ = int16_t(*bdata++) << shift;
187 blob = newblob;
188 DETAILS("8U to 16S DFP:%d: bit-shift << %d", fl, shift);
189 }
190 else if (fl < 8)
191 {
192 cv::Mat newblob(bsiz, CV_MAKETYPE(tt, blob.channels()));
193 int16_t * data = (int16_t *)newblob.data;
194 int const shift = 8 - fl;
195 for (uint32_t i = 0; i < sz; ++i) *data++ = int16_t(*bdata++) >> shift;
196 blob = newblob;
197 DETAILS("8U to 16S DFP:%d: bit-shift >> %d", fl, shift);
198 }
199 else
200 {
201 blob.convertTo(blob, tt);
202 DETAILS("8U to 16S DFP:%d: direct conversion", fl);
203 }
204
205 if (m[0] > 1.0 || m[1] > 1.0 || m[2] > 1.0)
206 {
207 blob -= m;
208 DETAILS("Subtract mean [%.2f %.2f %.2f]", m[0], m[1], m[2]);
209 }
210 notdone = false;
211 }
212 // We only handle DFP: 8U->8S and 8U->16S with unit stdev here, more general code below for other cases.
213 }
214
215 if (notdone && uniformsd && uniformmean)
216 {
217 double qs, zp;
218 switch (attr.dtype.qnt_type)
219 {
220 case VSI_NN_QNT_TYPE_AFFINE_ASYMMETRIC: qs = attr.dtype.scale; zp = attr.dtype.zero_point; notdone = false; break;
221 case VSI_NN_QNT_TYPE_DFP: qs = 1.0 / (1 << attr.dtype.fl); zp = 0.0; notdone = false; break;
222 default: break;
223 }
224
225 if (notdone == false)
226 {
227 if (qs == 0.0) LFATAL("Quantizer scale must not be zero");
228 double alpha = sc / (sd[0] * qs);
229 double beta = zp - m[0] * alpha;
230 if (alpha > 0.99 && alpha < 1.01) alpha = 1.0; // will run faster
231 if (beta > -0.51 && beta < 0.51) beta = 0.0; // will run faster
232
233 if (alpha == 1.0 && beta == 0.0 && bt == tt)
234 DETAILS("No conversion needed");
235 else
236 {
237 cv::Mat newblob;
238 blob.convertTo(newblob, tt, alpha, beta);
239 blob = newblob;
240 if (detail)
241 {
242 DETAILS2("%s to %s fast path", jevois::cvtypestr(bt).c_str(), jevois::cvtypestr(tt).c_str());
243 if (m[0]) DETAILS2("Subtract mean [%.2f %.2f %.2f]", m[0], m[1], m[2]);
244 if (sd[0] != 1.0) DETAILS2("Divide by stdev [%f %f %f]", sd[0], sd[1], sd[2]);
245 if (sc != 1.0F) DETAILS2("Multiply by scale %f (=1/%.2f)", sc, 1.0/sc);
246 if (qs != 1.0F) DETAILS2("Divide by quantizer scale %f (=1/%.2f)", qs, 1.0/qs);
247 if (zp) DETAILS2("Add quantizer zero-point %.2f", zp);
248 if (alpha == 1.0 && beta == 0.0) DETAILS2("Summary: out = in");
249 else if (alpha == 1.0) DETAILS2("Summary: out = in%+f", beta);
250 else if (beta == 0.0) DETAILS2("Summary: out = in*%f", alpha);
251 else DETAILS2("Summary: out = in*%f%+f", alpha, beta);
252 }
253 }
254 }
255 }
256
257 if (notdone)
258 {
259 // This is the slowest path... you should add optimizations above for some specific cases:
260 blob.convertTo(blob, CV_32F);
261 DETAILS("Convert to 32F");
262
263 // Apply mean and scale:
264 if (m != cv::Scalar())
265 {
266 blob -= m;
267 DETAILS("Subtract mean [%.2f %.2f %.2f]", m[0], m[1], m[2]);
268 }
269
270 if (sd != cv::Scalar(1.0F, 1.0F, 1.0F))
271 {
272 if (sd[0] == 0.0F || sd[1] == 0.0F || sd[2] == 0.0F) LFATAL("Parameter stdev cannot contain any zero");
273 if (sc != 1.0F && sc != 0.0F)
274 {
275 sd *= 1.0F / sc;
276 DETAILS("Divide stdev by scale %f (=1/%.2f)", sc, 1.0/sc);
277 }
278 blob /= sd;
279 DETAILS("Divide by stdev [%f %f %f]", sd[0], sd[1], sd[2]);
280 }
281 else if (sc != 1.0F)
282 {
283 blob *= sc;
284 DETAILS("Multiply by scale %f (=1/%.2f)", sc, 1.0/sc);
285 }
286
287 if (tt == CV_16F || tt == CV_64F)
288 {
289 blob.convertTo(blob, tt);
290 DETAILS("Convert to %s", jevois::dnn::attrstr(attr).c_str());
291 }
292 else if (tt != CV_32F)
293 {
294 blob = jevois::dnn::quantize(blob, attr);
295 DETAILS("Quantize to %s", jevois::dnn::attrstr(attr).c_str());
296 }
297 }
298
299 // --------------------------------------------------------------------------------
300 // Ok, blob has desired width, height, and type, but is still packed RGB. Now deal with making a 4D shape, and R/G
301 // swapping if we have channels:
302 int const nch = blob.channels();
303 switch (nch)
304 {
305 case 1:
306 break; // Nothing to do
307
308 case 3:
309 case 4:
310 {
311 // If fmt type is auto (e.g., ONNX runtime), guess it as NCHW or NHWC based on dims:
312 vsi_nn_dim_fmt_e fmt = attr.dtype.fmt;
313 if (fmt == VSI_NN_DIM_FMT_AUTO)
314 {
315 if (attr.size[0] > attr.size[2]) fmt = VSI_NN_DIM_FMT_NCHW;
316 else fmt = VSI_NN_DIM_FMT_NHWC;
317 }
318
319 switch (fmt)
320 {
321 case VSI_NN_DIM_FMT_NCHW:
322 {
323 // Convert from packed to planar:
324 int dims[] = { 1, nch, blob.rows, blob.cols };
325 cv::Mat newblob(4, dims, tt);
326
327 // Create some pointers in newblob for each channel:
328 cv::Mat nbc[nch];
329 for (int i = 0; i < nch; ++i) nbc[i] = cv::Mat(blob.rows, blob.cols, tt, newblob.ptr(0, i));
330 if (swaprb)
331 {
332 std::swap(nbc[0], nbc[2]);
333 DETAILS("Swap Red <-> Blue");
334 }
335
336 // Split:
337 cv::split(blob, nbc);
338 DETAILS("Split channels (NHWC->NCHW)");
339
340 // This our final 4D blob:
341 blob = newblob;
342 }
343 break;
344
345 case VSI_NN_DIM_FMT_NHWC:
346 {
347 // red/blue byte swap was handled above... Just convert to a 4D blob:
348 blob = blob.reshape(1, { 1, bsiz.height, bsiz.width, 3 });
349 }
350 break;
351
352 default: LFATAL("Can only handle NCHW or NHWC intensors shapes");
353 }
354 }
355 break;
356
357 default: LFATAL("Can only handle input images with 1, 3, or 4 channels");
358 }
359
360 // --------------------------------------------------------------------------------
361 // Done with this blob:
362 DETAILS("%s", jevois::dnn::attrstr(attr).c_str());
363 blobs.emplace_back(blob);
364 crops.emplace_back(crop);
365 ++bnum;
366
367
368 // --------------------------------------------------------------------------------
369 // NOTE: in principle, our code here is ready to generate several blobs.
370 // However, in practice all nets tested so far expect just one input, since they are machine vision models, except
371 // for URetinex-Net, which expects an image and a single float. Thus, here, we only generate the first blob
372 // (when numin param is at its default value of 1, otherwise up to numin blobs).
373 if (bnum >= numin::get()) break;
374 }
375 return blobs;
376}
377
378// ####################################################################################################
380 bool /*overlay*/, bool idle)
381{
382#ifdef JEVOIS_PRO
383 if (helper && idle == false)
384 for (std::string const & s : itsInfo) ImGui::BulletText("%s", s.c_str());
385#else
386 (void)helper; (void)idle;
387#endif
388}
389
#define DETAILS(fmt,...)
#define DETAILS2(fmt,...)
Helper class to assist modules in creating graphical and GUI elements.
Definition GUIhelper.H:133
A raw image as coming from a V4L2 Camera and/or being sent out to a USB Gadget.
Definition RawImage.H:111
Base class for a module that supports standardized serial messages.
Definition Module.H:234
virtual ~PreProcessorBlob()
Destructor.
void report(jevois::StdModule *mod, jevois::RawImage *outimg=nullptr, jevois::OptGUIhelper *helper=nullptr, bool overlay=true, bool idle=false) override
Report what happened in last process() to console/output video/GUI.
std::vector< cv::Mat > process(cv::Mat const &img, bool swaprb, std::vector< vsi_nn_tensor_attr_t > const &attrs, std::vector< cv::Rect > &crops) override
Extract blobs from input image.
void freeze(bool doit) override
Freeze/unfreeze parameters that users should not change while running.
#define LFATAL(msg)
Convenience macro for users to print out console or syslog messages, FATAL level.
Definition Log.H:230
int vsi2cv(vsi_nn_type_e t)
Convert from NPU data type to OpenCV.
Definition Utils.C:332
cv::Mat quantize(cv::Mat const &m, vsi_nn_tensor_attr_t const &attr)
Quantize from float32 to fixed-point according to the quantization spec in attr.
Definition Utils.C:799
std::string attrstr(vsi_nn_tensor_attr_t const &attr)
Get a string describing the specs of a tensor, including quantification specs (not provided by shapes...
Definition Utils.C:536
cv::Size attrsize(vsi_nn_tensor_attr_t const &attr)
Get a tensor's (width, height) size in cv::Size format, skipping over other dimensions.
Definition Utils.C:511
std::string shapestr(cv::Mat const &m)
Get a string of the form: "nD AxBxC... TYPE" from an n-dimensional cv::Mat with data type TYPE.
Definition Utils.C:109
std::string cvtypestr(unsigned int cvtype)
Convert cv::Mat::type() code to a string (e.g., CV_8UC1, CV_32SC3, etc)
Definition Utils.C:58
void applyLetterBox(unsigned int &imw, unsigned int &imh, unsigned int const winw, unsigned int const winh, bool noalias)
Apply a letterbox resizing to fit an image into a window.
Definition Utils.C:222