JeVoisBase  1.3
JeVois Smart Embedded Machine Vision Toolkit Base Modules
Share this page:
DemoSalGistFaceObj.C
Go to the documentation of this file.
1 // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
2 //
3 // JeVois Smart Embedded Machine Vision Toolkit - Copyright (C) 2016 by Laurent Itti, the University of Southern
4 // California (USC), and iLab at USC. See http://iLab.usc.edu and http://jevois.org for information about this project.
5 //
6 // This file is part of the JeVois Smart Embedded Machine Vision Toolkit. This program is free software; you can
7 // redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software
8 // Foundation, version 2. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9 // without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
10 // License for more details. You should have received a copy of the GNU General Public License along with this program;
11 // if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
12 //
13 // Contact information: Laurent Itti - 3641 Watt Way, HNB-07A - Los Angeles, CA 90089-2520 - USA.
14 // Tel: +1 213 740 3527 - itti@pollux.usc.edu - http://iLab.usc.edu - http://jevois.org
15 // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
16 /*! \file */
17 
18 #include <jevois/Core/Module.H>
19 
20 #include <jevois/Debug/Log.H>
21 #include <jevois/Debug/Timer.H>
27 
28 #include <opencv2/core/core.hpp>
29 #include <opencv2/imgproc/imgproc.hpp>
30 #include <linux/videodev2.h> // for v4l2 pixel types
31 //#include <opencv2/highgui/highgui.hpp> // used for debugging only, see imshow below
32 // icon by Freepik in interface at flaticon
33 
34 //! Simple demo that combines saliency, gist, face detection, and object recognition
35 /*! Run the visual saliency algorithm to find the most interesting location in the field of view. Then extract a square
36  image region around that point. On alternating frames, either
37 
38  - attempt to detect a face in the attended region, and, if positively detected, show the face in the bottom-right
39  corner of the display. The last detected face will remain shown in the bottom-right corner of the display until a
40  new face is detected.
41 
 - or attempt to recognize an object in the attended region, using a deep neural network. The default network is a
 handwritten digit recognition network that replicates the original LeNet by Yann LeCun and is one of the very
 first convolutional neural networks. The network has been trained on the standard MNIST database of handwritten
 digits, and achieves over 99% correct recognition on the MNIST test dataset. When a digit is positively identified,
 a picture of it appears near the last detected face towards the bottom-right corner of the display, and a text
 string with the digit that has been identified appears to the left of the picture of the digit.
48 
49  Serial Messages
50  ---------------
51 
52  This module can send standardized serial messages as described in \ref UserSerialStyle, where all coordinates and
53  sizes are standardized using \ref coordhelpers. One message is issued on every video frame at the temporally
54  filtered attended (most salient) location (green circle in the video display):
55 
56  - Serial message type: \b 2D
57  - `id`: always \b sm (shorthand for saliency map)
58  - `x`, `y`: standardized 2D coordinates of temporally-filtered most salient point
59  - `w`, `h`: standardized size of the pink square box around each attended point
60  - `extra`: none (empty string)
61 
62  @author Laurent Itti
63 
64  @displayname Demo Saliency + Gist + Face Detection + Object Recognition
65  @videomapping YUYV 640 312 50.0 YUYV 320 240 50.0 JeVois DemoSalGistFaceObj
66  @email itti\@usc.edu
67  @address University of Southern California, HNB-07A, 3641 Watt Way, Los Angeles, CA 90089-2520, USA
68  @copyright Copyright (C) 2016 by Laurent Itti, iLab and the University of Southern California
69  @mainurl http://jevois.org
70  @supporturl http://jevois.org/doc
71  @otherurl http://iLab.usc.edu
72  @license GPL v3
73  @distribution Unrestricted
74  @restrictions None
75  \ingroup modules */
77 {
78  public:
79  //! Constructor
80  DemoSalGistFaceObj(std::string const & instance) : jevois::StdModule(instance), itsScoresStr(" ")
81  {
82  itsSaliency = addSubComponent<Saliency>("saliency");
83  itsFaceDetector = addSubComponent<FaceDetector>("facedetect");
84  itsObjectRecognition = addSubComponent<ObjectRecognitionMNIST>("MNIST");
85  itsKF = addSubComponent<Kalman2D>("kalman");
86  }
87 
88  //! Virtual destructor for safe inheritance
89  virtual ~DemoSalGistFaceObj() { }
90 
91  //! Processing function
92  virtual void process(jevois::InputFrame && inframe, jevois::OutputFrame && outframe) override
93  {
94  static jevois::Timer itsProcessingTimer("Processing");
95  static cv::Mat itsLastFace(60, 60, CV_8UC2, 0x80aa) ; // Note that this one will contain raw YUV pixels
96  static cv::Mat itsLastObject(60, 60, CV_8UC2, 0x80aa) ; // Note that this one will contain raw YUV pixels
97  static std::string itsLastObjectCateg;
98  static bool doobject = false; // alternate between object and face recognition
99 
100  // Wait for next available camera image:
101  jevois::RawImage inimg = inframe.get();
102 
103  // We only handle one specific input format in this demo:
104  inimg.require("input", 320, 240, V4L2_PIX_FMT_YUYV);
105 
106  itsProcessingTimer.start();
107  int const roihw = 32; // face & object roi half width and height
108 
109  // Compute saliency, in a thread:
110  auto sal_fut = std::async(std::launch::async, [&](){ itsSaliency->process(inimg, true); });
111 
112  // While computing, wait for an image from our gadget driver into which we will put our results:
113  jevois::RawImage outimg = outframe.get();
114  outimg.require("output", 640, 312, V4L2_PIX_FMT_YUYV);
115 
116  // Paste the original image to the top-left corner of the display:
117  unsigned short const txtcol = jevois::yuyv::White;
118  jevois::rawimage::paste(inimg, outimg, 0, 0);
119  jevois::rawimage::writeText(outimg, "JeVois Saliency + Gist + Faces + Objects", 3, 3, txtcol);
120 
121  // Wait until saliency computation is complete:
122  sal_fut.get();
123 
124  // find most salient point:
125  int mx, my; intg32 msal;
126  itsSaliency->getSaliencyMax(mx, my, msal);
127 
128  // Scale back to original image coordinates:
129  int const smlev = itsSaliency->smscale::get();
130  int const smadj = smlev > 0 ? (1 << (smlev-1)) : 0; // half a saliency map pixel adjustment
131  int const dmx = (mx << smlev) + smadj;
132  int const dmy = (my << smlev) + smadj;
133 
134  // Compute instantaneous attended ROI (note: coords must be even to avoid flipping U/V when we later paste):
135  int const rx = std::min(int(inimg.width) - roihw, std::max(roihw, dmx));
136  int const ry = std::min(int(inimg.height) - roihw, std::max(roihw, dmy));
137 
138  // Asynchronously launch a bunch of saliency drawings and filter the attended locations
139  auto draw_fut =
140  std::async(std::launch::async, [&]() {
141  // Paste the various saliency results:
142  drawMap(outimg, &itsSaliency->salmap, 320, 0, 16, 20);
143  jevois::rawimage::writeText(outimg, "Saliency Map", 640 - 12*6-4, 3, txtcol);
144 
145  drawMap(outimg, &itsSaliency->color, 0, 240, 4, 18);
146  jevois::rawimage::writeText(outimg, "Color", 3, 243, txtcol);
147 
148  drawMap(outimg, &itsSaliency->intens, 80, 240, 4, 18);
149  jevois::rawimage::writeText(outimg, "Intensity", 83, 243, txtcol);
150 
151  drawMap(outimg, &itsSaliency->ori, 160, 240, 4, 18);
152  jevois::rawimage::writeText(outimg, "Orientation", 163, 243, txtcol);
153 
154  drawMap(outimg, &itsSaliency->flicker, 240, 240, 4, 18);
155  jevois::rawimage::writeText(outimg, "Flicker", 243, 243, txtcol);
156 
157  drawMap(outimg, &itsSaliency->motion, 320, 240, 4, 18);
158  jevois::rawimage::writeText(outimg, "Motion", 323, 243, txtcol);
159 
160  // Draw the gist vector:
161  drawGist(outimg, itsSaliency->gist, itsSaliency->gist_size, 400, 242, 40, 2);
162 
163  // Draw a small square at most salient location in image and in saliency map:
164  jevois::rawimage::drawFilledRect(outimg, mx * 16 + 5, my * 16 + 5, 8, 8, 0xffff);
165  jevois::rawimage::drawFilledRect(outimg, 320 + mx * 16 + 5, my * 16 + 5, 8, 8, 0xffff);
166  jevois::rawimage::drawRect(outimg, rx - roihw, ry - roihw, roihw*2, roihw*2, 0xf0f0);
167  jevois::rawimage::drawRect(outimg, rx - roihw+1, ry - roihw+1, roihw*2-2, roihw*2-2, 0xf0f0);
168 
169  // Blank out free space from 480 to 519 at the bottom, and small space above and below gist vector:
170  jevois::rawimage::drawFilledRect(outimg, 480, 240, 40, 60, 0x8000);
171  jevois::rawimage::drawRect(outimg, 400, 240, 80, 2, 0x80a0);
172  jevois::rawimage::drawRect(outimg, 400, 298, 80, 2, 0x80a0);
173  jevois::rawimage::drawFilledRect(outimg, 0, 300, 640, 12, jevois::yuyv::Black);
174 
175  // Filter the attended locations:
176  itsKF->set(dmx, dmy, inimg.width, inimg.height);
177  float kfxraw, kfyraw, kfximg, kfyimg;
178  itsKF->get(kfxraw, kfyraw, kfximg, kfyimg, inimg.width, inimg.height, 1.0F, 1.0F);
179 
180  // Draw a circle around the kalman-filtered attended location:
181  jevois::rawimage::drawCircle(outimg, int(kfximg), int(kfyimg), 20, 1, jevois::yuyv::LightGreen);
182 
183  // Send saliency info to serial port (for arduino, etc):
184  sendSerialImg2D(inimg.width, inimg.height, kfximg, kfyimg, roihw * 2, roihw * 2, "sm");
185  });
186 
187  // Extract a raw YUYV ROI around attended point:
188  cv::Mat rawimgcv = jevois::rawimage::cvImage(inimg);
189  cv::Mat rawroi = rawimgcv(cv::Rect(rx - roihw, ry - roihw, roihw * 2, roihw * 2));
190 
191  if (doobject)
192  {
193  // #################### Object recognition:
194 
195  // Prepare a color or grayscale ROI for the object recognition module:
196  auto objsz = itsObjectRecognition->insize();
197  cv::Mat objroi;
198  switch (objsz.depth_)
199  {
200  case 1: // grayscale input
201  {
202  // mnist is white letters on black background, so invert the image before we send it for recognition, as we
203  // assume here black letters on white backgrounds. We also need to provide a clean crop around the digit for
204  // the deep network to work well:
205  cv::cvtColor(rawroi, objroi, CV_YUV2GRAY_YUYV);
206 
207  // Find the 10th percentile gray value:
208  size_t const elem = (objroi.cols * objroi.rows * 10) / 100;
209  std::vector<unsigned char> v; v.assign(objroi.datastart, objroi.dataend);
210  std::nth_element(v.begin(), v.begin() + elem, v.end());
211  unsigned char const thresh = std::min((unsigned char)(100), std::max((unsigned char)(30), v[elem]));
212 
213  // Threshold and invert the image:
214  cv::threshold(objroi, objroi, thresh, 255, cv::THRESH_BINARY_INV);
215 
216  // Find the digit and center and crop it:
217  cv::Mat pts; cv::findNonZero(objroi, pts);
218  cv::Rect r = cv::boundingRect(pts);
219  int const cx = r.x + r.width / 2;
220  int const cy = r.y + r.height / 2;
221  int const siz = std::min(roihw * 2, std::max(16, 8 + std::max(r.width, r.height))); // margin of 4 pix
222  int const tlx = std::max(0, std::min(roihw*2 - siz, cx - siz/2));
223  int const tly = std::max(0, std::min(roihw*2 - siz, cy - siz/2));
224  cv::Rect ar(tlx, tly, siz, siz);
225  cv::resize(objroi(ar), objroi, cv::Size(objsz.width_, objsz.height_), 0, 0, cv::INTER_AREA);
226  //cv::imshow("cropped roi", objroi);cv::waitKey(1);
227  }
228  break;
229 
230  case 3: // color input
231  cv::cvtColor(rawroi, objroi, CV_YUV2RGB_YUYV);
232  cv::resize(objroi, objroi, cv::Size(objsz.width_, objsz.height_), 0, 0, cv::INTER_AREA);
233  break;
234 
235  default:
236  LFATAL("Unsupported object detection input depth " << objsz.depth_);
237  }
238 
239  // Launch object recognition on the ROI and get the recognition scores:
240  auto scores = itsObjectRecognition->process(objroi);
241 
242  // Create a string to show all scores:
243  std::ostringstream oss;
244  for (size_t i = 0; i < scores.size(); ++i)
245  oss << itsObjectRecognition->category(i) << ':' << std::fixed << std::setprecision(2) << scores[i] << ' ';
246  itsScoresStr = oss.str();
247 
248  // Check whether the highest score is very high and significantly higher than the second best:
249  float best1 = scores[0], best2 = scores[0]; size_t idx1 = 0, idx2 = 0;
250  for (size_t i = 1; i < scores.size(); ++i)
251  {
252  if (scores[i] > best1) { best2 = best1; idx2 = idx1; best1 = scores[i]; idx1 = i; }
253  else if (scores[i] > best2) { best2 = scores[i]; idx2 = i; }
254  }
255 
256  // Update our display upon each "clean" recognition:
257  if (best1 > 90.0F && best2 < 20.0F)
258  {
259  // Remember this recognized object for future displays:
260  itsLastObjectCateg = itsObjectRecognition->category(idx1);
261  itsLastObject = rawimgcv(cv::Rect(rx - 30, ry - 30, 60, 60)).clone(); // make a deep copy
262 
263  LINFO("Object recognition: best: " << itsLastObjectCateg <<" (" << best1 <<
264  "), second best: " << itsObjectRecognition->category(idx2) << " (" << best2 << ')');
265  }
266  }
267  else
268  {
269  // #################### Face detection:
270 
271  // Prepare a grey ROI from our raw YUYV roi:
272  cv::Mat grayroi; cv::cvtColor(rawroi, grayroi, CV_YUV2GRAY_YUYV);
273  cv::equalizeHist(grayroi, grayroi);
274 
275  // Launch the face detector:
276  std::vector<cv::Rect> faces; std::vector<std::vector<cv::Rect> > eyes;
277  itsFaceDetector->process(grayroi, faces, eyes, false);
278 
279  // Draw the faces and eyes, if any:
280  if (faces.size())
281  {
282  LINFO("detected " << faces.size() << " faces");
283  // Store the attended ROI into our last ROI, fixed size 60x60 for our display:
284  itsLastFace = rawimgcv(cv::Rect(rx - 30, ry - 30, 60, 60)).clone(); // make a deep copy
285  }
286 
287  for (size_t i = 0; i < faces.size(); ++i)
288  {
289  // Draw one face:
290  cv::Rect const & f = faces[i];
291  jevois::rawimage::drawRect(outimg, f.x + rx - roihw, f.y + ry - roihw, f.width, f.height, 0xc0ff);
292 
293  // Draw the corresponding eyes:
294  for (auto const & e : eyes[i])
295  jevois::rawimage::drawRect(outimg, e.x + rx - roihw, e.y + ry - roihw, e.width, e.height, 0x40ff);
296  }
297  }
298 
299  // Let camera know we are done processing the raw YUV input image. NOTE: rawroi is now invalid:
300  inframe.done();
301 
302  // Paste our last attended and recognized face and object (or empty pics):
303  cv::Mat outimgcv(outimg.height, outimg.width, CV_8UC2, outimg.buf->data());
304  itsLastObject.copyTo(outimgcv(cv::Rect(520, 240, 60, 60)));
305  itsLastFace.copyTo(outimgcv(cv::Rect(580, 240, 60, 60)));
306 
307  // Wait until all saliency drawings are complete (since they blank out our object label area):
308  draw_fut.get();
309 
310  // Print all object scores:
311  jevois::rawimage::writeText(outimg, itsScoresStr, 2, 301, txtcol);
312 
313  // Write any positively recognized object category:
314  jevois::rawimage::writeText(outimg, itsLastObjectCateg.c_str(), 517-6*itsLastObjectCateg.length(), 263, txtcol);
315 
316  // FIXME do svm on gist and write resuts here
317 
318  // Show processing fps:
319  std::string const & fpscpu = itsProcessingTimer.stop();
320  jevois::rawimage::writeText(outimg, fpscpu, 3, 240 - 13, jevois::yuyv::White);
321 
322  // Send the output image with our processing results to the host over USB:
323  outframe.send();
324 
325  // Alternate between face and object recognition:
326  doobject = ! doobject;
327  }
328 
 protected:
  std::shared_ptr<Saliency> itsSaliency;                       //!< Saliency + gist computation sub-component
  std::shared_ptr<FaceDetector> itsFaceDetector;               //!< Face and eye detector sub-component
  std::shared_ptr<ObjectRecognitionBase> itsObjectRecognition; //!< Object recognition network (MNIST instance; see constructor)
  std::shared_ptr<Kalman2D> itsKF;                             //!< Kalman filter to smooth the attended location
  std::string itsScoresStr;                                    //!< Last formatted recognition scores, shown at bottom of display
335 };
336 
337 // Allow the module to be loaded as a shared object (.so) file:
cv::Mat cvImage(RawImage const &src)
virtual ~DemoSalGistFaceObj()
Virtual destructor for safe inheritance.
virtual void process(jevois::InputFrame &&inframe, jevois::OutputFrame &&outframe) override
Processing function.
std::shared_ptr< Saliency > itsSaliency
void drawCircle(RawImage &img, int x, int y, unsigned int rad, unsigned int thick, unsigned int col)
void writeText(RawImage &img, std::string const &txt, int x, int y, unsigned int col, Font font=Font6x10)
void require(char const *info, unsigned int w, unsigned int h, unsigned int f) const
unsigned int height
std::shared_ptr< FaceDetector > itsFaceDetector
void drawGist(jevois::RawImage &img, unsigned char const *gist, size_t gistsize, unsigned int xoff, unsigned int yoff, unsigned int width, unsigned int scale)
Definition: Saliency.C:771
DemoSalGistFaceObj(std::string const &instance)
Constructor.
std::shared_ptr< ObjectRecognitionBase > itsObjectRecognition
StdModule(std::string const &instance)
void drawMap(jevois::RawImage &img, env_image const *fmap, unsigned int xoff, unsigned int yoff, unsigned int scale)
Definition: Saliency.C:709
void sendSerialImg2D(unsigned int camw, unsigned int camh, float x, float y, float w=0.0F, float h=0.0F, std::string const &id="", std::string const &extra="")
#define LFATAL(msg)
ENV_INTG32_TYPE intg32
32-bit signed integer
Definition: env_types.h:52
void drawFilledRect(RawImage &img, int x, int y, unsigned int w, unsigned int h, unsigned int col)
std::shared_ptr< Kalman2D > itsKF
JEVOIS_REGISTER_MODULE(DemoSalGistFaceObj)
Simple demo that combines saliency, gist, face detection, and object recognition. ...
#define LINFO(msg)
void drawRect(RawImage &img, int x, int y, unsigned int w, unsigned int h, unsigned int thick, unsigned int col)
unsigned int width
std::string const & stop()
void paste(RawImage const &src, RawImage &dest, int dx, int dy)