DemoSalGistFaceObj.C
// ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//
// JeVois Smart Embedded Machine Vision Toolkit - Copyright (C) 2016 by Laurent Itti, the University of Southern
// California (USC), and iLab at USC. See http://iLab.usc.edu and http://jevois.org for information about this project.
//
// This file is part of the JeVois Smart Embedded Machine Vision Toolkit. This program is free software; you can
// redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software
// Foundation, version 2. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
// without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
// License for more details. You should have received a copy of the GNU General Public License along with this program;
// if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
//
// Contact information: Laurent Itti - 3641 Watt Way, HNB-07A - Los Angeles, CA 90089-2520 - USA.
// Tel: +1 213 740 3527 - itti@pollux.usc.edu - http://iLab.usc.edu - http://jevois.org
// ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/*! \file */

#include <jevois/Core/Module.H>

#include <jevois/Debug/Log.H>
#include <jevois/Debug/Timer.H>

// JeVoisBase components used below (include paths per the jevoisbase source tree):
#include <jevoisbase/Components/Saliency/Saliency.H>
#include <jevoisbase/Components/FaceDetection/FaceDetector.H>
#include <jevoisbase/Components/ObjectRecognition/ObjectRecognitionMNIST.H>
#include <jevoisbase/Components/Tracking/Kalman2D.H>

#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <linux/videodev2.h> // for v4l2 pixel types
//#include <opencv2/highgui/highgui.hpp> // used for debugging only, see imshow below
// icon by Freepik in interface at flaticon

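// Usage note: the @videomapping tag below follows the videomappings.cfg entry format, so a line like the one shown
// here (typically added to JEVOIS:/config/videomappings.cfg on the microSD card) selects this module, producing a
// 640x312 @ 50fps YUYV USB output computed from 320x240 @ 50fps camera frames:
//
//   YUYV 640 312 50.0 YUYV 320 240 50.0 JeVois DemoSalGistFaceObj
//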
//! Simple demo that combines saliency, gist, face detection, and object recognition
/*! Run the visual saliency algorithm to find the most interesting location in the field of view. Then extract a square
    image region around that point. On alternating frames, either

    - attempt to detect a face in the attended region, and, if positively detected, show the face in the bottom-right
      corner of the display. The last detected face will remain shown in the bottom-right corner of the display until a
      new face is detected.

    - or attempt to recognize an object in the attended region, using a deep neural network. The default network is a
      handwritten digit recognition network that replicates the original LeNet by Yann LeCun and is one of the very
      first convolutional neural networks. The network has been trained on the standard MNIST database of handwritten
      digits, and achieves over 99% correct recognition on the MNIST test dataset. When a digit is positively
      identified, a picture of it appears near the last detected face towards the bottom-right corner of the display,
      and a text string with the digit that has been identified appears to the left of the picture of the digit.


    @author Laurent Itti

    @displayname Demo Saliency + Gist + Face Detection + Object Recognition
    @videomapping YUYV 640 312 50.0 YUYV 320 240 50.0 JeVois DemoSalGistFaceObj
    @email itti\@usc.edu
    @address University of Southern California, HNB-07A, 3641 Watt Way, Los Angeles, CA 90089-2520, USA
    @copyright Copyright (C) 2016 by Laurent Itti, iLab and the University of Southern California
    @mainurl http://jevois.org
    @supporturl http://jevois.org/doc
    @otherurl http://iLab.usc.edu
    @license GPL v3
    @distribution Unrestricted
    @restrictions None
    \ingroup modules */
class DemoSalGistFaceObj : public jevois::Module
{
  public:
    //! Constructor
    DemoSalGistFaceObj(std::string const & instance) : jevois::Module(instance), itsScoresStr(" ")
    {
      itsSaliency = addSubComponent<Saliency>("saliency");
      itsFaceDetector = addSubComponent<FaceDetector>("facedetect");
      itsObjectRecognition = addSubComponent<ObjectRecognitionMNIST>("MNIST");
      itsKF = addSubComponent<Kalman2D>("kalman");
    }

    //! Virtual destructor for safe inheritance
    virtual ~DemoSalGistFaceObj() { }

    //! Processing function
    virtual void process(jevois::InputFrame && inframe, jevois::OutputFrame && outframe) override
    {
      static jevois::Timer itsProcessingTimer("Processing");
      static cv::Mat itsLastFace(60, 60, CV_8UC2, 0x80aa); // Note that this one will contain raw YUV pixels
      static cv::Mat itsLastObject(60, 60, CV_8UC2, 0x80aa); // Note that this one will contain raw YUV pixels
      static std::string itsLastObjectCateg;
      static bool doobject = false; // alternate between object and face recognition

      // Wait for next available camera image:
      jevois::RawImage inimg = inframe.get();

      // We only handle one specific input format in this demo:
      inimg.require("input", 320, 240, V4L2_PIX_FMT_YUYV);

      itsProcessingTimer.start();
      int const roihw = 32; // face & object roi half width and height

      // Compute saliency, in a thread:
      auto sal_fut = std::async(std::launch::async, [&](){ itsSaliency->process(inimg, true); });

      // While computing, wait for an image from our gadget driver into which we will put our results:
      jevois::RawImage outimg = outframe.get();
      outimg.require("output", 640, 312, V4L2_PIX_FMT_YUYV);

      // Paste the original image to the top-left corner of the display:
      unsigned short const txtcol = jevois::yuyv::White;
      jevois::rawimage::paste(inimg, outimg, 0, 0);
      jevois::rawimage::writeText(outimg, "JeVois Saliency + Gist + Faces + Objects", 3, 3, txtcol);

      // Wait until saliency computation is complete:
      sal_fut.get();

      // find most salient point:
      int mx, my; intg32 msal;
      itsSaliency->getSaliencyMax(mx, my, msal);

      // Scale back to original image coordinates:
      int const smlev = itsSaliency->smscale::get();
      int const smadj = smlev > 0 ? (1 << (smlev-1)) : 0; // half a saliency map pixel adjustment
      int const dmx = (mx << smlev) + smadj;
      int const dmy = (my << smlev) + smadj;
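      // Worked example: assuming smscale is 4 (consistent with the 16x drawing scale used below), a saliency maximum
      // at map coords (mx,my) = (10,5) maps back to image coords dmx = (10 << 4) + 8 = 168 and dmy = (5 << 4) + 8 = 88,
      // i.e., the center of the corresponding 16x16 block of original pixels.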

      // Compute instantaneous attended ROI (note: coords must be even to avoid flipping U/V when we later paste):
      int const rx = std::min(int(inimg.width) - roihw, std::max(roihw, dmx));
      int const ry = std::min(int(inimg.height) - roihw, std::max(roihw, dmy));

      // Asynchronously launch a bunch of saliency drawings and filter the attended locations:
      auto draw_fut =
        std::async(std::launch::async, [&]() {
            // Paste the various saliency results:
            drawMap(outimg, &itsSaliency->salmap, 320, 0, 16, 20);
            jevois::rawimage::writeText(outimg, "Saliency Map", 640 - 12*6 - 4, 3, txtcol);

            drawMap(outimg, &itsSaliency->color, 0, 240, 4, 18);
            jevois::rawimage::writeText(outimg, "Color", 3, 243, txtcol);

            drawMap(outimg, &itsSaliency->intens, 80, 240, 4, 18);
            jevois::rawimage::writeText(outimg, "Intensity", 83, 243, txtcol);

            drawMap(outimg, &itsSaliency->ori, 160, 240, 4, 18);
            jevois::rawimage::writeText(outimg, "Orientation", 163, 243, txtcol);

            drawMap(outimg, &itsSaliency->flicker, 240, 240, 4, 18);
            jevois::rawimage::writeText(outimg, "Flicker", 243, 243, txtcol);

            drawMap(outimg, &itsSaliency->motion, 320, 240, 4, 18);
            jevois::rawimage::writeText(outimg, "Motion", 323, 243, txtcol);

            // Draw the gist vector:
            drawGist(outimg, itsSaliency->gist, itsSaliency->gist_size, 400, 242, 40, 2);

            // Draw a small square at most salient location in image and in saliency map:
            jevois::rawimage::drawFilledRect(outimg, mx * 16 + 5, my * 16 + 5, 8, 8, 0xffff);
            jevois::rawimage::drawFilledRect(outimg, 320 + mx * 16 + 5, my * 16 + 5, 8, 8, 0xffff);
            jevois::rawimage::drawRect(outimg, rx - roihw, ry - roihw, roihw*2, roihw*2, 0xf0f0);
            jevois::rawimage::drawRect(outimg, rx - roihw + 1, ry - roihw + 1, roihw*2 - 2, roihw*2 - 2, 0xf0f0);

            // Blank out free space from 480 to 519 at the bottom, and small space above and below gist vector:
            jevois::rawimage::drawFilledRect(outimg, 480, 240, 40, 60, 0x8000);
            jevois::rawimage::drawRect(outimg, 400, 240, 80, 2, 0x80a0);
            jevois::rawimage::drawRect(outimg, 400, 298, 80, 2, 0x80a0);
            jevois::rawimage::drawFilledRect(outimg, 0, 300, 640, 12, jevois::yuyv::Black);

            // Filter the attended locations:
            itsKF->set(dmx, dmy, inimg.width, inimg.height);
            float kfxraw, kfyraw, kfximg, kfyimg;
            itsKF->get(kfxraw, kfyraw, kfximg, kfyimg, inimg.width, inimg.height, 1.0F, 1.0F);

            // Draw a circle around the kalman-filtered attended location:
            jevois::rawimage::drawCircle(outimg, int(kfximg), int(kfyimg), 20, 1, jevois::yuyv::LightGreen);

            // Send saliency info to serial port (for arduino, etc):
            sendSerial(jevois::sformat("T2D %d %d", int(kfxraw), int(kfyraw)));
          });

      // Extract a raw YUYV ROI around attended point:
      cv::Mat rawimgcv = jevois::rawimage::cvImage(inimg);
      cv::Mat rawroi = rawimgcv(cv::Rect(rx - roihw, ry - roihw, roihw * 2, roihw * 2));

      if (doobject)
      {
        // #################### Object recognition:

        // Prepare a color or grayscale ROI for the object recognition module:
        auto objsz = itsObjectRecognition->insize();
        cv::Mat objroi;
        switch (objsz.depth_)
        {
          case 1: // grayscale input
          {
            // mnist is white letters on black background, so invert the image before we send it for recognition, as we
            // assume here black letters on white backgrounds. We also need to provide a clean crop around the digit
            // for the deep network to work well:
            cv::cvtColor(rawroi, objroi, CV_YUV2GRAY_YUYV);

            // Find the 10th percentile gray value:
            size_t const elem = (objroi.cols * objroi.rows * 10) / 100;
            std::vector<unsigned char> v; v.assign(objroi.datastart, objroi.dataend);
            std::nth_element(v.begin(), v.begin() + elem, v.end());
            unsigned char const thresh = std::min((unsigned char)(100), std::max((unsigned char)(30), v[elem]));
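            // For instance, with the 64x64 ROI used here (roihw*2 = 64), elem = (64 * 64 * 10) / 100 = 409, so after
            // nth_element() v[409] holds the 10th-percentile gray level; clamping it to [30, 100] keeps the
            // binarization threshold reasonable even on very dark or very bright backgrounds.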

            // Threshold and invert the image:
            cv::threshold(objroi, objroi, thresh, 255, cv::THRESH_BINARY_INV);

            // Find the digit and center and crop it:
            cv::Mat pts; cv::findNonZero(objroi, pts);
            cv::Rect r = cv::boundingRect(pts);
            int const cx = r.x + r.width / 2;
            int const cy = r.y + r.height / 2;
            int const siz = std::min(roihw * 2, std::max(16, 8 + std::max(r.width, r.height))); // margin of 4 pix
            int const tlx = std::max(0, std::min(roihw*2 - siz, cx - siz/2));
            int const tly = std::max(0, std::min(roihw*2 - siz, cy - siz/2));
            cv::Rect ar(tlx, tly, siz, siz);
            cv::resize(objroi(ar), objroi, cv::Size(objsz.width_, objsz.height_), 0, 0, cv::INTER_AREA);
            //cv::imshow("cropped roi", objroi); cv::waitKey(1);
          }
          break;

          case 3: // color input
            cv::cvtColor(rawroi, objroi, CV_YUV2RGB_YUYV);
            cv::resize(objroi, objroi, cv::Size(objsz.width_, objsz.height_), 0, 0, cv::INTER_AREA);
            break;

          default:
            LFATAL("Unsupported object detection input depth " << objsz.depth_);
        }

        // Launch object recognition on the ROI and get the recognition scores:
        auto scores = itsObjectRecognition->process(objroi);

        // Create a string to show all scores:
        std::ostringstream oss;
        for (size_t i = 0; i < scores.size(); ++i)
          oss << itsObjectRecognition->category(i) << ':' << std::fixed << std::setprecision(2) << scores[i] << ' ';
        itsScoresStr = oss.str();

        // Check whether the highest score is very high and significantly higher than the second best:
        float best1 = scores[0], best2 = scores[0]; size_t idx1 = 0, idx2 = 0;
        for (size_t i = 1; i < scores.size(); ++i)
        {
          if (scores[i] > best1) { best2 = best1; idx2 = idx1; best1 = scores[i]; idx1 = i; }
          else if (scores[i] > best2) { best2 = scores[i]; idx2 = i; }
        }

        // Update our display upon each "clean" recognition:
        if (best1 > 90.0F && best2 < 20.0F)
        {
          // Remember this recognized object for future displays:
          itsLastObjectCateg = itsObjectRecognition->category(idx1);
          itsLastObject = rawimgcv(cv::Rect(rx - 30, ry - 30, 60, 60)).clone(); // make a deep copy

          LINFO("Object recognition: best: " << itsLastObjectCateg << " (" << best1 <<
                "), second best: " << itsObjectRecognition->category(idx2) << " (" << best2 << ')');
        }
      }
      else
      {
        // #################### Face detection:

        // Prepare a grey ROI from our raw YUYV roi:
        cv::Mat grayroi; cv::cvtColor(rawroi, grayroi, CV_YUV2GRAY_YUYV);
        cv::equalizeHist(grayroi, grayroi);

        // Launch the face detector:
        std::vector<cv::Rect> faces; std::vector<std::vector<cv::Rect> > eyes;
        itsFaceDetector->process(grayroi, faces, eyes, false);

        // Draw the faces and eyes, if any:
        if (faces.size())
        {
          LINFO("detected " << faces.size() << " faces");
          // Store the attended ROI into our last ROI, fixed size 60x60 for our display:
          itsLastFace = rawimgcv(cv::Rect(rx - 30, ry - 30, 60, 60)).clone(); // make a deep copy
        }

        for (size_t i = 0; i < faces.size(); ++i)
        {
          // Draw one face:
          cv::Rect const & f = faces[i];
          jevois::rawimage::drawRect(outimg, f.x + rx - roihw, f.y + ry - roihw, f.width, f.height, 0xc0ff);

          // Draw the corresponding eyes:
          for (auto const & e : eyes[i])
            jevois::rawimage::drawRect(outimg, e.x + rx - roihw, e.y + ry - roihw, e.width, e.height, 0x40ff);
        }
      }

      // Let camera know we are done processing the raw YUV input image. NOTE: rawroi is now invalid:
      inframe.done();

      // Paste our last attended and recognized face and object (or empty pics):
      cv::Mat outimgcv(outimg.height, outimg.width, CV_8UC2, outimg.buf->data());
      itsLastObject.copyTo(outimgcv(cv::Rect(520, 240, 60, 60)));
      itsLastFace.copyTo(outimgcv(cv::Rect(580, 240, 60, 60)));

      // Wait until all saliency drawings are complete (since they blank out our object label area):
      draw_fut.get();

      // Print all object scores:
      jevois::rawimage::writeText(outimg, itsScoresStr, 2, 301, txtcol);

      // Write any positively recognized object category:
      jevois::rawimage::writeText(outimg, itsLastObjectCateg.c_str(), 517 - 6*itsLastObjectCateg.length(), 263, txtcol);

      // FIXME do svm on gist and write results here

      // Show processing fps:
      std::string const & fpscpu = itsProcessingTimer.stop();
      jevois::rawimage::writeText(outimg, fpscpu, 3, 240 - 13, jevois::yuyv::White);

      // Send the output image with our processing results to the host over USB:
      outframe.send();

      // Alternate between face and object recognition:
      doobject = ! doobject;
    }

  protected:
    std::shared_ptr<Saliency> itsSaliency;
    std::shared_ptr<FaceDetector> itsFaceDetector;
    std::shared_ptr<ObjectRecognitionBase> itsObjectRecognition;
    std::shared_ptr<Kalman2D> itsKF;
    std::string itsScoresStr;
};

// Allow the module to be loaded as a shared object (.so) file:
JEVOIS_REGISTER_MODULE(DemoSalGistFaceObj);