JeVoisBase  1.6
JeVois Smart Embedded Machine Vision Toolkit Base Modules
Share this page:
DemoNeon.C
Go to the documentation of this file.
1 // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
2 //
3 // JeVois Smart Embedded Machine Vision Toolkit - Copyright (C) 2016 by Laurent Itti, the University of Southern
4 // California (USC), and iLab at USC. See http://iLab.usc.edu and http://jevois.org for information about this project.
5 //
6 // This file is part of the JeVois Smart Embedded Machine Vision Toolkit. This program is free software; you can
7 // redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software
8 // Foundation, version 2. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9 // without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
10 // License for more details. You should have received a copy of the GNU General Public License along with this program;
11 // if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
12 //
13 // Contact information: Laurent Itti - 3641 Watt Way, HNB-07A - Los Angeles, CA 90089-2520 - USA.
14 // Tel: +1 213 740 3527 - itti@pollux.usc.edu - http://iLab.usc.edu - http://jevois.org
15 // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
16 /*! \file */
17 
18 #include <jevois/Core/Module.H>
19 #include <jevois/Debug/Log.H>
20 #include <jevois/Util/Utils.H>
22 #include <jevois/Debug/Timer.H>
23 
24 #include <linux/videodev2.h>
25 #include <opencv2/core/core.hpp>
26 #include <opencv2/imgproc/imgproc.hpp>
27 #include <string.h>
28 
29 // Neon-related:
30 #include <Ne10/inc/NE10_imgproc.h>
31 
32 // icon by Madebyoliver in technology at flaticon
33 
34 namespace
35 {
36  // OpenCV's cvtColor() cannot convert from RGBA to YUYV. Found this code here and cleaned it up a bit:
37  // http://study.marearts.com/2014/12/yuyv-to-rgb-and-rgb-to-yuyv-using.html
38  class Parallel_process : public cv::ParallelLoopBody
39  {
40  private:
41  cv::Mat const & inImg;
42  unsigned char * outImg;
43  int widhStep;
44  int m_stride;
45 
46  public:
47  Parallel_process(cv::Mat const & inputImgage, unsigned char* outImage, size_t outw) :
48  inImg(inputImgage), outImg(outImage)
49  {
50  widhStep = inputImgage.size().width * 4; // 4bpp for RGBA
51  m_stride = outw * 2; // 2bpp for YUYV
52  }
53 
54  virtual void operator()(const cv::Range & range) const
55  {
56  for (int i = range.start; i < range.end; ++i)
57  {
58  int const s1 = i * widhStep;
59 
60  for (int iw = 0; iw < inImg.size().width; iw += 2)
61  {
62  int const s2 = iw * 4; int mc = s1 + s2;
63  float const R1 = inImg.data[mc + 0];
64  float const G1 = inImg.data[mc + 1];
65  float const B1 = inImg.data[mc + 2];
66  // skip A
67  float const R2 = inImg.data[mc + 4];
68  float const G2 = inImg.data[mc + 5];
69  float const B2 = inImg.data[mc + 6];
70  // skip A
71 
72  int Y = (0.257F * R1) + (0.504F * G1) + (0.098F * B1) + 16;
73  int U = -(0.148F * R1) - (0.291F * G1) + (0.439F * B1) + 128;
74  int V = (0.439F * R1 ) - (0.368F * G1) - (0.071F * B1) + 128;
75  int Y2 = (0.257F * R2) + (0.504F * G2) + (0.098F * B2) + 16;
76 
77  if (Y > 255) Y = 255; else if (Y < 0) Y = 0;
78  if (U > 255) U = 255; else if (U < 0) U = 0;
79  if (V > 255) V = 255; else if (V < 0) V = 0;
80  if (Y2 > 255) Y2 = 255; else if (Y2 < 0) Y2 = 0;
81 
82  mc = i * m_stride + iw * 2;
83  outImg[mc + 0] = Y; outImg[mc + 1] = U; outImg[mc + 2] = Y2; outImg[mc + 3] = V;
84  }
85  }
86  }
87  };
88 
89  void rgba2yuyv(cv::Mat const & src, unsigned char * dst, size_t dstw)
90  { cv::parallel_for_(cv::Range(0, src.rows), Parallel_process(src, dst, dstw)); }
91 
92 } // anonymous namespace
93 
94 // Module parameters: allow user to play with filter kernel size
95 static jevois::ParameterCategory const ParamCateg("Neon Demo Options");
96 
97 //! Parameter \relates DemoNeon
98 JEVOIS_DECLARE_PARAMETER(kernelw, unsigned int, "Kernel width (pixels)", 5, ParamCateg);
99 
100 //! Parameter \relates DemoNeon
101 JEVOIS_DECLARE_PARAMETER(kernelh, unsigned int, "Kernel height (pixels)", 5, ParamCateg);
102 
103 //! Simple demo of ARM Neon (SIMD) extensions, comparing a box filter (blur) between CPU and Neon
104 /*! NEON is a set of specialized ARM processor instructions that can handle several operations at once, for example,
105  8 additions of 8 bytes with 8 other bytes. NEON is the counterpart for ARM architectures of SSE for Intel architectures.
106 
107  They are very useful for image processing. NEON instructions are supported both by the JeVois hardware platform and
108  by the JeVois programming framework.
109 
110  In fact, one can directly call NEON instructions using C-like function calls and specialized C data types to
111  represent small vectors of numbers (like 8 bytes).
112 
113  This demo uses a blur filter from the open-source NE10 library. It compares processing time to apply the same filter
114  to the input video stream, either using conventional C code, or using NEON-accelerated code. The NEON-accelerated
115  code is about 6x faster.
116 
117  For more examples of use of NEON on JeVois, see modules \jvmod{DarknetSingle}, \jvmod{DarknetYOLO}, and
118  \jvmod{DarknetSaliency} which use NEON to accelerate the deep neural networks implemented in these modules.
119 
120 
121  @author Laurent Itti
122 
123  @displayname Demo NEON
124  @videomapping YUYV 960 240 30.0 YUYV 320 240 30.0 JeVois DemoNeon
125  @email itti\@usc.edu
126  @address University of Southern California, HNB-07A, 3641 Watt Way, Los Angeles, CA 90089-2520, USA
127  @copyright Copyright (C) 2016 by Laurent Itti, iLab and the University of Southern California
128  @mainurl http://jevois.org
129  @supporturl http://jevois.org/doc
130  @otherurl http://iLab.usc.edu
131  @license GPL v3
132  @distribution Unrestricted
133  @restrictions None
134  \ingroup modules */
135 class DemoNeon : public jevois::Module,
136  public jevois::Parameter<kernelw, kernelh>
137 {
138  public:
139  //! Default base class constructor ok
141 
142  //! Virtual destructor for safe inheritance
143  virtual ~DemoNeon() { }
144 
145  //! Processing function
146  virtual void process(jevois::InputFrame && inframe, jevois::OutputFrame && outframe) override
147  {
148  static jevois::Timer cputim("CPU time");
149  static jevois::Timer neontim("Neon time");
150 
151  // Wait for next available camera image:
152  jevois::RawImage inimg = inframe.get();
153  unsigned int const w = inimg.width, h = inimg.height;
154  inimg.require("input", w, h, V4L2_PIX_FMT_YUYV); // any image size but require YUYV pixels
155 
156  // While we convert it, start a thread to wait for out frame and paste the input into it:
157  jevois::RawImage outimg;
158  auto paste_fut = std::async(std::launch::async, [&]() {
159  outimg = outframe.get();
160  outimg.require("output", w * 3, h, inimg.fmt);
161  jevois::rawimage::paste(inimg, outimg, 0, 0);
162  jevois::rawimage::writeText(outimg, "JeVois NEON Demo", 3, 3, jevois::yuyv::White);
163  });
164 
165  // Convert input frame to RGBA:
166  cv::Mat imgrgba = jevois::rawimage::convertToCvRGBA(inimg);
167 
168  // Wait for paste to finish up:
169  paste_fut.get();
170 
171  // Let camera know we are done processing the input image:
172  inframe.done();
173 
174  // First, apply blur filter using CPU:
175  ne10_size_t src_size { w, h }, kernel_size { kernelw::get(), kernelh::get() };
176 
177  cv::Mat cpuresult(h, w, CV_8UC4);
178  cputim.start();
179  ne10_img_boxfilter_rgba8888_c(imgrgba.data, cpuresult.data, src_size, w * 4, w * 4, kernel_size);
180  std::string const & cpufps = cputim.stop();
181 
182  // Then apply it using neon:
183  cv::Mat neonresult(h, w, CV_8UC4);
184  neontim.start();
185 
186 #ifdef __ARM_NEON__
187  // Neon version:
188  ne10_img_boxfilter_rgba8888_neon(imgrgba.data, neonresult.data, src_size, w * 4, w * 4, kernel_size);
189 #else
190  // On non-ARM/NEON host, revert to CPU version again:
191  ne10_img_boxfilter_rgba8888_c(imgrgba.data, neonresult.data, src_size, w * 4, w * 4, kernel_size);
192 #endif
193 
194  std::string const & neonfps = neontim.stop();
195 
196  // Convert both results back to YUYV for display:
197  rgba2yuyv(cpuresult, outimg.pixelsw<unsigned char>() + w * 2, w * 3);
198  jevois::rawimage::writeText(outimg, "Box filter - CPU", w + 3, 3, jevois::yuyv::White);
199  rgba2yuyv(neonresult, outimg.pixelsw<unsigned char>() + w * 4, w * 3);
200  jevois::rawimage::writeText(outimg, "Box filter - NEON", w * 2 + 3, 3, jevois::yuyv::White);
201 
202  // Show processing fps:
203  jevois::rawimage::writeText(outimg, cpufps, w + 3, h - 13, jevois::yuyv::White);
204  jevois::rawimage::writeText(outimg, neonfps, w * 2 + 3, h - 13, jevois::yuyv::White);
205 
206  // Send the output image with our processing results to the host over USB:
207  outframe.send();
208  }
209 };
210 
211 // Allow the module to be loaded as a shared object (.so) file:
cv::Mat convertToCvRGBA(RawImage const &src)
// Module registration for the DemoNeon class (this is what allows loading it as a shared object):
JEVOIS_REGISTER_MODULE(DemoNeon)
friend friend class Module
void writeText(RawImage &img, std::string const &txt, int x, int y, unsigned int col, Font font=Font6x10)
unsigned int height
#define B1
Definition: Surprise.C:81
unsigned int fmt
Simple demo of ARM Neon (SIMD) extensions, comparing a box filter (blur) between CPU and Neon...
Definition: DemoNeon.C:135
JEVOIS_DECLARE_PARAMETER(thresh1, double, "First threshold for hysteresis", 50.0, ParamCateg)
std::string const & stop()
#define B2
Definition: Surprise.C:82
virtual ~DemoNeon()
Virtual destructor for safe inheritance.
Definition: DemoNeon.C:143
virtual void process(jevois::InputFrame &&inframe, jevois::OutputFrame &&outframe) override
Processing function.
Definition: DemoNeon.C:146
unsigned int width
void paste(RawImage const &src, RawImage &dest, int dx, int dy)
void require(char const *info, unsigned int w, unsigned int h, unsigned int f) const