24#include <linux/videodev2.h>
25#include <opencv2/core/core.hpp>
26#include <opencv2/imgproc/imgproc.hpp>
30#include <NE10_imgproc.h>
38 class Parallel_process :
public cv::ParallelLoopBody
41 cv::Mat
const & inImg;
42 unsigned char * outImg;
47 Parallel_process(cv::Mat
const & inputImgage,
unsigned char* outImage,
size_t outw) :
48 inImg(inputImgage), outImg(outImage)
50 widhStep = inputImgage.size().width * 4;
54 virtual void operator()(
const cv::Range & range)
const
56 for (
int i = range.start; i < range.end; ++i)
58 int const s1 = i * widhStep;
60 for (
int iw = 0; iw < inImg.size().width; iw += 2)
62 int const s2 = iw * 4;
int mc = s1 + s2;
63 float const R1 = inImg.data[mc + 0];
64 float const G1 = inImg.data[mc + 1];
65 float const B1 = inImg.data[mc + 2];
67 float const R2 = inImg.data[mc + 4];
68 float const G2 = inImg.data[mc + 5];
69 float const B2 = inImg.data[mc + 6];
72 int Y = (0.257F * R1) + (0.504F * G1) + (0.098F *
B1) + 16;
73 int U = -(0.148F * R1) - (0.291F * G1) + (0.439F *
B1) + 128;
74 int V = (0.439F * R1 ) - (0.368F * G1) - (0.071F *
B1) + 128;
75 int Y2 = (0.257F * R2) + (0.504F * G2) + (0.098F *
B2) + 16;
77 if (Y > 255) Y = 255;
else if (Y < 0) Y = 0;
78 if (U > 255) U = 255;
else if (U < 0) U = 0;
79 if (
V > 255)
V = 255;
else if (
V < 0)
V = 0;
80 if (Y2 > 255) Y2 = 255;
else if (Y2 < 0) Y2 = 0;
82 mc = i * m_stride + iw * 2;
83 outImg[mc + 0] = Y; outImg[mc + 1] = U; outImg[mc + 2] = Y2; outImg[mc + 3] =
V;
89 void rgba2yuyv(cv::Mat
const & src,
unsigned char * dst,
size_t dstw)
90 { cv::parallel_for_(cv::Range(0, src.rows), Parallel_process(src, dst, dstw)); }
// Fragment of a base-class list: public inheritance from jevois::Parameter<kernelw, kernelh>. The beginning of the
// class declaration is not part of this listing (presumably class DemoNeon -- confirm).
136 public jevois::Parameter<kernelw, kernelh>
// Call require() on the input image with the label "input", dimensions w x h and pixel format V4L2_PIX_FMT_YUYV.
154 inimg.
require(
"input", w,
h, V4L2_PIX_FMT_YUYV);
// Fetch the output image from the output frame.
159 outimg = outframe.get();
// NE10 size descriptors: source size initialized from (w, h), kernel size from the kernelw and kernelh parameters.
175 ne10_size_t src_size { w,
h }, kernel_size { kernelw::get(), kernelh::get() };
// CPU path: an h-row by w-column CV_8UC4 buffer receives the output of the plain C box filter run on imgrgba.
// The two w * 4 arguments are presumably the source and destination row strides in bytes -- confirm against NE10.
177 cv::Mat cpuresult(
h, w, CV_8UC4);
179 ne10_img_boxfilter_rgba8888_c(imgrgba.data, cpuresult.data, src_size, w * 4, w * 4, kernel_size);
// Stop the CPU timer; the std::string const & it returns is bound to cpufps.
180 std::string
const & cpufps = cputim.
stop();
// Neon path: same buffer geometry as the CPU path.
183 cv::Mat neonresult(
h, w, CV_8UC4);
// NOTE(review): the _neon and _c calls below both write into neonresult. Presumably they sit in alternative
// preprocessor branches whose directives are missing from this listing -- confirm against the upstream source.
188 ne10_img_boxfilter_rgba8888_neon(imgrgba.data, neonresult.data, src_size, w * 4, w * 4, kernel_size);
191 ne10_img_boxfilter_rgba8888_c(imgrgba.data, neonresult.data, src_size, w * 4, w * 4, kernel_size);
// Stop the Neon timer; the std::string const & it returns is bound to neonfps.
194 std::string
const & neonfps = neontim.
stop();
// Convert both filtered RGBA results to YUYV directly into outimg: the CPU result is written starting w * 2 bytes
// into the pixel buffer and the Neon result starting w * 4 bytes in; w * 3 is passed as the destination width.
197 rgba2yuyv(cpuresult, outimg.
pixelsw<
unsigned char>() + w * 2, w * 3);
199 rgba2yuyv(neonresult, outimg.
pixelsw<
unsigned char>() + w * 4, w * 3);
JEVOIS_REGISTER_MODULE(ArUcoBlob)
Simple demo of ARM Neon (SIMD) extensions, comparing a box filter (blur) between CPU and Neon.
virtual ~DemoNeon()
Virtual destructor for safe inheritance.
virtual void process(jevois::InputFrame &&inframe, jevois::OutputFrame &&outframe) override
Processing function.
JEVOIS_DECLARE_PARAMETER(kernelw, unsigned int, "Kernel width (pixels)", 5, ParamCateg)
Parameter.
JEVOIS_DECLARE_PARAMETER(kernelh, unsigned int, "Kernel height (pixels)", 5, ParamCateg)
Parameter.
friend class Module
void require(char const *info, unsigned int w, unsigned int h, unsigned int f) const
std::string const & stop(double *seconds)
cv::Mat convertToCvRGBA(RawImage const &src)
void paste(RawImage const &src, RawImage &dest, int dx, int dy)
void writeText(RawImage &img, std::string const &txt, int x, int y, unsigned int col, Font font=Font6x10)
std::future< std::invoke_result_t< std::decay_t< Function >, std::decay_t< Args >... > > async(Function &&f, Args &&... args)
unsigned short constexpr White