JeVois  1.16
JeVois Smart Embedded Machine Vision Toolkit
Share this page:
NetworkTPU.C
Go to the documentation of this file.
1 // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
2 //
3 // JeVois Smart Embedded Machine Vision Toolkit - Copyright (C) 2021 by Laurent Itti, the University of Southern
4 // California (USC), and iLab at USC. See http://iLab.usc.edu and http://jevois.org for information about this project.
5 //
6 // This file is part of the JeVois Smart Embedded Machine Vision Toolkit. This program is free software; you can
7 // redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software
8 // Foundation, version 2. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9 // without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
10 // License for more details. You should have received a copy of the GNU General Public License along with this program;
11 // if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
12 //
13 // Contact information: Laurent Itti - 3641 Watt Way, HNB-07A - Los Angeles, CA 90089-2520 - USA.
14 // Tel: +1 213 740 3527 - itti@pollux.usc.edu - http://iLab.usc.edu - http://jevois.org
15 // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
16 /*! \file */
17 
18 #ifdef JEVOIS_PRO
19 
20 #include <jevois/DNN/NetworkTPU.H>
21 #include <jevois/DNN/Utils.H>
22 #include <jevois/Util/Utils.H>
23 #include <edgetpu_c.h>
24 
25 #include <tensorflow/lite/builtin_op_data.h>
26 #include <tensorflow/lite/kernels/register.h>
27 #include <tensorflow/lite/kernels/internal/tensor_ctypes.h> // for GetTensorData()
28 
29 // ####################################################################################################
30 int jevois::dnn::NetworkTPU::ErrorReporter::Report(char const * format, va_list args)
31 {
32  char buf[1024];
33  int ret = vsnprintf(buf, 1024, format, args);
34  LERROR(buf);
35  return ret;
36 }
37 
38 // ####################################################################################################
// Destructor body. NOTE(review): the signature line (internal line 39) was dropped by the doxygen
// extract; per the index at the bottom of this page it is `jevois::dnn::NetworkTPU::~NetworkTPU()`.
// Wait for any in-flight asynchronous load() to complete before members are destroyed, so the
// interpreter/model are not torn down while a background load is still touching them.
40 { waitBeforeDestroy(); }
41 
42 // ####################################################################################################
// Freeze/unfreeze parameters that users should not change while the network is loaded/running.
// NOTE(review): the signature line (internal line 43) is missing from this extract; per the index
// it is `void jevois::dnn::NetworkTPU::freeze(bool doit) override`.
44 {
// Each parameter forwards the freeze state; frozen parameters reject runtime changes:
45  dataroot::freeze(doit);
46  model::freeze(doit);
47  tpunum::freeze(doit);
48  dequant::freeze(doit);
49  intensors::freeze(doit);
50  outtensors::freeze(doit);
51 }
52 
53 // ####################################################################################################
// Get shapes of all input tensors, as vsi_nn_tensor_attr_t descriptors.
54 std::vector<vsi_nn_tensor_attr_t> jevois::dnn::NetworkTPU::inputShapes()
55 {
// Refuse to answer until load() has completed:
56  if (ready() == false) LFATAL("Network is not ready");
// NOTE(review): the actual return statement (internal line 57) was dropped by the doxygen extract
// because it was rendered as a link; judging by the parseTensorSpecs() entry in the index it is
// presumably `return jevois::dnn::parseTensorSpecs(intensors::get());` -- TODO confirm against
// the real source. The block below is a dead alternative implementation that would instead derive
// the attributes directly from the interpreter's input tensors:
58  /*
59  std::vector<vsi_nn_tensor_attr_t> ret;
60  auto const & input_indices = itsInterpreter->inputs();
61 
62  for (size_t i = 0; i < input_indices.size(); ++i)
63  {
64  TfLiteTensor const * itensor = itsInterpreter->tensor(input_indices[i]);
65  if (itensor == nullptr) LFATAL("Network has Null input tensor " << i);
66  ret.emplace_back(jevois::dnn::tensorattr(itensor));
67  }
68  return ret;
69  */
70 }
71 
72 // ####################################################################################################
// Get shapes of all output tensors, as vsi_nn_tensor_attr_t descriptors.
73 std::vector<vsi_nn_tensor_attr_t> jevois::dnn::NetworkTPU::outputShapes()
74 {
// Refuse to answer until load() has completed:
75  if (ready() == false) LFATAL("Network is not ready");
// NOTE(review): the actual return statement (internal line 76) was dropped by the doxygen extract;
// presumably `return jevois::dnn::parseTensorSpecs(outtensors::get());`, mirroring inputShapes()
// -- TODO confirm against the real source. The block below is a dead alternative implementation
// that would instead derive the attributes directly from the interpreter's output tensors:
77  /*
78  std::vector<vsi_nn_tensor_attr_t> ret;
79  auto const & output_indices = itsInterpreter->outputs();
80 
81  for (size_t i = 0; i < output_indices.size(); ++i)
82  {
83  TfLiteTensor const * otensor = itsInterpreter->tensor(output_indices[i]);
84  if (otensor == nullptr) LFATAL("Network has Null output tensor " << i);
85  ret.emplace_back(jevois::dnn::tensorattr(otensor));
86  }
87  return ret;
88  */
89 }
90 
91 // ####################################################################################################
// Load the TFLite model from disk and bind it to an Edge TPU delegate.
// NOTE(review): the signature line (internal line 92) is missing from this extract; per the index
// it is `void jevois::dnn::NetworkTPU::load() override`.
93 {
94  // Need to nuke the network first if it exists or we could run out of RAM:
95  itsInterpreter.reset();
96  itsModel.reset();
97 
// Resolve the model path relative to the configured data root:
98  std::string const m = jevois::absolutePath(dataroot::get(), model::get());
99 
100  // Create and load the network:
101  itsModel = tflite::FlatBufferModel::BuildFromFile(m.c_str(), &itsErrorReporter);
102  if (!itsModel) LFATAL("Failed to load model from file " << m);
103 
104  tflite::ops::builtin::BuiltinOpResolver resolver;
105  tflite::InterpreterBuilder(*itsModel, resolver)(&itsInterpreter);
106 
// Enumerate attached Edge TPU devices; the unique_ptr frees the list via edgetpu_free_devices:
107  size_t num_devices;
108  std::unique_ptr<edgetpu_device, decltype(&edgetpu_free_devices)>
109  devices(edgetpu_list_devices(&num_devices), &edgetpu_free_devices);
110 
111  if (num_devices == 0) LFATAL("No connected TPU found");
// NOTE(review): tpunum::get() is assigned to a size_t; a negative parameter value would wrap to a
// huge number and be caught by the range check below, but confirm the parameter is declared unsigned.
112  size_t const tn = tpunum::get();
113  if (tn >= num_devices) LFATAL("Cannot use TPU " << tn << " because only " << num_devices << " TPUs detected.");
114 
// Attach the Edge TPU delegate for the selected device to the interpreter graph.
// NOTE(review): the return status of ModifyGraphWithDelegate() is not checked here; a delegate
// failure would only surface later at AllocateTensors()/Invoke() -- consider checking it.
115  auto const & device = devices.get()[tn];
116  itsInterpreter->
117  ModifyGraphWithDelegate(std::unique_ptr<TfLiteDelegate, decltype(&edgetpu_free_delegate)>
118  (edgetpu_create_delegate(device.type, device.path, nullptr, 0), &edgetpu_free_delegate));
119 
// Single CPU thread: the heavy lifting runs on the TPU, not the CPU:
120  itsInterpreter->SetNumThreads(1);
121 
122  if (itsInterpreter->AllocateTensors() != kTfLiteOk) LFATAL("Failed to allocate tensors");
123 
// Log the network topology (tensor names, shapes, sizes, quantization params) for debugging:
124  for (size_t i = 0; i < itsInterpreter->inputs().size(); ++i)
125  LINFO("Input tensor " << i << ": " << itsInterpreter->GetInputName(i));
126  for (size_t i = 0; i < itsInterpreter->outputs().size(); ++i)
127  LINFO("Output tensor " << i << ": " << itsInterpreter->GetOutputName(i));
128 
129  int t_size = itsInterpreter->tensors_size();
130  for (int i = 0; i < t_size; ++i)
131  if (itsInterpreter->tensor(i)->name)
132  LINFO("Layer " << i << ": " << itsInterpreter->tensor(i)->name << ", "
133  << jevois::dnn::shapestr(itsInterpreter->tensor(i)) << ", "
134  << itsInterpreter->tensor(i)->bytes << " bytes, scale: "
135  << itsInterpreter->tensor(i)->params.scale << ", zero: "
136  << itsInterpreter->tensor(i)->params.zero_point);
137 
138  //if (threads::get()) itsInterpreter->SetNumThreads(threads::get());
139  for (size_t i = 0; i < itsInterpreter->inputs().size(); ++i)
140  LINFO("input " << i << " is layer " << itsInterpreter->inputs()[i]);
141  for (size_t i = 0; i < itsInterpreter->outputs().size(); ++i)
142  LINFO("output " << i << " is layer " << itsInterpreter->outputs()[i]);
143 }
144 
145 // ####################################################################################################
146 std::vector<cv::Mat> jevois::dnn::NetworkTPU::doprocess(std::vector<cv::Mat> const & blobs,
147  std::vector<std::string> & info)
148 {
149  if ( ! itsInterpreter) LFATAL("Internal inconsistency");
150 
151  if (blobs.size() != itsInterpreter->inputs().size())
152  LFATAL("Received " << blobs.size() << " input tensors, but network wants " << itsInterpreter->inputs().size());
153 
154  auto const & input_indices = itsInterpreter->inputs();
155  for (size_t b = 0; b < blobs.size(); ++b)
156  {
157  cv::Mat const & cvin = blobs[b];
158  auto * itensor = itsInterpreter->tensor(input_indices[b]);
159  if (itensor == nullptr) LFATAL("Network has Null input tensor " << b);
160 
161  // Make sure input dims are a match:
162  TfLiteIntArray const & tfindims = *itensor->dims;
163  cv::MatSize const & cvindims = cvin.size;
164  for (int i = 0; i < tfindims.size; ++i)
165  if (tfindims.data[i] != cvindims[i])
166  LFATAL("Input " << b << " mismatch: blob is " << jevois::dnn::shapestr(cvin) <<
167  " but network wants " << jevois::dnn::shapestr(itensor));
168 
169  // Make sure total sizes in bytes are a match too:
170  size_t const cvsiz = cvin.total() * cvin.elemSize();
171  size_t const tfsiz = itensor->bytes;
172  if (cvsiz != tfsiz) LFATAL("Input " << b << " size mismatch: blob has " << cvsiz <<
173  " but network wants " << tfsiz << " bytes. Maybe type is wrong in intensors?");
174 
175  // Copy input blob to input tensor:
176  uint8_t * input = tflite::GetTensorData<uint8_t>(itensor);
177  if (input == nullptr) LFATAL("Input tensor " << b << " is null in network");
178  std::memcpy(input, cvin.data, cvsiz);
179  info.emplace_back("- Input tensors ok");
180  }
181 
182  // Run the network:
183  if (itsInterpreter->Invoke() != kTfLiteOk) LFATAL("Failed to invoke interpreter");
184  info.emplace_back("- Network forward pass ok");
185 
186  // Collect/convert the outputs:
187  auto const & output_indices = itsInterpreter->outputs();
188  std::vector<cv::Mat> outs;
189 
190  for (size_t o = 0; o < output_indices.size(); ++o)
191  {
192  auto const * otensor = itsInterpreter->tensor(output_indices[o]);
193  if (otensor == nullptr) LFATAL("Network produced Null output tensor " << o);
194 
195  // Allocate an OpenCV output array of dims that match our output tensor:
196  TfLiteIntArray const & tfdims = *otensor->dims;
197  std::vector<int> cvdims; size_t sz = 1;
198  for (int i = 0; i < tfdims.size; ++i) { cvdims.emplace_back(tfdims.data[i]); sz *= tfdims.data[i]; }
199 
200  // Convert/copy output tensor data to OpenCV arrays:
201  TfLiteType const ot = otensor->type;
202  std::string const otname = TfLiteTypeGetName(ot);
203  bool notdone = true;
204 
205  if (dequant::get())
206  {
207  switch (ot)
208  {
209  case kTfLiteUInt8:
210  {
211  // Dequantize UINT8 to FLOAT32:
212  uint8_t const * output = tflite::GetTensorData<uint8_t>(otensor);
213  if (output == nullptr) LFATAL("Network produced Null output tensor data " << o);
214  cv::Mat cvout(cvdims, CV_32F); float * cvoutdata = (float *)cvout.data;
215 
216  for (size_t i = 0; i < sz; ++i)
217  *cvoutdata++ = (output[i] - otensor->params.zero_point) * otensor->params.scale;
218 
219  info.emplace_back("- Converted " + otname + " output tensor " + std::to_string(o) + " to FLOAT32");
220  outs.emplace_back(cvout);
221  notdone = false;
222  }
223  break;
224 
225  default:
226  // For now, we only know how to dequantize uint8...
227  break;
228  }
229  }
230 
231  if (notdone)
232  {
233  // We just want to copy the data untouched, except that OpenCV does not support as many pixel types as tensorflow:
234  switch (ot)
235  {
236  case kTfLiteInt64: // used by DeepLabV3. Just convert to int32:
237  {
238  cv::Mat cvout(cvdims, CV_32S);
239  int * cvoutdata = (int *)cvout.data;
240  int64_t const * output = tflite::GetTensorData<int64_t>(otensor);
241  if (output == nullptr) LFATAL("Network produced Null output tensor data " << o);
242  for (size_t i = 0; i < sz; ++i) *cvoutdata++ = int(*output++);
243  info.emplace_back("- Converted " + otname + " output tensor " + std::to_string(o) + " to INT32");
244  outs.emplace_back(cvout);
245  }
246  break;
247 
248  case kTfLiteFloat32:
249  case kTfLiteInt32:
250  case kTfLiteUInt8:
251  case kTfLiteInt16:
252  case kTfLiteInt8:
253  case kTfLiteFloat16:
254  case kTfLiteFloat64:
255  {
256  // Simple copy with no conversion:
257  unsigned int cvtype = jevois::dnn::tf2cv(ot);
258  cv::Mat cvout(cvdims, cvtype);
259  uint8_t const * output = tflite::GetTensorData<uint8_t>(otensor);
260  if (output == nullptr) LFATAL("Network produced Null output tensor data " << o);
261  std::memcpy(cvout.data, output, sz * jevois::cvBytesPerPix(cvtype));
262  info.emplace_back("- Copied " + otname + " output tensor " + std::to_string(o));
263  outs.emplace_back(cvout);
264  }
265  break;
266 
267  default:
268  LFATAL("Output tensor " << otensor->name << " has unsupported type: " << otname);
269  }
270  }
271  }
272 
273  // Report the TPU temperature:
274  size_t tn = tpunum::get();
275  std::string fn = jevois::sformat("/sys/class/apex/apex_%zu/temp", tpunum::get());
276  try
277  {
278  int temp = std::stoi(jevois::getFileString(fn.c_str()));
279  info.emplace_back(jevois::sformat("- TPU%zu temp %dC", tn, temp / 1000));
280  }
281  catch (...) { } // silently ignore any errors
282 
283  return outs;
284 }
285 
286 #endif // JEVOIS_PRO
jevois::imu::get
Data collection mode RAW means that the latest available raw data is returned each time get() is called
jevois::dnn::NetworkTPU::inputShapes
virtual std::vector< vsi_nn_tensor_attr_t > inputShapes() override
Get shapes of all input tensors.
Definition: NetworkTPU.C:54
jevois::sformat
std::string sformat(char const *fmt, ...) __attribute__((format(__printf__, 1, 2)))
Create a string using printf style arguments.
Definition: Utils.C:401
jevois::dnn::tf2cv
int tf2cv(TfLiteType t)
Convert from TensorFlow data type to OpenCV.
Definition: Utils.C:182
jevois::cvBytesPerPix
unsigned int cvBytesPerPix(unsigned int cvtype)
Return the number of bytes per pixel for a given OpenCV pixel type.
Definition: Utils.C:88
jevois::dnn::NetworkTPU::outputShapes
virtual std::vector< vsi_nn_tensor_attr_t > outputShapes() override
Get shapes of all output tensors.
Definition: NetworkTPU.C:73
Utils.H
o
#define o
Definition: Font10x20.C:6
NetworkTPU.H
jevois::dnn::Network::waitBeforeDestroy
void waitBeforeDestroy()
If network is currently loading, wait until that is done before destroying.
Definition: Network.C:27
LERROR
#define LERROR(msg)
Convenience macro for users to print out console or syslog messages, ERROR level.
Definition: Log.H:198
jevois::getFileString
std::string getFileString(char const *fname, int skip=0)
Read one line from a file and return it as a string.
Definition: Utils.C:453
jevois::dnn::postprocessor::format
Type of detection output format
Definition: PostProcessor.H:75
jevois::dnn::NetworkTPU::doprocess
std::vector< cv::Mat > doprocess(std::vector< cv::Mat > const &blobs, std::vector< std::string > &info) override
Process input blobs and obtain output blobs.
Definition: NetworkTPU.C:146
jevois::dnn::parseTensorSpecs
std::vector< vsi_nn_tensor_attr_t > parseTensorSpecs(std::string const &specs)
Parse tensor specification.
Definition: Utils.C:260
jevois::dnn::shapestr
std::string shapestr(cv::Mat const &m)
Get a string of the form: "nD AxBxC... TYPE" from an n-dimensional cv::Mat with data type TYPE.
Definition: Utils.C:105
jevois::dnn::NetworkTPU::freeze
void freeze(bool doit) override
Freeze/unfreeze parameters that users should not change while running.
Definition: NetworkTPU.C:43
LFATAL
#define LFATAL(msg)
Convenience macro for users to print out console or syslog messages, FATAL level.
Definition: Log.H:217
jevois::absolutePath
std::string absolutePath(std::string const &root, std::string const &path)
Compute an absolute path from two paths.
Definition: Utils.C:347
jevois::dnn::NetworkTPU::load
void load() override
Load from disk.
Definition: NetworkTPU.C:92
jevois::to_string
std::string to_string(T const &val)
Convert from type to string.
jevois::dnn::NetworkTPU::~NetworkTPU
virtual ~NetworkTPU()
Destructor.
Definition: NetworkTPU.C:39
Utils.H
LINFO
#define LINFO(msg)
Convenience macro for users to print out console or syslog messages, INFO level.
Definition: Log.H:181