JeVois  1.20
JeVois Smart Embedded Machine Vision Toolkit
Share this page:
NetworkTPU.C
Go to the documentation of this file.
1 // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
2 //
3 // JeVois Smart Embedded Machine Vision Toolkit - Copyright (C) 2021 by Laurent Itti, the University of Southern
4 // California (USC), and iLab at USC. See http://iLab.usc.edu and http://jevois.org for information about this project.
5 //
6 // This file is part of the JeVois Smart Embedded Machine Vision Toolkit. This program is free software; you can
7 // redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software
8 // Foundation, version 2. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9 // without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
10 // License for more details. You should have received a copy of the GNU General Public License along with this program;
11 // if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
12 //
13 // Contact information: Laurent Itti - 3641 Watt Way, HNB-07A - Los Angeles, CA 90089-2520 - USA.
14 // Tel: +1 213 740 3527 - itti@pollux.usc.edu - http://iLab.usc.edu - http://jevois.org
15 // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
16 /*! \file */
17 
18 #ifdef JEVOIS_PRO
19 
20 #include <jevois/DNN/NetworkTPU.H>
21 #include <jevois/DNN/Utils.H>
22 #include <jevois/Util/Utils.H>
23 #include <edgetpu_c.h>
24 
25 #include <tensorflow/lite/builtin_op_data.h>
26 #include <tensorflow/lite/kernels/register.h>
27 #include <tensorflow/lite/kernels/internal/tensor_ctypes.h> // for GetTensorData()
28 
29 // ####################################################################################################
30 int jevois::dnn::NetworkTPU::ErrorReporter::Report(char const * format, va_list args)
31 {
32  static char buf[2048];
33  int ret = vsnprintf(buf, 2048, format, args);
34  itsErrors.push_back(buf);
35  LERROR(buf);
36  return ret;
37 }
38 
39 // ####################################################################################################
41 { waitBeforeDestroy(); }
42 
43 // ####################################################################################################
45 {
46  dataroot::freeze(doit);
47  model::freeze(doit);
48  tpunum::freeze(doit);
49  dequant::freeze(doit);
50  intensors::freeze(doit);
51  outtensors::freeze(doit);
52  jevois::dnn::Network::freeze(doit); // base class parameters
53 }
54 
55 // ####################################################################################################
56 std::vector<vsi_nn_tensor_attr_t> jevois::dnn::NetworkTPU::inputShapes()
57 {
58  if (ready() == false) LFATAL("Network is not ready");
59 
60  // Shapes are embedded in the network file, but can be overridden:
61  std::string const inshapes = intensors::get();
62  if (inshapes.empty() == false) return jevois::dnn::parseTensorSpecs(inshapes);
63 
64  // Get the shapes from the network:
65  std::vector<vsi_nn_tensor_attr_t> ret;
66  auto const & input_indices = itsInterpreter->inputs();
67 
68  for (size_t i = 0; i < input_indices.size(); ++i)
69  {
70  TfLiteTensor const * itensor = itsInterpreter->tensor(input_indices[i]);
71  if (itensor == nullptr) LFATAL("Network has Null input tensor " << i);
72  ret.emplace_back(jevois::dnn::tensorattr(itensor));
73  LINFO("Input " << i << ": " << jevois::dnn::attrstr(ret.back()));
74  }
75  return ret;
76 }
77 
78 // ####################################################################################################
79 std::vector<vsi_nn_tensor_attr_t> jevois::dnn::NetworkTPU::outputShapes()
80 {
81  if (ready() == false) LFATAL("Network is not ready");
82 
83  // Shapes are embedded in the network file, but can be overridden:
84  std::string const outshapes = outtensors::get();
85  if (outshapes.empty() == false) return jevois::dnn::parseTensorSpecs(outshapes);
86 
87  // Get the shapes from the network:
88  std::vector<vsi_nn_tensor_attr_t> ret;
89  auto const & output_indices = itsInterpreter->outputs();
90 
91  for (size_t i = 0; i < output_indices.size(); ++i)
92  {
93  TfLiteTensor const * otensor = itsInterpreter->tensor(output_indices[i]);
94  if (otensor == nullptr) LFATAL("Network has Null output tensor " << i);
95  ret.emplace_back(jevois::dnn::tensorattr(otensor));
96  LINFO("Output " << i << ": " << jevois::dnn::attrstr(ret.back()));
97  }
98  return ret;
99 }
100 
101 // ####################################################################################################
103 {
104  // Need to nuke the network first if it exists or we could run out of RAM:
105  itsInterpreter.reset();
106  itsModel.reset();
107  itsErrorReporter.itsErrors.clear();
108 
109  std::string const m = jevois::absolutePath(dataroot::get(), model::get());
110 
111  try
112  {
113  // Create and load the network:
114  itsModel = tflite::FlatBufferModel::BuildFromFile(m.c_str(), &itsErrorReporter);
115  if (!itsModel) LFATAL("Failed to load model from file " << m);
116 
117  tflite::ops::builtin::BuiltinOpResolver resolver;
118  tflite::InterpreterBuilder(*itsModel, resolver)(&itsInterpreter);
119 
120  size_t num_devices;
121  std::unique_ptr<edgetpu_device, decltype(&edgetpu_free_devices)>
122  devices(edgetpu_list_devices(&num_devices), &edgetpu_free_devices);
123 
124  if (num_devices == 0) LFATAL("No connected TPU found");
125  size_t const tn = tpunum::get();
126  if (tn >= num_devices) LFATAL("Cannot use TPU " << tn << " because only " << num_devices << " TPUs detected.");
127 
128  auto const & device = devices.get()[tn];
129  itsInterpreter->
130  ModifyGraphWithDelegate(std::unique_ptr<TfLiteDelegate, decltype(&edgetpu_free_delegate)>
131  (edgetpu_create_delegate(device.type, device.path, nullptr, 0), &edgetpu_free_delegate));
132 
133  itsInterpreter->SetNumThreads(1);
134 
135  if (itsInterpreter->AllocateTensors() != kTfLiteOk) LFATAL("Failed to allocate tensors");
136 
137  for (size_t i = 0; i < itsInterpreter->inputs().size(); ++i)
138  LINFO("Input tensor " << i << ": " << itsInterpreter->GetInputName(i));
139  for (size_t i = 0; i < itsInterpreter->outputs().size(); ++i)
140  LINFO("Output tensor " << i << ": " << itsInterpreter->GetOutputName(i));
141 
142  int t_size = itsInterpreter->tensors_size();
143  for (int i = 0; i < t_size; ++i)
144  if (itsInterpreter->tensor(i)->name)
145  LINFO("Layer " << i << ": " << itsInterpreter->tensor(i)->name << ", "
146  << jevois::dnn::shapestr(itsInterpreter->tensor(i)) << ", "
147  << itsInterpreter->tensor(i)->bytes << " bytes, scale: "
148  << itsInterpreter->tensor(i)->params.scale << ", zero: "
149  << itsInterpreter->tensor(i)->params.zero_point);
150 
151  //if (threads::get()) itsInterpreter->SetNumThreads(threads::get());
152  for (size_t i = 0; i < itsInterpreter->inputs().size(); ++i)
153  LINFO("input " << i << " is layer " << itsInterpreter->inputs()[i]);
154  for (size_t i = 0; i < itsInterpreter->outputs().size(); ++i)
155  LINFO("output " << i << " is layer " << itsInterpreter->outputs()[i]);
156  }
157  catch (std::exception const & e)
158  {
159  std::string err = "\n";
160  for (std::string const & s : itsErrorReporter.itsErrors) err += "ERR " + s + "\n";
161  err += e.what();
162  throw std::runtime_error(err);
163  }
164 }
165 
166 // ####################################################################################################
//! Process pre-formatted input blobs through the network and collect the output blobs
/*! Copies each input blob into the matching TFLite input tensor (dims and byte sizes are
    checked first), invokes the interpreter, then converts each output tensor to a cv::Mat:
    UINT8 outputs are dequantized to FLOAT32 when the dequant parameter is on, INT64 outputs are
    narrowed to INT32, and all other supported types are copied verbatim. Appends human-readable
    status messages to 'info'. Throws (via LFATAL) on any mismatch or interpreter failure. */
std::vector<cv::Mat> jevois::dnn::NetworkTPU::doprocess(std::vector<cv::Mat> const & blobs,
                                                        std::vector<std::string> & info)
{
  if ( ! itsInterpreter) LFATAL("Internal inconsistency");

  if (blobs.size() != itsInterpreter->inputs().size())
    LFATAL("Received " << blobs.size() << " input tensors, but network wants " << itsInterpreter->inputs().size());

  auto const & input_indices = itsInterpreter->inputs();
  for (size_t b = 0; b < blobs.size(); ++b)
  {
    cv::Mat const & cvin = blobs[b];
    auto * itensor = itsInterpreter->tensor(input_indices[b]);
    if (itensor == nullptr) LFATAL("Network has Null input tensor " << b);

    // Make sure input dims are a match:
    // NOTE(review): assumes cvin has at least tfindims.size dims -- cv::MatSize indexing past
    // cvin.dims is unchecked; the byte-size check below catches most practical mismatches.
    TfLiteIntArray const & tfindims = *itensor->dims;
    cv::MatSize const & cvindims = cvin.size;
    for (int i = 0; i < tfindims.size; ++i)
      if (tfindims.data[i] != cvindims[i])
        LFATAL("Input " << b << " mismatch: blob is " << jevois::dnn::shapestr(cvin) <<
               " but network wants " << jevois::dnn::shapestr(itensor));

    // Make sure total sizes in bytes are a match too:
    size_t const cvsiz = cvin.total() * cvin.elemSize();
    size_t const tfsiz = itensor->bytes;
    if (cvsiz != tfsiz) LFATAL("Input " << b << " size mismatch: blob has " << cvsiz <<
                               " but network wants " << tfsiz << " bytes. Maybe type is wrong in intensors?");

    // Copy input blob to input tensor:
    uint8_t * input = tflite::GetTensorData<uint8_t>(itensor);
    if (input == nullptr) LFATAL("Input tensor " << b << " is null in network");
    std::memcpy(input, cvin.data, cvsiz);
    // One status line is appended per input blob:
    info.emplace_back("- Input tensors ok");
  }

  // Run the network:
  if (itsInterpreter->Invoke() != kTfLiteOk) LFATAL("Failed to invoke interpreter");
  info.emplace_back("- Network forward pass ok");

  // Collect/convert the outputs:
  auto const & output_indices = itsInterpreter->outputs();
  std::vector<cv::Mat> outs;

  for (size_t o = 0; o < output_indices.size(); ++o)
  {
    auto const * otensor = itsInterpreter->tensor(output_indices[o]);
    if (otensor == nullptr) LFATAL("Network produced Null output tensor " << o);

    // Allocate an OpenCV output array of dims that match our output tensor; sz accumulates the
    // total number of elements for the copy/convert loops below:
    TfLiteIntArray const & tfdims = *otensor->dims;
    std::vector<int> cvdims; size_t sz = 1;
    for (int i = 0; i < tfdims.size; ++i) { cvdims.emplace_back(tfdims.data[i]); sz *= tfdims.data[i]; }

    // Convert/copy output tensor data to OpenCV arrays. 'notdone' tracks whether the dequant
    // branch already produced the output, so the verbatim-copy branch is skipped:
    TfLiteType const ot = otensor->type;
    std::string const otname = TfLiteTypeGetName(ot);
    bool notdone = true;

    if (dequant::get())
    {
      switch (ot)
      {
      case kTfLiteUInt8:
      {
        // Dequantize UINT8 to FLOAT32 using the tensor's quantization params
        // (real_value = scale * (quantized - zero_point)):
        uint8_t const * output = tflite::GetTensorData<uint8_t>(otensor);
        if (output == nullptr) LFATAL("Network produced Null output tensor data " << o);
        cv::Mat const cvi(cvdims, CV_8U, (void *)output);
        cv::Mat cvout; cvi.convertTo(cvout, CV_32F);
        cvout -= otensor->params.zero_point;
        cvout *= otensor->params.scale;
        info.emplace_back("- Dequantized " + otname + " output tensor " + std::to_string(o) + " to FLOAT32");
        outs.emplace_back(cvout);
        notdone = false;
      }
      break;

      default:
        // For now, we only know how to dequantize uint8...
        break;
      }
    }

    if (notdone)
    {
      // We just want to copy the data untouched, except that OpenCV does not support as many pixel types as tensorflow:
      switch (ot)
      {
      case kTfLiteInt64: // used by DeepLabV3. Just convert to int32:
      {
        cv::Mat cvout(cvdims, CV_32S);
        int * cvoutdata = (int *)cvout.data;
        int64_t const * output = tflite::GetTensorData<int64_t>(otensor);
        if (output == nullptr) LFATAL("Network produced Null output tensor data " << o);
        // Element-wise narrowing copy; values outside int32 range would wrap:
        for (size_t i = 0; i < sz; ++i) *cvoutdata++ = int(*output++);
        info.emplace_back("- Converted " + otname + " output tensor " + std::to_string(o) + " to INT32");
        outs.emplace_back(cvout);
      }
      break;

      case kTfLiteFloat32:
      case kTfLiteInt32:
      case kTfLiteUInt8:
      case kTfLiteInt16:
      case kTfLiteInt8:
      case kTfLiteFloat16:
      case kTfLiteFloat64:
      {
        // Simple copy with no conversion:
        unsigned int cvtype = jevois::dnn::tf2cv(ot);
        cv::Mat cvout(cvdims, cvtype);
        uint8_t const * output = tflite::GetTensorData<uint8_t>(otensor);
        if (output == nullptr) LFATAL("Network produced Null output tensor data " << o);
        std::memcpy(cvout.data, output, sz * jevois::cvBytesPerPix(cvtype));
        info.emplace_back("- Copied " + otname + " output tensor " + std::to_string(o));
        outs.emplace_back(cvout);
      }
      break;

      default:
        LFATAL("Output tensor " << otensor->name << " has unsupported type: " << otname);
      }
    }
  }

  // Report the TPU temperature: 'temp' is cached across calls (static) and only refreshed every
  // 50 frames to limit sysfs reads; value is read in millidegrees C from the apex driver:
  static int temp = 0;
  size_t const tn = tpunum::get();
  if ((jevois::frameNum() % 50) == 0)
    try { temp = std::stoi(jevois::getFileString(jevois::sformat("/sys/class/apex/apex_%zu/temp", tn).c_str())); }
    catch (...) { } // silently ignore any errors
  info.emplace_back(jevois::sformat("- TPU%zu temp %dC", tn, temp / 1000));

  return outs;
}
303 
304 #endif // JEVOIS_PRO
jevois::imu::get
Data collection mode RAW means that the latest available raw data is returned each time get() is called
jevois::dnn::NetworkTPU::inputShapes
virtual std::vector< vsi_nn_tensor_attr_t > inputShapes() override
Get shapes of all input tensors.
Definition: NetworkTPU.C:56
jevois::sformat
std::string sformat(char const *fmt,...) __attribute__((format(__printf__
Create a string using printf style arguments.
Definition: Utils.C:439
jevois::dnn::tf2cv
int tf2cv(TfLiteType t)
Convert from TensorFlow data type to OpenCV.
Definition: Utils.C:285
jevois::cvBytesPerPix
unsigned int cvBytesPerPix(unsigned int cvtype)
Return the number of bytes per pixel for a given OpenCV pixel type.
Definition: Utils.C:89
jevois::dnn::NetworkTPU::outputShapes
virtual std::vector< vsi_nn_tensor_attr_t > outputShapes() override
Get shapes of all output tensors.
Definition: NetworkTPU.C:79
Utils.H
jevois::dnn::tensorattr
vsi_nn_tensor_attr_t tensorattr(TfLiteTensor const *t)
Get tensor shape and type attributes for a TensorFlow Lite tensor.
Definition: Utils.C:562
o
#define o
Definition: Font10x20.C:6
NetworkTPU.H
jevois::dnn::Network::waitBeforeDestroy
void waitBeforeDestroy()
If network is currently loading, wait until that is done before destroying.
Definition: Network.C:45
jevois::dnn::Network::freeze
virtual void freeze(bool doit)
Freeze/unfreeze parameters that users should not change while running.
Definition: Network.C:27
LERROR
#define LERROR(msg)
Convenience macro for users to print out console or syslog messages, ERROR level.
Definition: Log.H:211
jevois::getFileString
std::string getFileString(char const *fname, int skip=0)
Read one line from a file and return it as a string.
Definition: Utils.C:541
jevois::dnn::NetworkTPU::doprocess
std::vector< cv::Mat > doprocess(std::vector< cv::Mat > const &blobs, std::vector< std::string > &info) override
Process input blobs and obtain output blobs.
Definition: NetworkTPU.C:167
jevois::dnn::parseTensorSpecs
std::vector< vsi_nn_tensor_attr_t > parseTensorSpecs(std::string const &specs)
Parse tensor specification.
Definition: Utils.C:386
jevois::absolutePath
std::filesystem::path absolutePath(std::filesystem::path const &root, std::filesystem::path const &path)
Compute an absolute path from two paths.
Definition: Utils.C:385
jevois::dnn::shapestr
std::string shapestr(cv::Mat const &m)
Get a string of the form: "nD AxBxC... TYPE" from an n-dimensional cv::Mat with data type TYPE.
Definition: Utils.C:104
jevois::dnn::NetworkTPU::freeze
void freeze(bool doit) override
Freeze/unfreeze parameters that users should not change while running.
Definition: NetworkTPU.C:44
LFATAL
#define LFATAL(msg)
Convenience macro for users to print out console or syslog messages, FATAL level.
jevois::dnn::NetworkTPU::load
void load() override
Load from disk.
Definition: NetworkTPU.C:102
jevois::to_string
std::string to_string(T const &val)
Convert from type to string.
jevois::dnn::NetworkTPU::~NetworkTPU
virtual ~NetworkTPU()
Destructor.
Definition: NetworkTPU.C:40
jevois::dnn::attrstr
std::string attrstr(vsi_nn_tensor_attr_t const &attr)
Get a string describing the specs of a tensor, including quantification specs (not provided by shapes...
Definition: Utils.C:511
Utils.H
LINFO
#define LINFO(msg)
Convenience macro for users to print out console or syslog messages, INFO level.
Definition: Log.H:194