JeVois  1.18
JeVois Smart Embedded Machine Vision Toolkit
Share this page:
NetworkTPU.C
Go to the documentation of this file.
1 // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
2 //
3 // JeVois Smart Embedded Machine Vision Toolkit - Copyright (C) 2021 by Laurent Itti, the University of Southern
4 // California (USC), and iLab at USC. See http://iLab.usc.edu and http://jevois.org for information about this project.
5 //
6 // This file is part of the JeVois Smart Embedded Machine Vision Toolkit. This program is free software; you can
7 // redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software
8 // Foundation, version 2. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9 // without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
10 // License for more details. You should have received a copy of the GNU General Public License along with this program;
11 // if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
12 //
13 // Contact information: Laurent Itti - 3641 Watt Way, HNB-07A - Los Angeles, CA 90089-2520 - USA.
14 // Tel: +1 213 740 3527 - itti@pollux.usc.edu - http://iLab.usc.edu - http://jevois.org
15 // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
16 /*! \file */
17 
18 #ifdef JEVOIS_PRO
19 
20 #include <jevois/DNN/NetworkTPU.H>
21 #include <jevois/DNN/Utils.H>
22 #include <jevois/Util/Utils.H>
23 #include <edgetpu_c.h>
24 
25 #include <tensorflow/lite/builtin_op_data.h>
26 #include <tensorflow/lite/kernels/register.h>
27 #include <tensorflow/lite/kernels/internal/tensor_ctypes.h> // for GetTensorData()
28 
29 // ####################################################################################################
30 int jevois::dnn::NetworkTPU::ErrorReporter::Report(char const * format, va_list args)
31 {
32  static char buf[2048];
33  int ret = vsnprintf(buf, 2048, format, args);
34  itsErrors.push_back(buf);
35  LERROR(buf);
36  return ret;
37 }
38 
39 // ####################################################################################################
41 { waitBeforeDestroy(); }
42 
43 // ####################################################################################################
45 {
46  dataroot::freeze(doit);
47  model::freeze(doit);
48  tpunum::freeze(doit);
49  dequant::freeze(doit);
50  intensors::freeze(doit);
51  outtensors::freeze(doit);
52 }
53 
54 // ####################################################################################################
55 std::vector<vsi_nn_tensor_attr_t> jevois::dnn::NetworkTPU::inputShapes()
56 {
57  if (ready() == false) LFATAL("Network is not ready");
58 
59  // Shapes are embedded in the network file, but can be overridden:
60  std::string const inshapes = intensors::get();
61  if (inshapes.empty() == false) return jevois::dnn::parseTensorSpecs(inshapes);
62 
63  // Get the shapes from the network:
64  std::vector<vsi_nn_tensor_attr_t> ret;
65  auto const & input_indices = itsInterpreter->inputs();
66 
67  for (size_t i = 0; i < input_indices.size(); ++i)
68  {
69  TfLiteTensor const * itensor = itsInterpreter->tensor(input_indices[i]);
70  if (itensor == nullptr) LFATAL("Network has Null input tensor " << i);
71  ret.emplace_back(jevois::dnn::tensorattr(itensor));
72  LINFO("Input " << i << ": " << jevois::dnn::attrstr(ret.back()));
73  }
74  return ret;
75 }
76 
77 // ####################################################################################################
78 std::vector<vsi_nn_tensor_attr_t> jevois::dnn::NetworkTPU::outputShapes()
79 {
80  if (ready() == false) LFATAL("Network is not ready");
81 
82  // Shapes are embedded in the network file, but can be overridden:
83  std::string const outshapes = outtensors::get();
84  if (outshapes.empty() == false) return jevois::dnn::parseTensorSpecs(outshapes);
85 
86  // Get the shapes from the network:
87  std::vector<vsi_nn_tensor_attr_t> ret;
88  auto const & output_indices = itsInterpreter->outputs();
89 
90  for (size_t i = 0; i < output_indices.size(); ++i)
91  {
92  TfLiteTensor const * otensor = itsInterpreter->tensor(output_indices[i]);
93  if (otensor == nullptr) LFATAL("Network has Null output tensor " << i);
94  ret.emplace_back(jevois::dnn::tensorattr(otensor));
95  LINFO("Output " << i << ": " << jevois::dnn::attrstr(ret.back()));
96  }
97  return ret;
98 }
99 
100 // ####################################################################################################
102 {
103  // Need to nuke the network first if it exists or we could run out of RAM:
104  itsInterpreter.reset();
105  itsModel.reset();
106  itsErrorReporter.itsErrors.clear();
107 
108  std::string const m = jevois::absolutePath(dataroot::get(), model::get());
109 
110  try
111  {
112  // Create and load the network:
113  itsModel = tflite::FlatBufferModel::BuildFromFile(m.c_str(), &itsErrorReporter);
114  if (!itsModel) LFATAL("Failed to load model from file " << m);
115 
116  tflite::ops::builtin::BuiltinOpResolver resolver;
117  tflite::InterpreterBuilder(*itsModel, resolver)(&itsInterpreter);
118 
119  size_t num_devices;
120  std::unique_ptr<edgetpu_device, decltype(&edgetpu_free_devices)>
121  devices(edgetpu_list_devices(&num_devices), &edgetpu_free_devices);
122 
123  if (num_devices == 0) LFATAL("No connected TPU found");
124  size_t const tn = tpunum::get();
125  if (tn >= num_devices) LFATAL("Cannot use TPU " << tn << " because only " << num_devices << " TPUs detected.");
126 
127  auto const & device = devices.get()[tn];
128  itsInterpreter->
129  ModifyGraphWithDelegate(std::unique_ptr<TfLiteDelegate, decltype(&edgetpu_free_delegate)>
130  (edgetpu_create_delegate(device.type, device.path, nullptr, 0), &edgetpu_free_delegate));
131 
132  itsInterpreter->SetNumThreads(1);
133 
134  if (itsInterpreter->AllocateTensors() != kTfLiteOk) LFATAL("Failed to allocate tensors");
135 
136  for (size_t i = 0; i < itsInterpreter->inputs().size(); ++i)
137  LINFO("Input tensor " << i << ": " << itsInterpreter->GetInputName(i));
138  for (size_t i = 0; i < itsInterpreter->outputs().size(); ++i)
139  LINFO("Output tensor " << i << ": " << itsInterpreter->GetOutputName(i));
140 
141  int t_size = itsInterpreter->tensors_size();
142  for (int i = 0; i < t_size; ++i)
143  if (itsInterpreter->tensor(i)->name)
144  LINFO("Layer " << i << ": " << itsInterpreter->tensor(i)->name << ", "
145  << jevois::dnn::shapestr(itsInterpreter->tensor(i)) << ", "
146  << itsInterpreter->tensor(i)->bytes << " bytes, scale: "
147  << itsInterpreter->tensor(i)->params.scale << ", zero: "
148  << itsInterpreter->tensor(i)->params.zero_point);
149 
150  //if (threads::get()) itsInterpreter->SetNumThreads(threads::get());
151  for (size_t i = 0; i < itsInterpreter->inputs().size(); ++i)
152  LINFO("input " << i << " is layer " << itsInterpreter->inputs()[i]);
153  for (size_t i = 0; i < itsInterpreter->outputs().size(); ++i)
154  LINFO("output " << i << " is layer " << itsInterpreter->outputs()[i]);
155  }
156  catch (std::exception const & e)
157  {
158  std::string err = "\n";
159  for (std::string const & s : itsErrorReporter.itsErrors) err += "ERR " + s + "\n";
160  err += e.what();
161  throw std::runtime_error(err);
162  }
163 }
164 
165 // ####################################################################################################
166 std::vector<cv::Mat> jevois::dnn::NetworkTPU::doprocess(std::vector<cv::Mat> const & blobs,
167  std::vector<std::string> & info)
168 {
169  if ( ! itsInterpreter) LFATAL("Internal inconsistency");
170 
171  if (blobs.size() != itsInterpreter->inputs().size())
172  LFATAL("Received " << blobs.size() << " input tensors, but network wants " << itsInterpreter->inputs().size());
173 
174  auto const & input_indices = itsInterpreter->inputs();
175  for (size_t b = 0; b < blobs.size(); ++b)
176  {
177  cv::Mat const & cvin = blobs[b];
178  auto * itensor = itsInterpreter->tensor(input_indices[b]);
179  if (itensor == nullptr) LFATAL("Network has Null input tensor " << b);
180 
181  // Make sure input dims are a match:
182  TfLiteIntArray const & tfindims = *itensor->dims;
183  cv::MatSize const & cvindims = cvin.size;
184  for (int i = 0; i < tfindims.size; ++i)
185  if (tfindims.data[i] != cvindims[i])
186  LFATAL("Input " << b << " mismatch: blob is " << jevois::dnn::shapestr(cvin) <<
187  " but network wants " << jevois::dnn::shapestr(itensor));
188 
189  // Make sure total sizes in bytes are a match too:
190  size_t const cvsiz = cvin.total() * cvin.elemSize();
191  size_t const tfsiz = itensor->bytes;
192  if (cvsiz != tfsiz) LFATAL("Input " << b << " size mismatch: blob has " << cvsiz <<
193  " but network wants " << tfsiz << " bytes. Maybe type is wrong in intensors?");
194 
195  // Copy input blob to input tensor:
196  uint8_t * input = tflite::GetTensorData<uint8_t>(itensor);
197  if (input == nullptr) LFATAL("Input tensor " << b << " is null in network");
198  std::memcpy(input, cvin.data, cvsiz);
199  info.emplace_back("- Input tensors ok");
200  }
201 
202  // Run the network:
203  if (itsInterpreter->Invoke() != kTfLiteOk) LFATAL("Failed to invoke interpreter");
204  info.emplace_back("- Network forward pass ok");
205 
206  // Collect/convert the outputs:
207  auto const & output_indices = itsInterpreter->outputs();
208  std::vector<cv::Mat> outs;
209 
210  for (size_t o = 0; o < output_indices.size(); ++o)
211  {
212  auto const * otensor = itsInterpreter->tensor(output_indices[o]);
213  if (otensor == nullptr) LFATAL("Network produced Null output tensor " << o);
214 
215  // Allocate an OpenCV output array of dims that match our output tensor:
216  TfLiteIntArray const & tfdims = *otensor->dims;
217  std::vector<int> cvdims; size_t sz = 1;
218  for (int i = 0; i < tfdims.size; ++i) { cvdims.emplace_back(tfdims.data[i]); sz *= tfdims.data[i]; }
219 
220  // Convert/copy output tensor data to OpenCV arrays:
221  TfLiteType const ot = otensor->type;
222  std::string const otname = TfLiteTypeGetName(ot);
223  bool notdone = true;
224 
225  if (dequant::get())
226  {
227  switch (ot)
228  {
229  case kTfLiteUInt8:
230  {
231  // Dequantize UINT8 to FLOAT32:
232  uint8_t const * output = tflite::GetTensorData<uint8_t>(otensor);
233  if (output == nullptr) LFATAL("Network produced Null output tensor data " << o);
234  cv::Mat const cvi(cvdims, CV_8U, (void *)output);
235  cv::Mat cvout; cvi.convertTo(cvout, CV_32F);
236  cvout -= otensor->params.zero_point;
237  cvout *= otensor->params.scale;
238  info.emplace_back("- Dequantized " + otname + " output tensor " + std::to_string(o) + " to FLOAT32");
239  outs.emplace_back(cvout);
240  notdone = false;
241  }
242  break;
243 
244  default:
245  // For now, we only know how to dequantize uint8...
246  break;
247  }
248  }
249 
250  if (notdone)
251  {
252  // We just want to copy the data untouched, except that OpenCV does not support as many pixel types as tensorflow:
253  switch (ot)
254  {
255  case kTfLiteInt64: // used by DeepLabV3. Just convert to int32:
256  {
257  cv::Mat cvout(cvdims, CV_32S);
258  int * cvoutdata = (int *)cvout.data;
259  int64_t const * output = tflite::GetTensorData<int64_t>(otensor);
260  if (output == nullptr) LFATAL("Network produced Null output tensor data " << o);
261  for (size_t i = 0; i < sz; ++i) *cvoutdata++ = int(*output++);
262  info.emplace_back("- Converted " + otname + " output tensor " + std::to_string(o) + " to INT32");
263  outs.emplace_back(cvout);
264  }
265  break;
266 
267  case kTfLiteFloat32:
268  case kTfLiteInt32:
269  case kTfLiteUInt8:
270  case kTfLiteInt16:
271  case kTfLiteInt8:
272  case kTfLiteFloat16:
273  case kTfLiteFloat64:
274  {
275  // Simple copy with no conversion:
276  unsigned int cvtype = jevois::dnn::tf2cv(ot);
277  cv::Mat cvout(cvdims, cvtype);
278  uint8_t const * output = tflite::GetTensorData<uint8_t>(otensor);
279  if (output == nullptr) LFATAL("Network produced Null output tensor data " << o);
280  std::memcpy(cvout.data, output, sz * jevois::cvBytesPerPix(cvtype));
281  info.emplace_back("- Copied " + otname + " output tensor " + std::to_string(o));
282  outs.emplace_back(cvout);
283  }
284  break;
285 
286  default:
287  LFATAL("Output tensor " << otensor->name << " has unsupported type: " << otname);
288  }
289  }
290  }
291 
292  // Report the TPU temperature:
293  static int temp = 0;
294  size_t const tn = tpunum::get();
295  if ((jevois::frameNum() % 50) == 0)
296  try { temp = std::stoi(jevois::getFileString(jevois::sformat("/sys/class/apex/apex_%zu/temp", tn).c_str())); }
297  catch (...) { } // silently ignore any errors
298  info.emplace_back(jevois::sformat("- TPU%zu temp %dC", tn, temp / 1000));
299 
300  return outs;
301 }
302 
303 #endif // JEVOIS_PRO
jevois::imu::get
Data collection mode RAW means that the latest available raw data is returned each time get() is called
jevois::dnn::NetworkTPU::inputShapes
virtual std::vector< vsi_nn_tensor_attr_t > inputShapes() override
Get shapes of all input tensors.
Definition: NetworkTPU.C:55
jevois::sformat
std::string sformat(char const * fmt, ...) __attribute__((format(__printf__, 1, 2)))
Create a string using printf style arguments.
Definition: Utils.C:419
jevois::dnn::tf2cv
int tf2cv(TfLiteType t)
Convert from TensorFlow data type to OpenCV.
Definition: Utils.C:207
jevois::cvBytesPerPix
unsigned int cvBytesPerPix(unsigned int cvtype)
Return the number of bytes per pixel for a given OpenCV pixel type.
Definition: Utils.C:89
jevois::dnn::NetworkTPU::outputShapes
virtual std::vector< vsi_nn_tensor_attr_t > outputShapes() override
Get shapes of all output tensors.
Definition: NetworkTPU.C:78
Utils.H
jevois::dnn::tensorattr
vsi_nn_tensor_attr_t tensorattr(TfLiteTensor const *t)
Get tensor shape and type attributes for a TensorFlow Lite tensor.
Definition: Utils.C:484
o
#define o
Definition: Font10x20.C:6
NetworkTPU.H
jevois::dnn::Network::waitBeforeDestroy
void waitBeforeDestroy()
If network is currently loading, wait until that is done before destroying.
Definition: Network.C:37
LERROR
#define LERROR(msg)
Convenience macro for users to print out console or syslog messages, ERROR level.
Definition: Log.H:211
jevois::getFileString
std::string getFileString(char const *fname, int skip=0)
Read one line from a file and return it as a string.
Definition: Utils.C:516
jevois::dnn::NetworkTPU::doprocess
std::vector< cv::Mat > doprocess(std::vector< cv::Mat > const &blobs, std::vector< std::string > &info) override
Process input blobs and obtain output blobs.
Definition: NetworkTPU.C:166
jevois::dnn::parseTensorSpecs
std::vector< vsi_nn_tensor_attr_t > parseTensorSpecs(std::string const &specs)
Parse tensor specification.
Definition: Utils.C:308
jevois::absolutePath
std::filesystem::path absolutePath(std::filesystem::path const &root, std::filesystem::path const &path)
Compute an absolute path from two paths.
Definition: Utils.C:365
jevois::dnn::shapestr
std::string shapestr(cv::Mat const &m)
Get a string of the form: "nD AxBxC... TYPE" from an n-dimensional cv::Mat with data type TYPE.
Definition: Utils.C:104
jevois::dnn::NetworkTPU::freeze
void freeze(bool doit) override
Freeze/unfreeze parameters that users should not change while running.
Definition: NetworkTPU.C:44
LFATAL
#define LFATAL(msg)
Convenience macro for users to print out console or syslog messages, FATAL level.
jevois::dnn::NetworkTPU::load
void load() override
Load from disk.
Definition: NetworkTPU.C:101
jevois::to_string
std::string to_string(T const &val)
Convert from type to string.
jevois::dnn::NetworkTPU::~NetworkTPU
virtual ~NetworkTPU()
Destructor.
Definition: NetworkTPU.C:40
jevois::dnn::attrstr
std::string attrstr(vsi_nn_tensor_attr_t const &attr)
Get a string describing the specs of a tensor, including quantification specs (not provided by shapes...
Definition: Utils.C:433
Utils.H
LINFO
#define LINFO(msg)
Convenience macro for users to print out console or syslog messages, INFO level.
Definition: Log.H:194