JeVoisBase  1.6
JeVois Smart Embedded Machine Vision Toolkit Base Modules
Share this page:
ObjectRecognitionILAB.C
Go to the documentation of this file.
1 // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
2 //
3 // JeVois Smart Embedded Machine Vision Toolkit - Copyright (C) 2016 by Laurent Itti, the University of Southern
4 // California (USC), and iLab at USC. See http://iLab.usc.edu and http://jevois.org for information about this project.
5 //
6 // This file is part of the JeVois Smart Embedded Machine Vision Toolkit. This program is free software; you can
7 // redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software
8 // Foundation, version 2. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9 // without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
10 // License for more details. You should have received a copy of the GNU General Public License along with this program;
11 // if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
12 //
13 // Contact information: Laurent Itti - 3641 Watt Way, HNB-07A - Los Angeles, CA 90089-2520 - USA.
14 // Tel: +1 213 740 3527 - itti@pollux.usc.edu - http://iLab.usc.edu - http://jevois.org
15 // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
16 /*! \file */
17 
#include "tiny-dnn/tiny_dnn/tiny_dnn.h"
#include <jevois/Debug/Log.H>
#include <algorithm>
#include <fstream>
#include <numeric>
#include <random>

#include <opencv2/imgproc/imgproc.hpp>

#ifdef JEVOIS_PLATFORM
#include <opencv2/imgcodecs/imgcodecs.hpp> // for imread in opencv3.1
#else
#include <opencv2/highgui/highgui.hpp> // for imread and imshow in older opencv
#endif
31 
32 
33 // ####################################################################################################
34 ObjectRecognitionILAB::ObjectRecognitionILAB(std::string const & instance) :
35  ObjectRecognition<tiny_dnn::sequential>(instance)
36 { }
37 
38 // ####################################################################################################
40 {
41  // Nothing to do, base class destructor will de-allocate the network
42 }
43 
44 // ####################################################################################################
46 {
47  using conv = tiny_dnn::convolutional_layer;
48  using pool = tiny_dnn::max_pooling_layer;
49  using fc = tiny_dnn::fully_connected_layer;
50  using relu = tiny_dnn::relu_layer;
51  using softmax = tiny_dnn::softmax_layer;
52 
53  const size_t n_fmaps = 10; ///< number of feature maps for upper layer
54  const size_t n_fc = 64; ///< number of hidden units in fully-connected layer
55  int const n_categ = 5; // number of object categories
56 
57  (*net) << conv(32, 32, 5, 3, n_fmaps, tiny_dnn::padding::same) // C1
58  << pool(32, 32, n_fmaps, 2) // P2
59  << relu(16, 16, n_fmaps) // activation
60  << conv(16, 16, 5, n_fmaps, n_fmaps * 2, tiny_dnn::padding::same) // C3
61  << pool(16, 16, n_fmaps * 2, 2) // P4
62  << relu(8, 8, n_fmaps) // activation
63  << conv(8, 8, 5, n_fmaps * 2, n_fmaps * 4, tiny_dnn::padding::same) // C5
64  << pool(8, 8, n_fmaps * 4, 2) // P6
65  << relu(4, 4, n_fmaps * 42) // activation
66  << fc(4 * 4 * n_fmaps * 4, n_fc) // FC7
67  << fc(n_fc, n_categ) << softmax(n_categ); // FC10
68 }
69 
70 // ####################################################################################################
71 namespace
72 {
73  void load_compiled(std::string const & fname, std::vector<tiny_dnn::label_t> & labels,
74  std::vector<tiny_dnn::vec_t> & images)
75  {
76  float const scale_min = -1.0F;
77  float const scale_max = 1.0F;
78  int const w = 32, h = 32; // image width and height
79 
80  std::ifstream ifs(fname, std::ios::in | std::ios::binary);
81  if (ifs.is_open() == false) LFATAL("Failed to open load " << fname);
82 
83  // We need to randomize the order. To achieve this, we will here first compute a randomized set of indices, then
84  // populate the data at those indices:
85  size_t siz;
86  {
87  // get input file size:
88  std::ifstream file(fname, std::ios::binary | std::ios::ate);
89  siz = file.tellg() / (w * h * 3 + 1);
90  LINFO("File has " << siz << " entries");
91  }
92 
93  std::vector<size_t> idx; for (size_t i = 0; i < siz; ++i) idx.push_back(i);
94  std::random_shuffle(idx.begin(), idx.end());
95  labels.resize(siz); images.resize(siz);
96 
97  // Load the data:
98  std::vector<unsigned char> buf(w * h * 3);
99 
100  for (size_t i = 0; i < siz; ++i)
101  {
102  unsigned char label; ifs.read((char *)(&label), 1);
103  if (!ifs) LFATAL("Error reading " << fname);
104  labels[idx[i]] = label;
105 
106  ifs.read((char *)(&buf[0]), buf.size());
107  if (!ifs) LFATAL("Error reading " << fname);
108 
109  tiny_dnn::vec_t img;
110  std::transform(buf.begin(), buf.end(), std::back_inserter(img),
111  [&](unsigned char c) { return scale_min + (scale_max - scale_min) * c / 255.0F; });
112 
113  images[idx[i]] = img;
114  }
115  ifs.close();
116  LINFO("Loaded " << siz << " images and labels from file " << fname);
117  }
118 
119  // ####################################################################################################
120  void create_compiled(std::string const & fname, size_t startinst, size_t numinst)
121  {
122  static std::vector<std::string> const categs = { "car", "equip", "plane", "boat", "mil" }; // FIXME
123 
124  LINFO("Create " << fname << " using " << numinst << " instances starting at " << startinst);
125 
126  std::ofstream ofs(fname, std::ios::out | std::ios::binary);
127  if (ofs.is_open() == false) LFATAL("Error trying to write file " << fname);
128 
129  for (unsigned char categ = 0; categ < categs.size(); ++categ)
130  for (size_t inst = startinst; inst < startinst + numinst; ++inst)
131  {
132  // Create big image filename: eg, base/boat/boat-i0008-b0077-cropped.png
133  char tmp[2048];
134  snprintf(tmp, 2048, "/lab/tmp10b/u/iLab-20M-Cropped-Jiaping-Augments/%s/%s-i%04zu-b0000-cropped.png",
135  categs[categ].c_str(), categs[categ].c_str(), inst);
136  LINFO("... adding 1320 images from " << tmp);
137 
138  // Load the big image:
139  cv::Mat bigimg = cv::imread(tmp);
140 
141  // Images contain 44 (wide) x 30 (tall) = 1320 crops. Determine crop size:
142  int const cw = bigimg.cols / 44;
143  int const ch = bigimg.rows / 30;
144  LINFO("cw="<<cw<<" ch="<<ch);
145  // Extract the individual views: we have 44 views horizontally, in this loop order
146  int x = 0, y = 0;
147  for (int cam = 0; cam < 11; ++cam)
148  for (int rot = 0; rot < 8; ++rot)
149  for (int lig = 0; lig < 5; ++lig)
150  for (int foc = 0; foc < 3; ++foc)
151  {
152  cv::Mat imgcrop = bigimg(cv::Rect(x, y, cw, ch));
153  cv::Mat obj; cv::resize(imgcrop, obj, cv::Size(32, 32), 0, 0, cv::INTER_AREA);
154 
155 #ifndef JEVOIS_PLATFORM
156  cv::imshow("conversion", obj); cv::waitKey(1);
157 #endif
158  cv::Mat rgbobj; cv::cvtColor(obj, rgbobj, CV_BGR2RGB); // opencv reads BGR by default
159 
160  ofs.write((char const *)(&categ), 1);
161  ofs.write((char const *)(rgbobj.data), 32*32*3);
162 
163  x += cw; if (x >= bigimg.cols) { x = 0; y += ch; }
164  }
165  }
166  }
167 }
168 
169 // ####################################################################################################
170 void ObjectRecognitionILAB::train(std::string const & path)
171 {
172  LINFO("Load training data from directory " << path);
173 
174  float learning_rate = 0.01F;
175  size_t const ntrain = 18; // number of objects to use for training
176  size_t const ntest = 4; // number of objects to use for test
177 
178  // Load ILAB dataset:
179  std::vector<tiny_dnn::label_t> train_labels, test_labels;
180  std::vector<tiny_dnn::vec_t> train_images, test_images;
181 
182  // Try to load from pre-compiled:
183  std::string const trainpath = path + "/ilab5-train.bin";
184  std::string const testpath = path + "/ilab5-test.bin";
185 
186  std::ifstream ifs(trainpath);
187  if (ifs.is_open() == false)
188  {
189  // Need to create the datasets from raw images
190  create_compiled(trainpath, 1, ntrain);
191  create_compiled(testpath, ntrain + 2, ntest);
192  }
193 
194  // Ok, the datasets:
195  load_compiled(trainpath, train_labels, train_images);
196  load_compiled(testpath, test_labels, test_images);
197 
198  LINFO("Start training...");
199  int const n_minibatch = 48;
200  int const n_train_epochs = 100;
201 
202  tiny_dnn::timer t;
203 
204  // Create callbacks:
205  auto on_enumerate_epoch = [&](){
206  LINFO(t.elapsed() << "s elapsed.");
207  tiny_dnn::result res = net->test(test_images, test_labels);
208  LINFO(res.num_success << "/" << res.num_total << " success/total validation score so far");
209 
210  //disp.restart(train_images.size());
211  t.restart();
212  };
213 
214  auto on_enumerate_minibatch = [&](){
215  //disp += n_minibatch;
216  };
217 
218  // Training:
219  tiny_dnn::adam optimizer;
220  optimizer.alpha *= static_cast<tiny_dnn::float_t>(sqrt(n_minibatch) * learning_rate);
221  net->train<tiny_dnn::cross_entropy>(optimizer, train_images, train_labels, n_minibatch, n_train_epochs,
222  on_enumerate_minibatch, on_enumerate_epoch);
223 
224  LINFO("Training complete");
225 
226  // test and show results
227  net->test(test_images, test_labels).print_detail(std::cout);
228 }
229 
230 // ####################################################################################################
231 std::string const & ObjectRecognitionILAB::category(size_t idx) const
232 {
233  static std::vector<std::string> const names = { "car", "equip", "plane", "boat", "mil" };
234 
235  if (idx >= names.size()) LFATAL("Category index out of bounds");
236 
237  return names[idx];
238 }
virtual void train(std::string const &path) override
Train the network.
virtual void define() override
Define the network structure.
#define LFATAL(msg)
Wrapper around a neural network implemented with the tiny-dnn framework by Taiga Nomi...
tiny_dnn::network< tiny_dnn::sequential > * net
#define LINFO(msg)
virtual std::string const & category(size_t idx) const override
Return the name of a given category (0-based index in the vector of results)
ObjectRecognitionILAB(std::string const &instance)
Constructor, loads the given CNN, its sizes must match our (fixed) internal network structure...
virtual ~ObjectRecognitionILAB()
Destructor.