JeVoisBase  1.0
JeVois Smart Embedded Machine Vision Toolkit Base Modules
ObjectRecognitionILAB.C
Go to the documentation of this file.
1 // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
2 //
3 // JeVois Smart Embedded Machine Vision Toolkit - Copyright (C) 2016 by Laurent Itti, the University of Southern
4 // California (USC), and iLab at USC. See http://iLab.usc.edu and http://jevois.org for information about this project.
5 //
6 // This file is part of the JeVois Smart Embedded Machine Vision Toolkit. This program is free software; you can
7 // redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software
8 // Foundation, version 2. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9 // without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
10 // License for more details. You should have received a copy of the GNU General Public License along with this program;
11 // if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
12 //
13 // Contact information: Laurent Itti - 3641 Watt Way, HNB-07A - Los Angeles, CA 90089-2520 - USA.
14 // Tel: +1 213 740 3527 - itti@pollux.usc.edu - http://iLab.usc.edu - http://jevois.org
15 // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
16 /*! \file */
17 
#include "tiny-dnn/tiny_dnn/tiny_dnn.h"
#include <jevois/Debug/Log.H>

#include <algorithm>
#include <cmath>
#include <fstream>
#include <random>

#include <opencv2/imgproc/imgproc.hpp>

#ifdef JEVOIS_PLATFORM
#include <opencv2/imgcodecs/imgcodecs.hpp> // for imread in opencv3.1
#else
#include <opencv2/highgui/highgui.hpp> // for imread and imshow in older opencv
#endif
31 
32 
33 // ####################################################################################################
34 ObjectRecognitionILAB::ObjectRecognitionILAB(std::string const & instance) :
35  ObjectRecognition<tiny_dnn::sequential>(instance)
36 { }
37 
38 // ####################################################################################################
40 {
41  // Nothing to do, base class destructor will de-allocate the network
42 }
43 
44 // ####################################################################################################
46 {
47  typedef tiny_dnn::convolutional_layer<tiny_dnn::activation::identity> conv;
48  typedef tiny_dnn::max_pooling_layer<tiny_dnn::activation::relu> pool;
49 
50  int const n_fmaps = 10; // base number of feature maps
51  int const n_fc = 64; // number of hidden units in fully-connected layer
52  int const n_categ = 5; // number of object categories
53 
54  (*net) << conv(32, 32, 5, 3, n_fmaps, tiny_dnn::padding::same)
55  << pool(32, 32, n_fmaps, 2)
56  << conv(16, 16, 5, n_fmaps, n_fmaps * 2, tiny_dnn::padding::same)
57  << pool(16, 16, n_fmaps * 2, 2)
58  << conv(8, 8, 5, n_fmaps * 2, n_fmaps * 4, tiny_dnn::padding::same)
59  << pool(8, 8, n_fmaps * 4, 2)
60  << tiny_dnn::fully_connected_layer<tiny_dnn::activation::identity>(4 * 4 * n_fmaps * 4, n_fc)
61  << tiny_dnn::fully_connected_layer<tiny_dnn::activation::softmax>(n_fc, n_categ);
62 }
63 
64 // ####################################################################################################
65 namespace
66 {
67  void load_compiled(std::string const & fname, std::vector<tiny_dnn::label_t> & labels,
68  std::vector<tiny_dnn::vec_t> & images)
69  {
70  float const scale_min = -1.0F;
71  float const scale_max = 1.0F;
72  int const w = 32, h = 32; // image width and height
73 
74  std::ifstream ifs(fname, std::ios::in | std::ios::binary);
75  if (ifs.is_open() == false) LFATAL("Failed to open load " << fname);
76 
77  // We need to randomize the order. To achieve this, we will here first compute a randomized set of indices, then
78  // populate the data at those indices:
79  size_t siz;
80  {
81  // get input file size:
82  std::ifstream file(fname, std::ios::binary | std::ios::ate);
83  siz = file.tellg() / (w * h * 3 + 1);
84  LINFO("File has " << siz << " entries");
85  }
86 
87  std::vector<size_t> idx; for (size_t i = 0; i < siz; ++i) idx.push_back(i);
88  std::random_shuffle(idx.begin(), idx.end());
89  labels.resize(siz); images.resize(siz);
90 
91  // Load the data:
92  std::vector<unsigned char> buf(w * h * 3);
93 
94  for (size_t i = 0; i < siz; ++i)
95  {
96  unsigned char label; ifs.read((char *)(&label), 1);
97  if (!ifs) LFATAL("Error reading " << fname);
98  labels[idx[i]] = label;
99 
100  ifs.read((char *)(&buf[0]), buf.size());
101  if (!ifs) LFATAL("Error reading " << fname);
102 
103  tiny_dnn::vec_t img;
104  std::transform(buf.begin(), buf.end(), std::back_inserter(img),
105  [&](unsigned char c) { return scale_min + (scale_max - scale_min) * c / 255.0F; });
106 
107  images[idx[i]] = img;
108  }
109  ifs.close();
110  LINFO("Loaded " << siz << " images and labels from file " << fname);
111  }
112 
113  // ####################################################################################################
114  void create_compiled(std::string const & fname, size_t startinst, size_t numinst)
115  {
116  static std::vector<std::string> const categs = { "car", "equip", "plane", "boat", "mil" }; // FIXME
117 
118  LINFO("Create " << fname << " using " << numinst << " instances starting at " << startinst);
119 
120  std::ofstream ofs(fname, std::ios::out | std::ios::binary);
121  if (ofs.is_open() == false) LFATAL("Error trying to write file " << fname);
122 
123  for (unsigned char categ = 0; categ < categs.size(); ++categ)
124  for (size_t inst = startinst; inst < startinst + numinst; ++inst)
125  {
126  // Create big image filename: eg, base/boat/boat-i0008-b0077-cropped.png
127  char tmp[2048];
128  snprintf(tmp, 2048, "/lab/tmp10b/u/iLab-20M-Cropped-Jiaping-Augments/%s/%s-i%04zu-b0000-cropped.png",
129  categs[categ].c_str(), categs[categ].c_str(), inst);
130  LINFO("... adding 1320 images from " << tmp);
131 
132  // Load the big image:
133  cv::Mat bigimg = cv::imread(tmp);
134 
135  // Images contain 44 (wide) x 30 (tall) = 1320 crops. Determine crop size:
136  int const cw = bigimg.cols / 44;
137  int const ch = bigimg.rows / 30;
138  LINFO("cw="<<cw<<" ch="<<ch);
139  // Extract the individual views: we have 44 views horizontally, in this loop order
140  int x = 0, y = 0;
141  for (int cam = 0; cam < 11; ++cam)
142  for (int rot = 0; rot < 8; ++rot)
143  for (int lig = 0; lig < 5; ++lig)
144  for (int foc = 0; foc < 3; ++foc)
145  {
146  cv::Mat imgcrop = bigimg(cv::Rect(x, y, cw, ch));
147  cv::Mat obj; cv::resize(imgcrop, obj, cv::Size(32, 32), 0, 0, cv::INTER_AREA);
148 
149 #ifndef JEVOIS_PLATFORM
150  cv::imshow("conversion", obj); cv::waitKey(1);
151 #endif
152  cv::Mat rgbobj; cv::cvtColor(obj, rgbobj, CV_BGR2RGB); // opencv reads BGR by default
153 
154  ofs.write((char const *)(&categ), 1);
155  ofs.write((char const *)(rgbobj.data), 32*32*3);
156 
157  x += cw; if (x >= bigimg.cols) { x = 0; y += ch; }
158  }
159  }
160  }
161 }
162 
163 // ####################################################################################################
164 void ObjectRecognitionILAB::train(std::string const & path)
165 {
166  LINFO("Load training data from directory " << path);
167 
168  float learning_rate = 0.01F;
169  size_t const ntrain = 18; // number of objects to use for training
170  size_t const ntest = 4; // number of objects to use for test
171 
172  // Load ILAB dataset:
173  std::vector<tiny_dnn::label_t> train_labels, test_labels;
174  std::vector<tiny_dnn::vec_t> train_images, test_images;
175 
176  // Try to load from pre-compiled:
177  std::string const trainpath = path + "/ilab5-train.bin";
178  std::string const testpath = path + "/ilab5-test.bin";
179 
180  std::ifstream ifs(trainpath);
181  if (ifs.is_open() == false)
182  {
183  // Need to create the datasets from raw images
184  create_compiled(trainpath, 1, ntrain);
185  create_compiled(testpath, ntrain + 2, ntest);
186  }
187 
188  // Ok, the datasets:
189  load_compiled(trainpath, train_labels, train_images);
190  load_compiled(testpath, test_labels, test_images);
191 
192  LINFO("Start training...");
193  int const n_minibatch = 48;
194  int const n_train_epochs = 100;
195 
196  tiny_dnn::timer t;
197 
198  // Create callbacks:
199  auto on_enumerate_epoch = [&](){
200  LINFO(t.elapsed() << "s elapsed.");
201  tiny_dnn::result res = net->test(test_images, test_labels);
202  LINFO(res.num_success << "/" << res.num_total << " success/total validation score so far");
203 
204  //disp.restart(train_images.size());
205  t.restart();
206  };
207 
208  auto on_enumerate_minibatch = [&](){
209  //disp += n_minibatch;
210  };
211 
212  // Training:
213  tiny_dnn::adam optimizer;
214  optimizer.alpha *= static_cast<tiny_dnn::float_t>(sqrt(n_minibatch) * learning_rate);
215  net->train<tiny_dnn::cross_entropy>(optimizer, train_images, train_labels, n_minibatch, n_train_epochs,
216  on_enumerate_minibatch, on_enumerate_epoch);
217 
218  LINFO("Training complete");
219 
220  // test and show results
221  net->test(test_images, test_labels).print_detail(std::cout);
222 }
223 
224 // ####################################################################################################
225 std::string const & ObjectRecognitionILAB::category(size_t idx) const
226 {
227  static std::vector<std::string> const names = { "car", "equip", "plane", "boat", "mil" };
228 
229  if (idx >= names.size()) LFATAL("Category index out of bounds");
230 
231  return names[idx];
232 }
virtual std::string const & category(size_t idx) const override
Return the name of a given category (0-based index in the vector of results)
virtual void train(std::string const &path) override
Train the network.
virtual void define() override
Define the network structure.
#define LFATAL(msg)
Wrapper around a neural network implemented by with the tiny-dnn framework by Taiga Nomi...
tiny_dnn::network< tiny_dnn::sequential > * net
#define LINFO(msg)
ObjectRecognitionILAB(std::string const &instance)
Constructor, loads the given CNN, its sizes must match our (fixed) internal network structure...
virtual ~ObjectRecognitionILAB()
Destructor.