JeVoisBase  1.22
JeVois Smart Embedded Machine Vision Toolkit Base Modules
Share this page:
Loading...
Searching...
No Matches
ObjectRecognitionILAB.C
Go to the documentation of this file.
1// ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
2//
3// JeVois Smart Embedded Machine Vision Toolkit - Copyright (C) 2016 by Laurent Itti, the University of Southern
4// California (USC), and iLab at USC. See http://iLab.usc.edu and http://jevois.org for information about this project.
5//
6// This file is part of the JeVois Smart Embedded Machine Vision Toolkit. This program is free software; you can
7// redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software
8// Foundation, version 2. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9// without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
10// License for more details. You should have received a copy of the GNU General Public License along with this program;
11// if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
12//
13// Contact information: Laurent Itti - 3641 Watt Way, HNB-07A - Los Angeles, CA 90089-2520 - USA.
14// Tel: +1 213 740 3527 - itti@pollux.usc.edu - http://iLab.usc.edu - http://jevois.org
15// ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
16/*! \file */
17
#include "tiny-dnn/tiny_dnn/tiny_dnn.h"
#include <jevois/Debug/Log.H>

#include <algorithm>
#include <cmath>
#include <fstream>
#include <random>

#include <opencv2/imgproc/imgproc.hpp>

#ifdef JEVOIS_PLATFORM
#include <opencv2/imgcodecs/imgcodecs.hpp> // for imread in opencv3.1
#else
#include <opencv2/highgui/highgui.hpp> // for imread and imshow in older opencv
#endif
31
32
33// ####################################################################################################
34ObjectRecognitionILAB::ObjectRecognitionILAB(std::string const & instance) :
35 ObjectRecognition<tiny_dnn::sequential>(instance)
36{ }
37
38// ####################################################################################################
40{
41 // Nothing to do, base class destructor will de-allocate the network
42}
43
44// ####################################################################################################
46{
47 using conv = tiny_dnn::convolutional_layer;
48 using pool = tiny_dnn::max_pooling_layer;
49 using fc = tiny_dnn::fully_connected_layer;
50 using relu = tiny_dnn::relu_layer;
51 using softmax = tiny_dnn::softmax_layer;
52
53 const size_t n_fmaps = 10; ///< number of feature maps for upper layer
54 const size_t n_fc = 64; ///< number of hidden units in fully-connected layer
55 int const n_categ = 5; // number of object categories
56
57 (*net) << conv(32, 32, 5, 3, n_fmaps, tiny_dnn::padding::same) // C1
58 << pool(32, 32, n_fmaps, 2) // P2
59 << relu(16, 16, n_fmaps) // activation
60 << conv(16, 16, 5, n_fmaps, n_fmaps * 2, tiny_dnn::padding::same) // C3
61 << pool(16, 16, n_fmaps * 2, 2) // P4
62 << relu(8, 8, n_fmaps) // activation
63 << conv(8, 8, 5, n_fmaps * 2, n_fmaps * 4, tiny_dnn::padding::same) // C5
64 << pool(8, 8, n_fmaps * 4, 2) // P6
65 << relu(4, 4, n_fmaps * 42) // activation
66 << fc(4 * 4 * n_fmaps * 4, n_fc) // FC7
67 << fc(n_fc, n_categ) << softmax(n_categ); // FC10
68}
69
70// ####################################################################################################
71namespace
72{
73 void load_compiled(std::string const & fname, std::vector<tiny_dnn::label_t> & labels,
74 std::vector<tiny_dnn::vec_t> & images)
75 {
76 float const scale_min = -1.0F;
77 float const scale_max = 1.0F;
78 int const w = 32, h = 32; // image width and height
79
80 std::ifstream ifs(fname, std::ios::in | std::ios::binary);
81 if (ifs.is_open() == false) LFATAL("Failed to open load " << fname);
82
83 // We need to randomize the order. To achieve this, we will here first compute a randomized set of indices, then
84 // populate the data at those indices:
85 size_t siz;
86 {
87 // get input file size:
88 std::ifstream file(fname, std::ios::binary | std::ios::ate);
89 siz = file.tellg() / (w * h * 3 + 1);
90 LINFO("File has " << siz << " entries");
91 }
92
93 std::vector<size_t> idx; for (size_t i = 0; i < siz; ++i) idx.push_back(i);
94 std::random_shuffle(idx.begin(), idx.end());
95 labels.resize(siz); images.resize(siz);
96
97 // Load the data:
98 std::vector<unsigned char> buf(w * h * 3);
99
100 for (size_t i = 0; i < siz; ++i)
101 {
102 unsigned char label; ifs.read((char *)(&label), 1);
103 if (!ifs) LFATAL("Error reading " << fname);
104 labels[idx[i]] = label;
105
106 ifs.read((char *)(&buf[0]), buf.size());
107 if (!ifs) LFATAL("Error reading " << fname);
108
109 tiny_dnn::vec_t img;
110 std::transform(buf.begin(), buf.end(), std::back_inserter(img),
111 [&](unsigned char c) { return scale_min + (scale_max - scale_min) * c / 255.0F; });
112
113 images[idx[i]] = img;
114 }
115 ifs.close();
116 LINFO("Loaded " << siz << " images and labels from file " << fname);
117 }
118
119 // ####################################################################################################
120 void create_compiled(std::string const & fname, size_t startinst, size_t numinst)
121 {
122 static std::vector<std::string> const categs = { "car", "equip", "plane", "boat", "mil" }; // FIXME
123
124 LINFO("Create " << fname << " using " << numinst << " instances starting at " << startinst);
125
126 std::ofstream ofs(fname, std::ios::out | std::ios::binary);
127 if (ofs.is_open() == false) LFATAL("Error trying to write file " << fname);
128
129 for (unsigned char categ = 0; categ < categs.size(); ++categ)
130 for (size_t inst = startinst; inst < startinst + numinst; ++inst)
131 {
132 // Create big image filename: eg, base/boat/boat-i0008-b0077-cropped.png
133 char tmp[2048];
134 snprintf(tmp, 2048, "/lab/tmp10b/u/iLab-20M-Cropped-Jiaping-Augments/%s/%s-i%04zu-b0000-cropped.png",
135 categs[categ].c_str(), categs[categ].c_str(), inst);
136 LINFO("... adding 1320 images from " << tmp);
137
138 // Load the big image:
139 cv::Mat bigimg = cv::imread(tmp);
140
141 // Images contain 44 (wide) x 30 (tall) = 1320 crops. Determine crop size:
142 int const cw = bigimg.cols / 44;
143 int const ch = bigimg.rows / 30;
144 LINFO("cw="<<cw<<" ch="<<ch);
145 // Extract the individual views: we have 44 views horizontally, in this loop order
146 int x = 0, y = 0;
147 for (int cam = 0; cam < 11; ++cam)
148 for (int rot = 0; rot < 8; ++rot)
149 for (int lig = 0; lig < 5; ++lig)
150 for (int foc = 0; foc < 3; ++foc)
151 {
152 cv::Mat imgcrop = bigimg(cv::Rect(x, y, cw, ch));
153 cv::Mat obj; cv::resize(imgcrop, obj, cv::Size(32, 32), 0, 0, cv::INTER_AREA);
154
155#ifndef JEVOIS_PLATFORM
156 cv::imshow("conversion", obj); cv::waitKey(1);
157#endif
158 cv::Mat rgbobj; cv::cvtColor(obj, rgbobj, cv::COLOR_BGR2RGB); // opencv reads BGR by default
159
160 ofs.write((char const *)(&categ), 1);
161 ofs.write((char const *)(rgbobj.data), 32*32*3);
162
163 x += cw; if (x >= bigimg.cols) { x = 0; y += ch; }
164 }
165 }
166 }
167}
168
169// ####################################################################################################
170void ObjectRecognitionILAB::train(std::string const & path)
171{
172 LINFO("Load training data from directory " << path);
173
174 float learning_rate = 0.01F;
175 size_t const ntrain = 18; // number of objects to use for training
176 size_t const ntest = 4; // number of objects to use for test
177
178 // Load ILAB dataset:
179 std::vector<tiny_dnn::label_t> train_labels, test_labels;
180 std::vector<tiny_dnn::vec_t> train_images, test_images;
181
182 // Try to load from pre-compiled:
183 std::string const trainpath = path + "/ilab5-train.bin";
184 std::string const testpath = path + "/ilab5-test.bin";
185
186 std::ifstream ifs(trainpath);
187 if (ifs.is_open() == false)
188 {
189 // Need to create the datasets from raw images
190 create_compiled(trainpath, 1, ntrain);
191 create_compiled(testpath, ntrain + 2, ntest);
192 }
193
194 // Ok, the datasets:
195 load_compiled(trainpath, train_labels, train_images);
196 load_compiled(testpath, test_labels, test_images);
197
198 LINFO("Start training...");
199 int const n_minibatch = 48;
200 int const n_train_epochs = 100;
201
202 tiny_dnn::timer t;
203
204 // Create callbacks:
205 auto on_enumerate_epoch = [&](){
206 LINFO(t.elapsed() << "s elapsed.");
207 tiny_dnn::result res = net->test(test_images, test_labels);
208 LINFO(res.num_success << "/" << res.num_total << " success/total validation score so far");
209
210 //disp.restart(train_images.size());
211 t.restart();
212 };
213
214 auto on_enumerate_minibatch = [&](){
215 //disp += n_minibatch;
216 };
217
218 // Training:
219 tiny_dnn::adam optimizer;
220 optimizer.alpha *= static_cast<tiny_dnn::float_t>(sqrt(n_minibatch) * learning_rate);
221 net->train<tiny_dnn::cross_entropy>(optimizer, train_images, train_labels, n_minibatch, n_train_epochs,
222 on_enumerate_minibatch, on_enumerate_epoch);
223
224 LINFO("Training complete");
225
226 // test and show results
227 net->test(test_images, test_labels).print_detail(std::cout);
228}
229
230// ####################################################################################################
231std::string const & ObjectRecognitionILAB::category(size_t idx) const
232{
233 static std::vector<std::string> const names = { "car", "equip", "plane", "boat", "mil" };
234
235 if (idx >= names.size()) LFATAL("Category index out of bounds");
236
237 return names[idx];
238}
int h
ObjectRecognitionILAB(std::string const &instance)
Constructor, loads the given CNN, its sizes must match our (fixed) internal network structure.
virtual std::string const & category(size_t idx) const override
Return the name of a given category (0-based index in the vector of results)
virtual ~ObjectRecognitionILAB()
Destructor.
virtual void define() override
Define the network structure.
virtual void train(std::string const &path) override
Train the network.
Wrapper around a neural network implemented with the tiny-dnn framework by Taiga Nomi.
tiny_dnn::network< tiny_dnn::sequential > * net
#define LFATAL(msg)
#define LINFO(msg)