JeVois  1.23
JeVois Smart Embedded Machine Vision Toolkit
Share this page:
Loading...
Searching...
No Matches
YOLOjevois.C
Go to the documentation of this file.
1// ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
2//
3// JeVois Smart Embedded Machine Vision Toolkit - Copyright (C) 2024 by Laurent Itti, the University of Southern
4// California (USC), and iLab at USC. See http://iLab.usc.edu and http://jevois.org for information about this project.
5//
6// This file is part of the JeVois Smart Embedded Machine Vision Toolkit. This program is free software; you can
7// redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software
8// Foundation, version 2. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9// without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
10// License for more details. You should have received a copy of the GNU General Public License along with this program;
11// if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
12//
13// Contact information: Laurent Itti - 3641 Watt Way, HNB-07A - Los Angeles, CA 90089-2520 - USA.
14// Tel: +1 213 740 3527 - itti@pollux.usc.edu - http://iLab.usc.edu - http://jevois.org
15// ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
16/*! \file */
17
18#ifdef JEVOIS_PRO
19
21#include <jevois/DNN/CLIP.H>
22#include <jevois/DNN/Utils.H>
25
26// ####################################################################################################
27jevois::dnn::YOLOjevois::YOLOjevois(std::string const & instance, std::map<int, std::string> & labels) :
28 jevois::Component(instance), itsLabels(labels)
29{ }
30
31// ####################################################################################################
32void jevois::dnn::YOLOjevois::setup(size_t nclass, jevois::GUIhelper * helper, std::shared_ptr<jevois::dnn::Network> net)
33{
34 itsNumClasses = nclass;
35 itsHelper = helper;
36 itsNetwork = net;
37}
38
39// ####################################################################################################
42
43// ####################################################################################################
45{
46 clipmodel::freeze(doit);
47 textmodel::freeze(doit);
48}
49
50// ####################################################################################################
52{
53 if (ready() == false) LFATAL("Not ready yet");
54 if (itsCLIP) return itsCLIP->textEmbeddingSize();
55 else return 0;
56}
57
58// ####################################################################################################
60{
61 if (ready() == false) LFATAL("Not ready yet");
62 if (itsCLIP) return itsCLIP->imageEmbeddingSize();
63 else return 0;
64}
65
66namespace
67{
68 // Caution this function checks nothing, only to be used internally here:
69 inline void setEmbedding(cv::Mat & all, size_t clsid, cv::Mat const & e)
70 {
71 memcpy(all.data + clsid * e.cols * sizeof(float), e.data, e.cols * sizeof(float));
72 }
73}
74
75// ####################################################################################################
76void jevois::dnn::YOLOjevois::update(size_t const classnum, std::string const & label)
77{
78 if (ready() == false) LFATAL("Not ready");
79 if (itsCLIP->textEmbeddingSize() == 0) LFATAL("Our CLIP model does not have a text encoder");
80 if (classnum >= itsNumClasses) LFATAL("Invalid class #" << classnum << " given " << itsNumClasses << " classes");
81
82 setEmbedding(itsEmbeddings, classnum, itsCLIP->textEmbedding(label));
83 itsLabels[classnum] = label;
84 itsCLIPimages[classnum] = cv::Mat();
85 updateMainNetwork();
86 itsHelper->reportInfo("Updated class " + std::to_string(classnum) + " to [" + label + ']');
87}
88
89// ####################################################################################################
90void jevois::dnn::YOLOjevois::update(size_t const classnum, cv::Mat const & img)
91{
92 if (ready() == false) LFATAL("Not ready");
93 if (itsCLIP->imageEmbeddingSize() == 0) LFATAL("Our CLIP model does not have an image encoder");
94 if (classnum >= itsNumClasses) LFATAL("Invalid class #" << classnum << " given " << itsNumClasses << " classes");
95
96 setEmbedding(itsEmbeddings, classnum, itsCLIP->imageEmbedding(img));
97 itsLabels[classnum] = "<image for class " + std::to_string(classnum) + '>';
98 itsCLIPimages[classnum] = img;
99 updateMainNetwork();
100 itsHelper->reportInfo("Updated class " + std::to_string(classnum) + " from image");
101}
102
103// ####################################################################################################
105{
106 // If we are loaded, we are ready to process:
107 if (itsLoaded.load()) return true;
108
109 // If we are loading, check whether loading is complete or threw, otherwise return false as we keep loading:
110 if (itsLoading.load())
111 {
112 if (itsLoadFut.valid() && itsLoadFut.wait_for(std::chrono::milliseconds(2)) == std::future_status::ready)
113 {
114 try { itsLoadFut.get(); LINFO("YOLOjevois loaded."); return true; }
115 catch (...) { itsLoading.store(false); jevois::warnAndRethrowException(); }
116 }
117 return false;
118 }
119
120 // Otherwise, trigger an async load:
121 itsLoading.store(true);
122 itsLoadFut = jevois::async(std::bind(&jevois::dnn::YOLOjevois::load, this));
123 LINFO("Loading YOLOjevois helper networks...");
124
125 return false;
126}
127
128// ####################################################################################################
130{
131 if (! itsNetwork || itsNumClasses == 0 || itsHelper == nullptr) LFATAL("Need to call setup() first");
132
133 itsCLIP.reset();
134 if (itsAuxNet) { itsAuxNet.reset(); removeSubComponent("auxnet", false); }
135
136 // First, load the CLIP model:
137 std::string const clipname = yolojevois::clipmodel::get();
138 if (clipname.empty()) return;
139 itsCLIP.reset(new jevois::dnn::CLIP(jevois::absolutePath(JEVOIS_SHARE_PATH "/clip", clipname)));
140 if (itsCLIP->textEmbeddingSize() == 0) LFATAL("CLIP model must have at least a text encoder");
141 bool const has_image_encoder = (itsCLIP->imageEmbeddingSize() > 0) ? true : false;
142
143 // Then process all the labels through the CLIP encoder:
144 int const vec_dim = itsCLIP->textEmbeddingSize();
145 itsEmbeddings = cv::Mat(std::vector<int> { 1, int(itsNumClasses), vec_dim }, CV_32F );
146 itsCLIPimages.clear();
147
148 for (size_t i = 0; i < itsNumClasses; ++i)
149 {
150 std::string label = jevois::dnn::getLabel(itsLabels, i, true);
151 cv::Mat img;
152
153 if (label.empty() || jevois::stringStartsWith(label, "<live-selected "))
154 {
155 itsHelper->reportError("Invalid label for class " + std::to_string(i) + " -- FORCING TO 'person'");
156 label = "person"; itsLabels[i] = label;
157 }
158
159 if (jevois::stringStartsWith(label, "imagefile:"))
160 {
161 if (has_image_encoder)
162 {
163 // Class is defined by an image on disk; load it and compute embedding:
164 std::string imgpath = jevois::absolutePath(JEVOIS_CUSTOM_DNN_PATH, label.substr(10));
165 cv::Mat img_bgr = cv::imread(imgpath, cv::IMREAD_COLOR);
166 if (img_bgr.empty())
167 {
168 itsHelper->reportError("Failed to read " + imgpath + " -- FORCING CLASS "+std::to_string(i)+" TO 'person'");
169 label = "person"; itsLabels[i] = label;
170 setEmbedding(itsEmbeddings, i, itsCLIP->textEmbedding(label));
171 }
172 else
173 {
174 cv::cvtColor(img_bgr, img, cv::COLOR_BGR2RGB);
175 LINFO("Computing CLIP image embedding for class " << i << " [" << imgpath << "] ...");
176 setEmbedding(itsEmbeddings, i, itsCLIP->imageEmbedding(img));
177 itsLabels[i] = "<from image file>"; // we lose the file name here but will recompute it on save anyway
178 }
179 }
180 else
181 {
182 itsHelper->reportError("No CLIP image encoder -- FORCING CLASS "+std::to_string(i)+" TO 'person'");
183 label = "person"; itsLabels[i] = label;
184 setEmbedding(itsEmbeddings, i, itsCLIP->textEmbedding(label));
185 }
186 }
187 else
188 {
189 LINFO("Computing CLIP text embedding for class " << i << " [" << label << "] ...");
190 setEmbedding(itsEmbeddings, i, itsCLIP->textEmbedding(label));
191 }
192 itsCLIPimages.emplace_back(img);
193 }
194 LINFO("CLIP embeddings ready for " << itsNumClasses << " object classes");
195
196 // Then possibly load the ONNX helper:
197 std::string const onnxmodel = yolojevois::textmodel::get();
198 if (onnxmodel.empty() == false)
199 {
200 std::string const m = jevois::absolutePath(JEVOIS_SHARE_PATH, onnxmodel);
201 LINFO("Loading embedding helper " << m << " ...");
202
203 itsAuxNet = addSubComponent<jevois::dnn::NetworkONNX>("auxnet");
204 itsAuxNet->hideAllParams(true);
205 itsAuxNet->setParamStringUnique("model", m);
206
207 int iter = 0;
208 while (itsAuxNet->ready() == false && iter++ < 1000) std::this_thread::sleep_for(std::chrono::milliseconds(10));
209 if (iter == 1000) LFATAL("Timeout waiting for embedding helper to load...");
210
211 LINFO("Embedding helper ready.");
212 }
213
214 // We are officially loaded now:
215 itsLoaded.store(true);
216 itsLoading.store(false);
217
218 // Update our outputs:
219 updateMainNetwork();
220}
221
222// ####################################################################################################
223void jevois::dnn::YOLOjevois::updateMainNetwork()
224{
225 if (ready() == false) LFATAL("Not ready");
226 if (! itsNetwork) LFATAL("No main network to update");
227 if (itsNetwork->ready() == false) LFATAL("Main network not ready");
228
229 if (itsAuxNet)
230 {
231 // Run the aux net to get 5 tensors from the CLIP embeddings:
232 std::vector<cv::Mat> ins; ins.push_back(itsEmbeddings);
233 std::vector<std::string> ignored_info;
234 std::vector<cv::Mat> outs = itsAuxNet->process(ins, ignored_info);
235
236 // Now update the main network:
237 for (size_t i = 0; i < outs.size(); ++i)
238 itsNetwork->setExtraInputFromFloat32(i + 1 /* input number */, outs[i]);
239 }
240 else
241 {
242 // Using CLIP only, main network should expect only 1 extra input for CLIP embeddings:
243 itsNetwork->setExtraInputFromFloat32(1 /* input number */, itsEmbeddings);
244 }
245
246 LINFO("Updated main network with modified classes.");
247}
248
249// ####################################################################################################
250cv::Mat const & jevois::dnn::YOLOjevois::image(size_t const classid) const
251{
252 if (classid >= itsNumClasses) LFATAL("Invalid class id "<<classid<<" (only have " << itsNumClasses<<" classes)");
253 return itsCLIPimages[classid];
254}
255
256#endif // JEVOIS_PRO
257
#define JEVOIS_CUSTOM_DNN_PATH
Directory where custom DNN models are stored:
Definition Config.H:85
#define JEVOIS_SHARE_PATH
Base path for shared files (e.g., neural network weights, etc)
Definition Config.H:82
A component of a model hierarchy.
Definition Component.H:182
Helper class to assist modules in creating graphical and GUI elements.
Definition GUIhelper.H:133
Interface to a CLIP model used to compute text and image embeddings.
Definition CLIP.H:36
void setup(size_t nclass, GUIhelper *helper, std::shared_ptr< jevois::dnn::Network > net)
Definition YOLOjevois.C:32
void update(size_t const classnum, std::string const &label)
Update one class using text.
Definition YOLOjevois.C:76
int textEmbeddingSize()
Get CLIP text embedding size, or 0 if we do not have a CLIP text encoder.
Definition YOLOjevois.C:51
void freeze(bool doit)
Freeze/unfreeze parameters that users should not change while running.
Definition YOLOjevois.C:44
virtual ~YOLOjevois()
Virtual destructor.
Definition YOLOjevois.C:40
bool ready()
Are we ready to work, or still loading our networks?
Definition YOLOjevois.C:104
int imageEmbeddingSize()
Get CLIP image embedding size, or 0 if we do not have a CLIP image encoder.
Definition YOLOjevois.C:59
cv::Mat const & image(size_t const classid) const
Access our class definition images.
Definition YOLOjevois.C:250
YOLOjevois(std::string const &instance, std::map< int, std::string > &labels)
Inherited constructor ok; must call setup() before using.
Definition YOLOjevois.C:27
void load()
Load CLIP and ONNX networks, in a non-blocking thread.
Definition YOLOjevois.C:129
#define LFATAL(msg)
Convenience macro for users to print out console or syslog messages, FATAL level.
Definition Log.H:230
void warnAndRethrowException(std::string const &prefix="")
Convenience function to catch an exception, issue some LERROR (depending on type),...
Definition Log.C:203
#define LINFO(msg)
Convenience macro for users to print out console or syslog messages, INFO level.
Definition Log.H:194
std::string getLabel(std::map< int, std::string > const &labels, int id, bool namedonly=false)
Get a label from an id.
Definition Utils.C:85
std::future< std::invoke_result_t< std::decay_t< Function >, std::decay_t< Args >... > > async(Function &&f, Args &&... args)
Async execution using a thread pool.
std::filesystem::path absolutePath(std::filesystem::path const &root, std::filesystem::path const &path)
Compute an absolute path from two paths.
Definition Utils.C:386
bool stringStartsWith(std::string const &str, std::string const &prefix)
Return true if str starts with prefix (including if both strings are equal)
Definition Utils.C:295
Main namespace for all JeVois classes and functions.
Definition Concepts.dox:2