34 itsNumClasses = nclass;
// Pass the freeze/unfreeze request (doit) through to both clipmodel and textmodel.
46 clipmodel::freeze(doit);
47 textmodel::freeze(doit);
// Text embedding size query. Issues LFATAL when ready() reports false; otherwise, if a CLIP model is
// attached (itsCLIP is non-null), the answer is delegated to it.
// NOTE(review): what is returned when itsCLIP is null is not visible in this listing.
53 if (ready() ==
false)
LFATAL(
"Not ready yet");
54 if (itsCLIP)
return itsCLIP->textEmbeddingSize();
// Image embedding size query. Issues LFATAL when ready() reports false; otherwise, if a CLIP model is
// attached (itsCLIP is non-null), the answer is delegated to it.
// NOTE(review): what is returned when itsCLIP is null is not visible in this listing.
61 if (ready() ==
false)
LFATAL(
"Not ready yet");
62 if (itsCLIP)
return itsCLIP->imageEmbeddingSize();
// Copy the embedding e into slot clsid of all: e.cols floats are read from e.data and written at byte
// offset clsid * e.cols * sizeof(float) from all.data.
// NOTE(review): nothing here checks element type, continuity or bounds -- this assumes e holds e.cols
// contiguous 32-bit floats and that all has room for slot clsid; confirm against the callers.
69 inline void setEmbedding(cv::Mat & all,
size_t clsid, cv::Mat
const & e)
71 memcpy(all.data + clsid * e.cols *
sizeof(
float), e.data, e.cols *
sizeof(
float));
// Redefine class classnum from a text label.
// Preconditions, each reported through LFATAL when violated: we must be ready, the CLIP model must
// report a non-zero text embedding size, and classnum must be below itsNumClasses.
// NOTE(review): itsCLIP is dereferenced without a null check; presumably ready() implies it is set -- confirm.
78 if (ready() ==
false)
LFATAL(
"Not ready");
79 if (itsCLIP->textEmbeddingSize() == 0)
LFATAL(
"Our CLIP model does not have a text encoder");
80 if (classnum >= itsNumClasses)
LFATAL(
"Invalid class #" << classnum <<
" given " << itsNumClasses <<
" classes");
// Overwrite this class' slot in itsEmbeddings with the text embedding of the new label, remember the
// label, and replace any image stored for this class with an empty cv::Mat.
82 setEmbedding(itsEmbeddings, classnum, itsCLIP->textEmbedding(label));
83 itsLabels[classnum] = label;
84 itsCLIPimages[classnum] = cv::Mat();
// Report the change through the GUI helper.
86 itsHelper->reportInfo(
"Updated class " + std::to_string(classnum) +
" to [" + label +
']');
// Redefine class classnum from an image.
// Preconditions, each reported through LFATAL when violated: we must be ready, the CLIP model must
// report a non-zero image embedding size, and classnum must be below itsNumClasses.
// NOTE(review): itsCLIP is dereferenced without a null check; presumably ready() implies it is set -- confirm.
92 if (ready() ==
false)
LFATAL(
"Not ready");
93 if (itsCLIP->imageEmbeddingSize() == 0)
LFATAL(
"Our CLIP model does not have an image encoder");
94 if (classnum >= itsNumClasses)
LFATAL(
"Invalid class #" << classnum <<
" given " << itsNumClasses <<
" classes");
// Overwrite this class' slot in itsEmbeddings with the image embedding of img.
96 setEmbedding(itsEmbeddings, classnum, itsCLIP->imageEmbedding(img));
// There is no text for an image-defined class, so a placeholder label naming the class number is stored.
97 itsLabels[classnum] =
"<image for class " + std::to_string(classnum) +
'>';
// Keep the image that now defines this class.
98 itsCLIPimages[classnum] = img;
// Report the change through the GUI helper.
100 itsHelper->reportInfo(
"Updated class " + std::to_string(classnum) +
" from image");
// Readiness check. Once itsLoaded is set we are ready and return immediately.
107 if (itsLoaded.load())
return true;
// While a load is in progress, wait up to 2 ms on the load future. If it has completed, get() collects
// the result (a std::future rethrows here any exception stored by the task); on success we log and
// report ready.
// NOTE(review): the handler paired with this try, and what is returned while the future is still
// pending, are not visible in this listing.
110 if (itsLoading.load())
112 if (itsLoadFut.valid() && itsLoadFut.wait_for(std::chrono::milliseconds(2)) == std::future_status::ready)
114 try { itsLoadFut.get();
LINFO(
"YOLOjevois loaded.");
return true; }
// Flag that a load is now in progress and log it.
// NOTE(review): presumably still part of the readiness check, with the statement that actually starts
// the load sitting between these two lines -- it is not visible in this listing; confirm.
121 itsLoading.store(
true);
123 LINFO(
"Loading YOLOjevois helper networks...");
// Load the helper networks and compute the initial per-class embeddings.
// A main network, a non-zero class count and a GUI helper are required; otherwise LFATAL is issued.
131 if (! itsNetwork || itsNumClasses == 0 || itsHelper ==
nullptr)
LFATAL(
"Need to call setup() first");
// Drop any auxiliary network left from a previous load, together with its "auxnet" sub-component.
134 if (itsAuxNet) { itsAuxNet.reset(); removeSubComponent(
"auxnet",
false); }
// Fetch the configured CLIP model name; with an empty name there is nothing to load.
// NOTE(review): this early return skips the itsLoaded/itsLoading updates made further down -- confirm
// that leaving those flags untouched is intended.
137 std::string
const clipname = yolojevois::clipmodel::get();
138 if (clipname.empty())
return;
// NOTE(review): the statement that creates itsCLIP from clipname is not visible in this listing.
// A text encoder is mandatory; an image encoder is optional and only recorded here.
140 if (itsCLIP->textEmbeddingSize() == 0)
LFATAL(
"CLIP model must have at least a text encoder");
141 bool const has_image_encoder = (itsCLIP->imageEmbeddingSize() > 0) ?
true :
false;
// Allocate the embeddings as a 3-D float32 matrix of dimensions 1 x itsNumClasses x vec_dim, and start
// with an empty list of class images.
144 int const vec_dim = itsCLIP->textEmbeddingSize();
145 itsEmbeddings = cv::Mat(std::vector<int> { 1, int(itsNumClasses), vec_dim }, CV_32F );
146 itsCLIPimages.clear();
// Compute one embedding per class.
// NOTE(review): the loop body's braces, the declarations of label, imgpath and img, and the conditions
// selecting each of the cases below are not visible in this listing.
148 for (
size_t i = 0; i < itsNumClasses; ++i)
// Case: the label is invalid -> report it and fall back to 'person'.
155 itsHelper->reportError(
"Invalid label for class " + std::to_string(i) +
" -- FORCING TO 'person'");
156 label =
"person"; itsLabels[i] = label;
// Case: the class is defined by an image file and we have an image encoder.
161 if (has_image_encoder)
165 cv::Mat img_bgr = cv::imread(imgpath, cv::IMREAD_COLOR);
// Image file unreadable -> report it and fall back to the text embedding of 'person'.
168 itsHelper->reportError(
"Failed to read " + imgpath +
" -- FORCING CLASS "+std::to_string(i)+
" TO 'person'");
169 label =
"person"; itsLabels[i] = label;
170 setEmbedding(itsEmbeddings, i, itsCLIP->textEmbedding(label));
// Image file read -> convert BGR to RGB, embed the RGB image, and store a placeholder label.
174 cv::cvtColor(img_bgr, img, cv::COLOR_BGR2RGB);
175 LINFO(
"Computing CLIP image embedding for class " << i <<
" [" << imgpath <<
"] ...");
176 setEmbedding(itsEmbeddings, i, itsCLIP->imageEmbedding(img));
177 itsLabels[i] =
"<from image file>";
// Case: no image encoder is available -> report it and fall back to the text embedding of 'person'.
182 itsHelper->reportError(
"No CLIP image encoder -- FORCING CLASS "+std::to_string(i)+
" TO 'person'");
183 label =
"person"; itsLabels[i] = label;
184 setEmbedding(itsEmbeddings, i, itsCLIP->textEmbedding(label));
// Case: the class is defined by a text label -> embed the label.
189 LINFO(
"Computing CLIP text embedding for class " << i <<
" [" << label <<
"] ...");
190 setEmbedding(itsEmbeddings, i, itsCLIP->textEmbedding(label));
// Append img to the list of class images (presumably once per class, inside the loop -- confirm).
192 itsCLIPimages.emplace_back(img);
194 LINFO(
"CLIP embeddings ready for " << itsNumClasses <<
" object classes");
// Optional ONNX helper: only instantiated when a model name is configured.
// NOTE(review): the definition of m (the model path handed to the helper) is not visible in this listing.
197 std::string
const onnxmodel = yolojevois::textmodel::get();
198 if (onnxmodel.empty() ==
false)
201 LINFO(
"Loading embedding helper " << m <<
" ...");
// Create the helper as the "auxnet" sub-component, hide its parameters, and point it at the model.
203 itsAuxNet = addSubComponent<jevois::dnn::NetworkONNX>(
"auxnet");
204 itsAuxNet->hideAllParams(
true);
205 itsAuxNet->setParamStringUnique(
"model", m);
// Poll the helper every 10 ms until it reports ready or the iteration budget runs out.
// NOTE(review): the declaration of iter is not visible here. Because the loop condition post-increments
// iter, a real timeout leaves iter at 1001 (so the check below does not fire), whereas a helper that
// becomes ready exactly when iter has reached 1000 does trigger the LFATAL. The boundary test looks
// off by one -- verify.
208 while (itsAuxNet->ready() ==
false && iter++ < 1000) std::this_thread::sleep_for(std::chrono::milliseconds(10));
209 if (iter == 1000)
LFATAL(
"Timeout waiting for embedding helper to load...");
211 LINFO(
"Embedding helper ready.");
// Publish completion: mark as loaded and clear the loading flag.
215 itsLoaded.store(
true);
216 itsLoading.store(
false);
// Push the current class embeddings into the main network as extra inputs.
// LFATAL is issued if we are not ready, if there is no main network, or if the main network itself is
// not ready yet.
223void jevois::dnn::YOLOjevois::updateMainNetwork()
225 if (ready() ==
false)
LFATAL(
"Not ready");
226 if (! itsNetwork)
LFATAL(
"No main network to update");
227 if (itsNetwork->ready() ==
false)
LFATAL(
"Main network not ready");
// NOTE(review): the condition choosing between the two paths below is not visible in this listing;
// presumably the first applies when itsAuxNet exists and the second otherwise -- confirm.
// Path 1: run itsEmbeddings through the auxiliary network and feed each of its outputs to the main
// network as extra input number i + 1. The info strings returned by process() are discarded.
232 std::vector<cv::Mat> ins; ins.push_back(itsEmbeddings);
233 std::vector<std::string> ignored_info;
234 std::vector<cv::Mat> outs = itsAuxNet->process(ins, ignored_info);
237 for (
size_t i = 0; i < outs.size(); ++i)
238 itsNetwork->setExtraInputFromFloat32(i + 1 , outs[i]);
// Path 2: feed itsEmbeddings to the main network directly as extra input number 1.
243 itsNetwork->setExtraInputFromFloat32(1 , itsEmbeddings);
246 LINFO(
"Updated main network with modified classes.");
// Return the image stored for class classid; LFATAL is issued when classid is not below itsNumClasses.
// NOTE(review): the check is against itsNumClasses rather than itsCLIPimages.size(); this presumably
// relies on the image list having been fully populated by the load step -- confirm.
252 if (classid >= itsNumClasses)
LFATAL(
"Invalid class id "<<classid<<
" (only have " << itsNumClasses<<
" classes)");
253 return itsCLIPimages[classid];
#define JEVOIS_CUSTOM_DNN_PATH
Directory where custom DNN models are stored:
#define JEVOIS_SHARE_PATH
Base path for shared files (e.g., neural network weights, etc)
A component of a model hierarchy.
Helper class to assist modules in creating graphical and GUI elements.
Interface to a CLIP model used to compute text and image embeddings.
void setup(size_t nclass, GUIhelper *helper, std::shared_ptr< jevois::dnn::Network > net)
void update(size_t const classnum, std::string const &label)
Update one class using text.
int textEmbeddingSize()
Get CLIP text embedding size, or 0 if we do not have a CLIP text encoder.
void freeze(bool doit)
Freeze/unfreeze parameters that users should not change while running.
virtual ~YOLOjevois()
Virtual destructor.
bool ready()
Are we ready to work, or still loading our networks?
int imageEmbeddingSize()
Get CLIP image embedding size, or 0 if we do not have a CLIP image encoder.
cv::Mat const & image(size_t const classid) const
Access our class definition images.
YOLOjevois(std::string const &instance, std::map< int, std::string > &labels)
Inherited constructor ok; must call setup() before using.
void load()
Load CLIP and ONNX networks, in a non-blocking thread.
#define LFATAL(msg)
Convenience macro for users to print out console or syslog messages, FATAL level.
void warnAndRethrowException(std::string const &prefix="")
Convenience function to catch an exception, issue some LERROR (depending on type), and rethrow it.
#define LINFO(msg)
Convenience macro for users to print out console or syslog messages, INFO level.
std::string getLabel(std::map< int, std::string > const &labels, int id, bool namedonly=false)
Get a label from an id.
std::future< std::invoke_result_t< std::decay_t< Function >, std::decay_t< Args >... > > async(Function &&f, Args &&... args)
Async execution using a thread pool.
std::filesystem::path absolutePath(std::filesystem::path const &root, std::filesystem::path const &path)
Compute an absolute path from two paths.
bool stringStartsWith(std::string const &str, std::string const &prefix)
Return true if str starts with prefix (including if both strings are equal)
Main namespace for all JeVois classes and functions.