34  itsNumClasses = nclass;
 
 
   46  clipmodel::freeze(doit);
 
   47  textmodel::freeze(doit);
 
 
   53  if (ready() == 
false) 
LFATAL(
"Not ready yet");
 
   54  if (itsCLIP) 
return itsCLIP->textEmbeddingSize();
 
 
   61  if (ready() == 
false) 
LFATAL(
"Not ready yet");
 
   62  if (itsCLIP) 
return itsCLIP->imageEmbeddingSize();
 
 
   69  inline void setEmbedding(cv::Mat & all, 
size_t clsid, cv::Mat 
const & e)
 
   71    memcpy(all.data + clsid * e.cols * 
sizeof(
float), e.data, e.cols * 
sizeof(
float));
 
   78  if (ready() == 
false) 
LFATAL(
"Not ready");
 
   79  if (itsCLIP->textEmbeddingSize() == 0) 
LFATAL(
"Our CLIP model does not have a text encoder");
 
   80  if (classnum >= itsNumClasses) 
LFATAL(
"Invalid class #" << classnum << 
" given " << itsNumClasses << 
" classes");
 
   82  setEmbedding(itsEmbeddings, classnum, itsCLIP->textEmbedding(label));
 
   83  itsLabels[classnum] = label;
 
   84  itsCLIPimages[classnum] = cv::Mat();
 
   86  itsHelper->reportInfo(
"Updated class " + std::to_string(classnum) + 
" to [" + label + 
']');
 
 
   92  if (ready() == 
false) 
LFATAL(
"Not ready");
 
   93  if (itsCLIP->imageEmbeddingSize() == 0) 
LFATAL(
"Our CLIP model does not have an image encoder");
 
   94  if (classnum >= itsNumClasses) 
LFATAL(
"Invalid class #" << classnum << 
" given " << itsNumClasses << 
" classes");
 
   96  setEmbedding(itsEmbeddings, classnum, itsCLIP->imageEmbedding(img));
 
   97  itsLabels[classnum] = 
"<image for class " + std::to_string(classnum) + 
'>';
 
   98  itsCLIPimages[classnum] = img;
 
  100  itsHelper->reportInfo(
"Updated class " + std::to_string(classnum) + 
" from image");
 
 
  107  if (itsLoaded.load()) 
return true;
 
  110  if (itsLoading.load())
 
  112    if (itsLoadFut.valid() && itsLoadFut.wait_for(std::chrono::milliseconds(2)) == std::future_status::ready)
 
  114      try { itsLoadFut.get(); 
LINFO(
"YOLOjevois loaded."); 
return true; }
 
  121  itsLoading.store(
true);
 
  123  LINFO(
"Loading YOLOjevois helper networks...");
 
 
  131  if (! itsNetwork || itsNumClasses == 0 || itsHelper == 
nullptr) 
LFATAL(
"Need to call setup() first");
 
  134  if (itsAuxNet) { itsAuxNet.reset(); removeSubComponent(
"auxnet", 
false); }
 
  137  std::string 
const clipname = yolojevois::clipmodel::get();
 
  138  if (clipname.empty()) 
return;
 
  140  if (itsCLIP->textEmbeddingSize() == 0) 
LFATAL(
"CLIP model must have at least a text encoder");
 
  141  bool const has_image_encoder = (itsCLIP->imageEmbeddingSize() > 0) ? 
true : 
false;
 
  144  int const vec_dim = itsCLIP->textEmbeddingSize();
 
  145  itsEmbeddings = cv::Mat(std::vector<int> { 1, int(itsNumClasses), vec_dim }, CV_32F );
 
  146  itsCLIPimages.clear();
 
  148  for (
size_t i = 0; i < itsNumClasses; ++i)
 
  155      itsHelper->reportError(
"Invalid label for class " + std::to_string(i) + 
" -- FORCING TO 'person'");
 
  156      label = 
"person"; itsLabels[i] = label;
 
  161      if (has_image_encoder)
 
  165        cv::Mat img_bgr = cv::imread(imgpath, cv::IMREAD_COLOR);
 
  168          itsHelper->reportError(
"Failed to read " + imgpath + 
" -- FORCING CLASS "+std::to_string(i)+
" TO 'person'");
 
  169          label = 
"person"; itsLabels[i] = label;
 
  170          setEmbedding(itsEmbeddings, i, itsCLIP->textEmbedding(label));
 
  174          cv::cvtColor(img_bgr, img, cv::COLOR_BGR2RGB);
 
  175          LINFO(
"Computing CLIP image embedding for class " << i << 
" [" << imgpath << 
"] ...");
 
  176          setEmbedding(itsEmbeddings, i, itsCLIP->imageEmbedding(img));
 
  177          itsLabels[i] = 
"<from image file>"; 
 
  182        itsHelper->reportError(
"No CLIP image encoder -- FORCING CLASS "+std::to_string(i)+
" TO 'person'");
 
  183        label = 
"person"; itsLabels[i] = label;
 
  184        setEmbedding(itsEmbeddings, i, itsCLIP->textEmbedding(label));
 
  189      LINFO(
"Computing CLIP text embedding for class " << i << 
" [" << label << 
"] ...");
 
  190      setEmbedding(itsEmbeddings, i, itsCLIP->textEmbedding(label));
 
  192    itsCLIPimages.emplace_back(img);
 
  194  LINFO(
"CLIP embeddings ready for " << itsNumClasses << 
" object classes");
 
  197  std::string 
const onnxmodel = yolojevois::textmodel::get();
 
  198  if (onnxmodel.empty() == 
false)
 
  201    LINFO(
"Loading embedding helper " << m << 
" ...");
 
  203    itsAuxNet = addSubComponent<jevois::dnn::NetworkONNX>(
"auxnet");
 
  204    itsAuxNet->hideAllParams(
true);
 
  205    itsAuxNet->setParamStringUnique(
"model", m);
 
  208    while (itsAuxNet->ready() == 
false && iter++ < 1000) std::this_thread::sleep_for(std::chrono::milliseconds(10));
 
  209    if (iter == 1000) 
LFATAL(
"Timeout waiting for embedding helper to load...");
 
  211    LINFO(
"Embedding helper ready.");
 
  215  itsLoaded.store(
true);
 
  216  itsLoading.store(
false);
 
 
  // Hand the current class embeddings (itsEmbeddings) to the main network through setExtraInputFromFloat32().
  // Fails through LFATAL if this object is not ready, if there is no main network, or if the main network is not
  // ready.
  // NOTE(review): the statements that choose between the auxiliary-network path and the direct path below are not
  // visible in this listing; presumably the choice depends on whether itsAuxNet is set -- confirm against the
  // full file.
  223void jevois::dnn::YOLOjevois::updateMainNetwork()
 
  // Preconditions: our own networks are loaded, and a ready main network exists.
  225  if (ready() == 
false) 
LFATAL(
"Not ready");
 
  226  if (! itsNetwork) 
LFATAL(
"No main network to update");
 
  227  if (itsNetwork->ready() == 
false) 
LFATAL(
"Main network not ready");
 
  // Auxiliary-network path: itsEmbeddings is the single input given to itsAuxNet->process().
  232    std::vector<cv::Mat> ins; ins.push_back(itsEmbeddings);
 
  // process() requires an info vector; its contents are not used afterwards.
  233    std::vector<std::string> ignored_info;
 
  234    std::vector<cv::Mat> outs = itsAuxNet->process(ins, ignored_info);
 
  // Output number i of the auxiliary network is set as extra input number i + 1 of the main network.
  237    for (
size_t i = 0; i < outs.size(); ++i)
 
  238      itsNetwork->setExtraInputFromFloat32(i + 1 , outs[i]);
 
  // Direct path: itsEmbeddings itself is set as extra input number 1 of the main network.
  243    itsNetwork->setExtraInputFromFloat32(1 , itsEmbeddings);
 
  246  LINFO(
"Updated main network with modified classes.");
 
  252  if (classid >= itsNumClasses) 
LFATAL(
"Invalid class id "<<classid<<
" (only have " << itsNumClasses<<
" classes)");
 
  253  return itsCLIPimages[classid];
 
 
#define JEVOIS_CUSTOM_DNN_PATH
Directory where custom DNN models are stored.
#define JEVOIS_SHARE_PATH
Base path for shared files (e.g., neural network weights, etc)
A component of a model hierarchy.
Helper class to assist modules in creating graphical and GUI elements.
Interface to a CLIP model used to compute text and image embeddings.
void setup(size_t nclass, GUIhelper *helper, std::shared_ptr< jevois::dnn::Network > net)
void update(size_t const classnum, std::string const &label)
Update one class using text.
int textEmbeddingSize()
Get CLIP text embedding size, or 0 if we do not have a CLIP text encoder.
void freeze(bool doit)
Freeze/unfreeze parameters that users should not change while running.
virtual ~YOLOjevois()
Virtual destructor.
bool ready()
Are we ready to work, or still loading our networks?
int imageEmbeddingSize()
Get CLIP image embedding size, or 0 if we do not have a CLIP image encoder.
cv::Mat const & image(size_t const classid) const
Access our class definition images.
YOLOjevois(std::string const &instance, std::map< int, std::string > &labels)
Inherited constructor ok; must call setup() before using.
void load()
Load CLIP and ONNX networks, in a non-blocking thread.
#define LFATAL(msg)
Convenience macro for users to print out console or syslog messages, FATAL level.
void warnAndRethrowException(std::string const &prefix="")
Convenience function to catch an exception, issue some LERROR (depending on type), and rethrow it.
#define LINFO(msg)
Convenience macro for users to print out console or syslog messages, INFO level.
std::string getLabel(std::map< int, std::string > const &labels, int id, bool namedonly=false)
Get a label from an id.
std::future< std::invoke_result_t< std::decay_t< Function >, std::decay_t< Args >... > > async(Function &&f, Args &&... args)
Async execution using a thread pool.
std::filesystem::path absolutePath(std::filesystem::path const &root, std::filesystem::path const &path)
Compute an absolute path from two paths.
bool stringStartsWith(std::string const &str, std::string const &prefix)
Return true if str starts with prefix (including if both strings are equal)
Main namespace for all JeVois classes and functions.