JeVois  1.23
JeVois Smart Embedded Machine Vision Toolkit
Share this page:
Loading...
Searching...
No Matches
YOLOjevois.H
Go to the documentation of this file.
1// ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
2//
3// JeVois Smart Embedded Machine Vision Toolkit - Copyright (C) 2024 by Laurent Itti, the University of Southern
4// California (USC), and iLab at USC. See http://iLab.usc.edu and http://jevois.org for information about this project.
5//
6// This file is part of the JeVois Smart Embedded Machine Vision Toolkit. This program is free software; you can
7// redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software
8// Foundation, version 2. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9// without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
10// License for more details. You should have received a copy of the GNU General Public License along with this program;
11// if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
12//
13// Contact information: Laurent Itti - 3641 Watt Way, HNB-07A - Los Angeles, CA 90089-2520 - USA.
14// Tel: +1 213 740 3527 - itti@pollux.usc.edu - http://iLab.usc.edu - http://jevois.org
15// ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
16/*! \file */
17#pragma once
18
19#ifdef JEVOIS_PRO
20
22#include <onnxruntime_cxx_api.h>
23#include <opencv2/opencv.hpp>
24#include <ovxlib/vsi_nn_pub.h> // for data types and quantization types
25
26namespace jevois
27{
28 class GUIhelper;
29
30 namespace dnn
31 {
32 class CLIP;
33 class Network;
34
35 namespace yolojevois
36 {
37 static jevois::ParameterCategory const ParamCateg("YOLOjevois Model Options");
38
39 JEVOIS_DECLARE_PARAMETER(clipmodel, std::string, "Path to ggml CLIP model to use, in gguf "
40 "format. If path is relative, it is within " JEVOIS_SHARE_PATH "/clip/",
41 "clip-vit-base-patch32_ggml-model-q8_0.gguf", ParamCateg);
42
43 JEVOIS_DECLARE_PARAMETER(textmodel, std::string, "Path ONNX YOLO-JeVois model to use "
44 "to process CLIP text embeddings, or empty to just use the raw "
45 "CLIP embeddings. If path is relative, it "
46 "is within " JEVOIS_SHARE_PATH "/ort/detection/",
47 "", ParamCateg);
48 }
49
50 //! Helper class for runtime-configurable, quantized open-vocabulary object detection
51 /*! YOLO-JeVois splits the YOLO-World model into 3 components to allow runtime changes of class definitions on a
52 running quantized model: 1) a CLIP model to convert text or image class definitions into 512D embeddings; 2) A
53 YOLOjevois helper that runs an ONNX model on CPU when classes are updated, to convert the 512D CLIP embeddings
54 into 5 tensors that will be used by the quantized object detection model; 3) a trimmed YOLO-World model (usually
55 quantized for NPU) that takes an image plus those 5 tensors as an input, to generate detection boxes. In
56 addition, a second variant is available, which is a bit slower, where: 1) class names or images are converted to
57 CLIP embeddings; 2) these are input along with an image to a full YOLO-World model. This approach is slower and
58 only works well on NPU when using 16-bit quantization. \ingroup dnn */
59 class YOLOjevois : public Component,
60 public Parameter<yolojevois::clipmodel, yolojevois::textmodel>
61 {
62 public:
63 //! Constructor taking the instance name and a reference to the label map; must call setup() before using
64 YOLOjevois(std::string const & instance, std::map<int, std::string> & labels);
65
66 /*! Initialize for nclass object classes. All labels in the label map will be processed upon load(), which is
67 triggered by ready(), giving rise to embeddings; or, if the label map is missing some class labels, default
68 ones will be created. Note that YOLOjevois may modify some of the labels during load() and update(). */
69 void setup(size_t nclass, GUIhelper * helper, std::shared_ptr<jevois::dnn::Network> net);
70
71 //! Virtual destructor
72 virtual ~YOLOjevois();
73
74 //! Freeze/unfreeze parameters that users should not change while running
75 void freeze(bool doit);
76
77 //! Are we ready to work, or still loading our networks?
78 bool ready();
79
80 //! Get CLIP text embedding size, or 0 if we do not have a CLIP text encoder
81 int textEmbeddingSize();
82
83 //! Get CLIP image embedding size, or 0 if we do not have a CLIP image encoder
84 int imageEmbeddingSize();
85
86 //! Update one class using text
87 void update(size_t const classnum, std::string const & label);
88
89 //! Update one class using an RGB image
90 void update(size_t const classnum, cv::Mat const & img);
91
92 //! Access our class definition images
93 /*! There is always one cv::Mat per class, but the returned Mat may be empty if that class was not updated by
94 image. Caution: not thread-safe. */
95 cv::Mat const & image(size_t const classid) const;
96
97 protected:
98 std::map<int, std::string> & itsLabels;
99 size_t itsNumClasses = 0;
100 cv::Mat itsEmbeddings; // text/image embeddings as 1xCx512 for C classes
101 std::vector<cv::Mat> itsCLIPimages;
102 std::shared_ptr<jevois::dnn::Network> itsNetwork; //!< Pointer to the main YOLO to update its extra inputs
103 std::shared_ptr<CLIP> itsCLIP; //!< CLIP network to get embeddings from text or image queries
104 std::shared_ptr<Network> itsAuxNet; //!< Optional aux network to process CLIP embeddings
105
106 void load(); //!< Load CLIP and ONNX networks, in a non-blocking thread
107
108 private:
109 void updateMainNetwork();
110 std::atomic<bool> itsLoading = false;
111 std::atomic<bool> itsLoaded = false;
112 std::future<void> itsLoadFut;
113 jevois::GUIhelper * itsHelper = nullptr;
114 };
115 }
116}
117
118#endif // JEVOIS_PRO
#define JEVOIS_SHARE_PATH
Base path for shared files (e.g., neural network weights, etc)
Definition Config.H:82
A component of a model hierarchy.
Definition Component.H:182
Helper class to assist modules in creating graphical and GUI elements.
Definition GUIhelper.H:133
Helper class for runtime-configurable, quantized open-vocabulary object detection.
Definition YOLOjevois.H:61
std::shared_ptr< jevois::dnn::Network > itsNetwork
Pointer to the main YOLO to update its extra inputs.
Definition YOLOjevois.H:102
std::vector< cv::Mat > itsCLIPimages
Definition YOLOjevois.H:101
void setup(size_t nclass, GUIhelper *helper, std::shared_ptr< jevois::dnn::Network > net)
Definition YOLOjevois.C:32
void update(size_t const classnum, std::string const &label)
Update one class using text.
Definition YOLOjevois.C:76
std::map< int, std::string > & itsLabels
Definition YOLOjevois.H:98
int textEmbeddingSize()
Get CLIP text embedding size, or 0 if we do not have a CLIP text encoder.
Definition YOLOjevois.C:51
void freeze(bool doit)
Freeze/unfreeze parameters that users should not change while running.
Definition YOLOjevois.C:44
std::shared_ptr< Network > itsAuxNet
Optional aux network to process CLIP embeddings.
Definition YOLOjevois.H:104
virtual ~YOLOjevois()
Virtual destructor.
Definition YOLOjevois.C:40
bool ready()
Are we ready to work, or still loading our networks?
Definition YOLOjevois.C:104
std::shared_ptr< CLIP > itsCLIP
CLIP network to get embeddings from text or image queries.
Definition YOLOjevois.H:103
int imageEmbeddingSize()
Get CLIP image embedding size, or 0 if we do not have a CLIP image encoder.
Definition YOLOjevois.C:59
cv::Mat const & image(size_t const classid) const
Access our class definition images.
Definition YOLOjevois.C:250
void load()
Load CLIP and ONNX networks, in a non-blocking thread.
Definition YOLOjevois.C:129
JEVOIS_DECLARE_PARAMETER(clipmodel, std::string, "Path to ggml CLIP model to use, in gguf " "format. If path is relative, it is within " JEVOIS_SHARE_PATH "/clip/", "clip-vit-base-patch32_ggml-model-q8_0.gguf", ParamCateg)
Main namespace for all JeVois classes and functions.
Definition Concepts.dox:2
A category to which multiple ParameterDef definitions can belong.