JeVois  1.22
JeVois Smart Embedded Machine Vision Toolkit
Share this page:
Loading...
Searching...
No Matches
yolov8pose_postprocess.C
Go to the documentation of this file.
1/**
2* Copyright (c) 2021-2022 Hailo Technologies Ltd. All rights reserved.
3* Distributed under the LGPL license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt)
4**/
5
6// JEVOIS EDITS: make SCORE_THRESHOLD, IOU_THRESHOLD parameters
7// comment out unused code
8// fixed some warnings
9
10#ifdef JEVOIS_PRO
11
12// General includes
13#include <iostream>
14#include <vector>
15
16// Hailo includes
17#include <jevois/DNN/hailo/common/math.hpp>
18#include <jevois/DNN/hailo/common/hailo_objects.hpp>
19#include <jevois/DNN/hailo/common/tensors.hpp>
20#include <jevois/DNN/hailo/common/labels/coco_eighty.hpp>
21#include <jevois/DNN/hailo/yolov8pose_postprocess.hpp>
22
23using namespace xt::placeholders;
24
25//#define SCORE_THRESHOLD 0.6
26//#define IOU_THRESHOLD 0.7
27#define NUM_CLASSES 1
28
// Pairs of keypoint indices (0..16, matching the 17 keypoints decoded per
// detection below) that are connected by a skeleton segment. Consumed by
// filter_keypoints() to emit drawable joint pairs.
std::vector<std::pair<int, int>> JOINT_PAIRS = {
    {0, 1}, {1, 3}, {0, 2}, {2, 4},
    {5, 6}, {5, 7}, {7, 9}, {6, 8}, {8, 10},
    {5, 11}, {6, 12}, {11, 12},
    {11, 13}, {12, 14}, {13, 15}, {14, 16}
};
35
36
37std::pair<std::vector<KeyPt>, std::vector<PairPairs>> filter_keypoints(std::vector<Decodings> filtered_decodings,
38 std::vector<int> network_dims, float joint_threshold) {
39 std::vector<KeyPt> filtered_keypoints;
40 std::vector<PairPairs> filtered_pairs;
41
42 for (auto& dec : filtered_decodings){
43 auto keypoint_coordinates_and_score = dec.keypoints;
44 auto coordinates = keypoint_coordinates_and_score.first;
45 auto score = keypoint_coordinates_and_score.second;
46
47 // Filter keypoints
48 for (int i = 0; i < int(score.shape(0)); i++){
49 if (score(i,0) > joint_threshold) {
50 filtered_keypoints.push_back(KeyPt({coordinates(i, 0) / network_dims[0], coordinates(i, 1) / network_dims[1], score(i,0)}));
51 }
52 }
53
54 // Filter joints pair
55 for (const auto& pair : JOINT_PAIRS) {
56 if (score(pair.first,0) >= joint_threshold && score(pair.second, 0) >= joint_threshold){
57 PairPairs pr = PairPairs({
58 std::make_pair(coordinates(pair.first,0) / network_dims[0], coordinates(pair.first,1) / network_dims[1]),
59 std::make_pair(coordinates(pair.second,0) / network_dims[0], coordinates(pair.second,1) / network_dims[1]),
60 score(pair.first, 0),
61 score(pair.second, 0)
62 });
63 filtered_pairs.push_back(pr);
64 }
65 }
66 }
67
68 return std::make_pair(filtered_keypoints, filtered_pairs);
69}
70
71
72float iou_calc(const HailoBBox &box_1, const HailoBBox &box_2)
73{
74 // Calculate IOU between two detection boxes
75 const float width_of_overlap_area = std::min(box_1.xmax(), box_2.xmax()) - std::max(box_1.xmin(), box_2.xmin());
76 const float height_of_overlap_area = std::min(box_1.ymax(), box_2.ymax()) - std::max(box_1.ymin(), box_2.ymin());
77 const float positive_width_of_overlap_area = std::max(width_of_overlap_area, 0.0f);
78 const float positive_height_of_overlap_area = std::max(height_of_overlap_area, 0.0f);
79 const float area_of_overlap = positive_width_of_overlap_area * positive_height_of_overlap_area;
80 const float box_1_area = (box_1.ymax() - box_1.ymin()) * (box_1.xmax() - box_1.xmin());
81 const float box_2_area = (box_2.ymax() - box_2.ymin()) * (box_2.xmax() - box_2.xmin());
82 // The IOU is a ratio of how much the boxes overlap vs their size outside the overlap.
83 // Boxes that are similar will have a higher overlap threshold.
84 return area_of_overlap / (box_1_area + box_2_area - area_of_overlap);
85}
86
87std::vector<Decodings> nms(std::vector<Decodings> &decodings, const float iou_thr, bool should_nms_cross_classes = false) {
88
89 std::vector<Decodings> decodings_after_nms;
90
91 for (uint index = 0; index < decodings.size(); index++)
92 {
93 if (decodings[index].detection_box.get_confidence() != 0.0f)
94 {
95 for (uint jindex = index + 1; jindex < decodings.size(); jindex++)
96 {
97 if ((should_nms_cross_classes || (decodings[index].detection_box.get_class_id() == decodings[jindex].detection_box.get_class_id())) &&
98 decodings[jindex].detection_box.get_confidence() != 0.0f)
99 {
100 // For each detection, calculate the IOU against each following detection.
101 float iou = iou_calc(decodings[index].detection_box.get_bbox(), decodings[jindex].detection_box.get_bbox());
102 // If the IOU is above threshold, then we have two similar detections,
103 // and want to delete the one.
104 if (iou >= iou_thr)
105 {
106 // The detections are arranged in highest score order,
107 // so we want to erase the latter detection.
108 decodings[jindex].detection_box.set_confidence(0.0f);
109 }
110 }
111 }
112 }
113 }
114 for (uint index = 0; index < decodings.size(); index++)
115 {
116 if (decodings[index].detection_box.get_confidence() != 0.0f)
117 {
118 decodings_after_nms.push_back(Decodings{decodings[index].detection_box, decodings[index].keypoints, decodings[index].joint_pairs});
119 }
120 }
121 return decodings_after_nms;
122}
123
124
125float dequantize_value(uint8_t val, float32_t qp_scale, float32_t qp_zp){
126 return (float(val) - qp_zp) * qp_scale;
127}
128
129
130void dequantize_box_values(xt::xarray<float>& dequantized_outputs, int index,
131 xt::xarray<uint8_t>& quantized_outputs,
132 size_t dim1, size_t dim2, float32_t qp_scale, float32_t qp_zp){
133 for (size_t i = 0; i < dim1; i++){
134 for (size_t j = 0; j < dim2; j++){
135 dequantized_outputs(i, j) = dequantize_value(quantized_outputs(index, i, j), qp_scale, qp_zp);
136 }
137 }
138}
139
140float dequantize_value16(uint16_t val, float32_t qp_scale, float32_t qp_zp){
141 return (float(val) - qp_zp) * qp_scale;
142}
143
144
145void dequantize_box_values16(xt::xarray<float>& dequantized_outputs, int index,
146 xt::xarray<uint16_t>& quantized_outputs,
147 size_t dim1, size_t dim2, float32_t qp_scale, float32_t qp_zp){
148 for (size_t i = 0; i < dim1; i++){
149 for (size_t j = 0; j < dim2; j++){
150 dequantized_outputs(i, j) = dequantize_value16(quantized_outputs(index, i, j), qp_scale, qp_zp);
151 }
152 }
153}
154
155std::vector<xt::xarray<double>> get_centers(std::vector<int>& strides, std::vector<int>& network_dims,
156 std::size_t boxes_num, int strided_width, int strided_height){
157
158 std::vector<xt::xarray<double>> centers(boxes_num);
159
160 for (uint i=0; i < boxes_num; i++) {
161 strided_width = network_dims[0] / strides[i];
162 strided_height = network_dims[1] / strides[i];
163
164 // Create a meshgrid of the proper strides
165 xt::xarray<int> grid_x = xt::arange(0, strided_width);
166 xt::xarray<int> grid_y = xt::arange(0, strided_height);
167
168 auto mesh = xt::meshgrid(grid_x, grid_y);
169 grid_x = std::get<1>(mesh);
170 grid_y = std::get<0>(mesh);
171
172 // Use the meshgrid to build up box center prototypes
173 auto ct_row = (xt::flatten(grid_y) + 0.5) * strides[i];
174 auto ct_col = (xt::flatten(grid_x) + 0.5) * strides[i];
175
176 centers[i] = xt::stack(xt::xtuple(ct_col, ct_row, ct_col, ct_row), 1);
177 }
178
179 return centers;
180}
181
182
// Decode raw quantized box and keypoint tensors into Decodings (one HailoBBox
// detection plus 17 keypoints each), keeping only proposals whose score
// reaches score_threshold. Boxes use DFL-style decoding: per-side softmax over
// (regression_length + 1) bins, expectation over bin indices, scaled by the
// stride and offset from the grid-cell centers. Keypoints are dequantized
// from uint16, rescaled, and their scores passed through a sigmoid.
// NOTE(review): class_index stays 0 throughout, so every detection is labeled
// with coco_eighty[1] — apparently single-class (person) pose; confirm.
std::vector<Decodings> decode_boxes_and_keypoints(std::vector<HailoTensorPtr> raw_boxes_outputs,
                                                  xt::xarray<float> scores,
                                                  std::vector<HailoTensorPtr> raw_keypoints,
                                                  std::vector<int> network_dims,
                                                  std::vector<int> strides,
                                                  int regression_length, float score_threshold) {
    int strided_width = 0, strided_height = 0, class_index = 0;
    std::vector<Decodings> decodings;
    std::vector<PairPairs> joint_pairs; // stays empty; filled later by filter_keypoints()
    int instance_index = 0;             // running row index into the flat scores matrix
    float confidence = 0.0;
    std::string label;

    // Per-stride grid-cell center coordinates (std::ref has no effect here:
    // get_centers takes its last two arguments by value).
    auto centers = get_centers(std::ref(strides), std::ref(network_dims), raw_boxes_outputs.size(), strided_width, strided_height);

    // Box distribution to distance: bin indices 0..regression_length used to
    // take the expectation of the per-side softmax distribution.
    auto regression_distance = xt::reshape_view(xt::arange(0, regression_length + 1), {1, 1, regression_length + 1});

    for (uint i = 0; i < raw_boxes_outputs.size(); i++)
    {
        // Boxes setup: quantization parameters and reshape to
        // (num_proposals, 4 sides, regression_length + 1 bins).
        float32_t qp_scale = raw_boxes_outputs[i]->vstream_info().quant_info.qp_scale;
        float32_t qp_zp = raw_boxes_outputs[i]->vstream_info().quant_info.qp_zp;

        auto output_b = common::get_xtensor(raw_boxes_outputs[i]);
        int num_proposals = output_b.shape(0) * output_b.shape(1);
        auto output_boxes = xt::view(output_b, xt::all(), xt::all(), xt::all());
        xt::xarray<uint8_t> quantized_boxes = xt::reshape_view(output_boxes, {num_proposals, 4, regression_length + 1});

        auto shape = {quantized_boxes.shape(1), quantized_boxes.shape(2)};

        // Keypoints setup: uint16 tensor reshaped to
        // (num_proposals, 17 keypoints, 3 = x/y/score).
        float32_t qp_scale_kpts = raw_keypoints[i]->vstream_info().quant_info.qp_scale;
        float32_t qp_zp_kpts = raw_keypoints[i]->vstream_info().quant_info.qp_zp;

        xt::xarray<uint16_t> output_keypoints = common::get_xtensor_uint16(raw_keypoints[i]);
        int num_proposals_keypoints = output_keypoints.shape(0) * output_keypoints.shape(1);
        xt::xarray<uint16_t> output_keypoints_quantized = xt::view(output_keypoints, xt::all(), xt::all(), xt::all());
        xt::xarray<uint16_t> quantized_keypoints = xt::reshape_view(output_keypoints_quantized, {num_proposals_keypoints, 17, 3});

        auto keypoints_shape = {quantized_keypoints.shape(1), quantized_keypoints.shape(2)};

        // Bbox decoding: only proposals above the score threshold get the
        // (expensive) dequantize + softmax + keypoint decode treatment.
        for (uint j = 0; j < uint(num_proposals); j++) {
            confidence = xt::row(scores, instance_index)(0);
            instance_index++;
            if (confidence < score_threshold)
                continue;

            xt::xarray<float> box(shape);
            xt::xarray<float> kpts_corrdinates_and_scores(keypoints_shape);

            // Dequantize this proposal's 4 x (regression_length + 1) box bins,
            // then softmax each side's distribution in place.
            dequantize_box_values(box, j, quantized_boxes,
                                  box.shape(0), box.shape(1),
                                  qp_scale, qp_zp);
            common::softmax_2D(box.data(), box.shape(0), box.shape(1));

            // Expectation over bins -> distance per side, in stride units;
            // scale back to input-image pixels.
            auto box_distance = box * regression_distance;
            xt::xarray<float> reduced_distances = xt::sum(box_distance, {2});
            auto strided_distances = reduced_distances * strides[i];

            // Decode box: left/top distances are subtracted from the center,
            // right/bottom are added (hence the * -1 on the first two).
            auto distance_view1 = xt::view(strided_distances, xt::all(), xt::range(_, 2)) * -1;
            auto distance_view2 = xt::view(strided_distances, xt::all(), xt::range(2, _));
            auto distance_view = xt::concatenate(xt::xtuple(distance_view1, distance_view2), 1);
            auto decoded_box = centers[i] + distance_view;

            // Normalize to [0,1]-style (xmin, ymin, width, height).
            HailoBBox bbox(decoded_box(j, 0) / network_dims[0],
                           decoded_box(j, 1) / network_dims[1],
                           (decoded_box(j, 2) - decoded_box(j, 0)) / network_dims[0],
                           (decoded_box(j, 3) - decoded_box(j, 1)) / network_dims[1]);

            label = common::coco_eighty[class_index + 1];
            HailoDetection detected_instance(bbox, class_index, label, confidence);

            // Decode keypoints: dequantize this proposal's 17 x 3 slice.
            dequantize_box_values16(kpts_corrdinates_and_scores, j, quantized_keypoints,
                                    kpts_corrdinates_and_scores.shape(0), kpts_corrdinates_and_scores.shape(1),
                                    qp_scale_kpts, qp_zp_kpts);

            // Split into (x, y) coordinates and per-keypoint score columns.
            auto kpts_corrdinates = xt::view(kpts_corrdinates_and_scores, xt::all(), xt::range(0, 2));
            auto keypoints_scores = xt::view(kpts_corrdinates_and_scores, xt::all(), xt::range(2, xt::placeholders::_));

            // Map keypoints to image pixels: 2 * kpt - 0.5, times stride,
            // offset by this cell's center (standard YOLOv8-pose decode).
            kpts_corrdinates *= 2;

            auto center = xt::view(centers[i], xt::all(), xt::range(0, 2));
            auto center_values = xt::xarray<float>{(float)center(j,0), (float)center(j,1)};

            kpts_corrdinates = strides[i] * (kpts_corrdinates - 0.5) + center_values;

            // Apply sigmoid to keypoints scores
            auto sigmoided_scores = 1 / (1 + xt::exp(-keypoints_scores));

            auto keypoint = std::make_pair(kpts_corrdinates, sigmoided_scores);

            decodings.push_back(Decodings{detected_instance, keypoint, joint_pairs});
        }
    }

    return decodings;
}
284
285
286Triple get_boxes_scores_keypoints(std::vector<HailoTensorPtr> &tensors, int num_classes, int /*regression_length*/){
287 std::vector<HailoTensorPtr> outputs_boxes(tensors.size() / 3);
288 std::vector<HailoTensorPtr> outputs_keypoints(tensors.size() / 3);
289
290 // Prepare the scores xarray at the size we will fill in in-place
291 int total_scores = 0;
292 for (uint i = 0; i < tensors.size(); i = i + 3) {
293 total_scores += tensors[i+1]->width() * tensors[i+1]->height();
294 }
295
296 std::vector<size_t> scores_shape = { (long unsigned int)total_scores, (long unsigned int)num_classes};
297
298 xt::xarray<float> scores(scores_shape);
299
300 int view_index_scores = 0;
301
302 for (uint i = 0; i < tensors.size(); i = i + 3)
303 {
304 // Bounding boxes extraction will be done later on only on the boxes that surpass the score threshold
305 outputs_boxes[i / 3] = tensors[i];
306
307 // Extract and dequantize the scores outputs
308 auto dequantized_output_s = common::dequantize(common::get_xtensor(tensors[i+1]), tensors[i+1]->vstream_info().quant_info.qp_scale, tensors[i+1]->vstream_info().quant_info.qp_zp);
309 int num_proposals_scores = dequantized_output_s.shape(0)*dequantized_output_s.shape(1);
310
311 // From the layer extract the scores
312 auto output_scores = xt::view(dequantized_output_s, xt::all(), xt::all(), xt::all());
313 xt::view(scores, xt::range(view_index_scores, view_index_scores + num_proposals_scores), xt::all()) = xt::reshape_view(output_scores, {num_proposals_scores, num_classes});
314 view_index_scores += num_proposals_scores;
315
316 // Keypoints extraction will be done later according to the boxes that surpass the threshold
317 outputs_keypoints[i / 3] = tensors[i+2];
318
319 }
320 return Triple{outputs_boxes, scores, outputs_keypoints};
321}
322
323std::vector<Decodings> yolov8pose_postprocess(std::vector<HailoTensorPtr> &tensors,
324 std::vector<int> network_dims,
325 std::vector<int> strides,
326 int regression_length,
327 int num_classes, float score_threshold, float iou_threshold)
328{
329 std::vector<Decodings> decodings;
330 if (tensors.size() == 0)
331 {
332 return decodings;
333 }
334
335 Triple boxes_scores_keypoints = get_boxes_scores_keypoints(tensors, num_classes, regression_length);
336 std::vector<HailoTensorPtr> raw_boxes = boxes_scores_keypoints.boxes;
337 xt::xarray<float> scores = boxes_scores_keypoints.scores;
338 std::vector<HailoTensorPtr> raw_keypoints = boxes_scores_keypoints.keypoints;
339
340 // Decode the boxes and keypoints
341 decodings = decode_boxes_and_keypoints(raw_boxes, scores, raw_keypoints, network_dims, strides, regression_length,
342 score_threshold);
343
344 // Filter with NMS
345 auto decodings_after_nms = nms(decodings, iou_threshold, true);
346
347 return decodings_after_nms;
348}
349
/**
 * @brief yolov8 postprocess
 * Provides network specific parameters
 *
 * @param roi - HailoROIPtr
 * The roi that contains the output tensors
 */
357/* not used by jevois
358
359std::pair<std::vector<KeyPt>, std::vector<PairPairs>> yolov8(HailoROIPtr roi)
360{
361 // anchor params
362 int regression_length = 15;
363 std::vector<int> strides = {8, 16, 32};
364 std::vector<int> network_dims = {640, 640};
365
366 std::vector<HailoTensorPtr> tensors = roi->get_tensors();
367 auto filtered_decodings = yolov8pose_postprocess(tensors, network_dims, strides, regression_length, NUM_CLASSES);
368
369 std::vector<HailoDetection> detections;
370
371 for (auto& dec : filtered_decodings){
372 detections.push_back(dec.detection_box);
373 }
374
375 hailo_common::add_detections(roi, detections);
376
377 std::pair<std::vector<KeyPt>, std::vector<PairPairs>> keypoints_and_pairs = filter_keypoints(filtered_decodings, network_dims);
378
379 return keypoints_and_pairs;
380}
381*/
382
383//******************************************************************
384// DEFAULT FILTER
385//******************************************************************
386/* not used by jevois
387std::pair<std::vector<KeyPt>, std::vector<PairPairs>> filter(HailoROIPtr roi)
388{
389 return yolov8(roi);
390}
391*/
392
393#endif // JEVOIS_PRO
float iou_calc(const HailoBBox &box_1, const HailoBBox &box_2)
std::vector< std::pair< int, int > > JOINT_PAIRS
std::vector< Decodings > nms(std::vector< Decodings > &decodings, const float iou_thr, bool should_nms_cross_classes=false)
std::vector< Decodings > decode_boxes_and_keypoints(std::vector< HailoTensorPtr > raw_boxes_outputs, xt::xarray< float > scores, std::vector< HailoTensorPtr > raw_keypoints, std::vector< int > network_dims, std::vector< int > strides, int regression_length, float score_threshold)
void dequantize_box_values(xt::xarray< float > &dequantized_outputs, int index, xt::xarray< uint8_t > &quantized_outputs, size_t dim1, size_t dim2, float32_t qp_scale, float32_t qp_zp)
float dequantize_value(uint8_t val, float32_t qp_scale, float32_t qp_zp)
float dequantize_value16(uint16_t val, float32_t qp_scale, float32_t qp_zp)
void dequantize_box_values16(xt::xarray< float > &dequantized_outputs, int index, xt::xarray< uint16_t > &quantized_outputs, size_t dim1, size_t dim2, float32_t qp_scale, float32_t qp_zp)
std::pair< std::vector< KeyPt >, std::vector< PairPairs > > filter_keypoints(std::vector< Decodings > filtered_decodings, std::vector< int > network_dims, float joint_threshold)
std::vector< Decodings > yolov8pose_postprocess(std::vector< HailoTensorPtr > &tensors, std::vector< int > network_dims, std::vector< int > strides, int regression_length, int num_classes, float score_threshold, float iou_threshold)
std::vector< xt::xarray< double > > get_centers(std::vector< int > &strides, std::vector< int > &network_dims, std::size_t boxes_num, int strided_width, int strided_height)
Triple get_boxes_scores_keypoints(std::vector< HailoTensorPtr > &tensors, int num_classes, int)