30 {0, 1}, {1, 3}, {0, 2}, {2, 4},
31 {5, 6}, {5, 7}, {7, 9}, {6, 8}, {8, 10},
32 {5, 11}, {6, 12}, {11, 12},
33 {11, 13}, {12, 14}, {13, 15}, {14, 16}
37std::pair<std::vector<KeyPt>, std::vector<PairPairs>>
filter_keypoints(std::vector<Decodings> filtered_decodings,
38 std::vector<int> network_dims,
float joint_threshold) {
39 std::vector<KeyPt> filtered_keypoints;
40 std::vector<PairPairs> filtered_pairs;
42 for (
auto& dec : filtered_decodings){
43 auto keypoint_coordinates_and_score = dec.keypoints;
44 auto coordinates = keypoint_coordinates_and_score.first;
45 auto score = keypoint_coordinates_and_score.second;
48 for (
int i = 0; i < int(score.shape(0)); i++){
49 if (score(i,0) > joint_threshold) {
50 filtered_keypoints.push_back(KeyPt({coordinates(i, 0) / network_dims[0], coordinates(i, 1) / network_dims[1], score(i,0)}));
56 if (score(pair.first,0) >= joint_threshold && score(pair.second, 0) >= joint_threshold){
57 PairPairs pr = PairPairs({
58 std::make_pair(coordinates(pair.first,0) / network_dims[0], coordinates(pair.first,1) / network_dims[1]),
59 std::make_pair(coordinates(pair.second,0) / network_dims[0], coordinates(pair.second,1) / network_dims[1]),
63 filtered_pairs.push_back(pr);
68 return std::make_pair(filtered_keypoints, filtered_pairs);
87std::vector<Decodings>
nms(std::vector<Decodings> &decodings,
const float iou_thr,
bool should_nms_cross_classes =
false) {
89 std::vector<Decodings> decodings_after_nms;
91 for (uint index = 0; index < decodings.size(); index++)
93 if (decodings[index].detection_box.get_confidence() != 0.0f)
95 for (uint jindex = index + 1; jindex < decodings.size(); jindex++)
97 if ((should_nms_cross_classes || (decodings[index].detection_box.get_class_id() == decodings[jindex].detection_box.get_class_id())) &&
98 decodings[jindex].detection_box.get_confidence() != 0.0f)
101 float iou =
iou_calc(decodings[index].detection_box.get_bbox(), decodings[jindex].detection_box.get_bbox());
108 decodings[jindex].detection_box.set_confidence(0.0f);
114 for (uint index = 0; index < decodings.size(); index++)
116 if (decodings[index].detection_box.get_confidence() != 0.0f)
118 decodings_after_nms.push_back(Decodings{decodings[index].detection_box, decodings[index].keypoints, decodings[index].joint_pairs});
121 return decodings_after_nms;
155std::vector<xt::xarray<double>>
get_centers(std::vector<int>& strides, std::vector<int>& network_dims,
156 std::size_t boxes_num,
int strided_width,
int strided_height){
158 std::vector<xt::xarray<double>> centers(boxes_num);
160 for (uint i=0; i < boxes_num; i++) {
161 strided_width = network_dims[0] / strides[i];
162 strided_height = network_dims[1] / strides[i];
165 xt::xarray<int> grid_x = xt::arange(0, strided_width);
166 xt::xarray<int> grid_y = xt::arange(0, strided_height);
168 auto mesh = xt::meshgrid(grid_x, grid_y);
169 grid_x = std::get<1>(mesh);
170 grid_y = std::get<0>(mesh);
173 auto ct_row = (xt::flatten(grid_y) + 0.5) * strides[i];
174 auto ct_col = (xt::flatten(grid_x) + 0.5) * strides[i];
176 centers[i] = xt::stack(xt::xtuple(ct_col, ct_row, ct_col, ct_row), 1);
184 xt::xarray<float> scores,
185 std::vector<HailoTensorPtr> raw_keypoints,
186 std::vector<int> network_dims,
187 std::vector<int> strides,
188 int regression_length,
float score_threshold) {
189 int strided_width = 0, strided_height = 0, class_index = 0;
190 std::vector<Decodings> decodings;
191 std::vector<PairPairs> joint_pairs;
192 int instance_index = 0;
193 float confidence = 0.0;
196 auto centers =
get_centers(std::ref(strides), std::ref(network_dims), raw_boxes_outputs.size(), strided_width, strided_height);
199 auto regression_distance = xt::reshape_view(xt::arange(0, regression_length + 1), {1, 1, regression_length + 1});
201 for (uint i = 0; i < raw_boxes_outputs.size(); i++)
204 float32_t qp_scale = raw_boxes_outputs[i]->vstream_info().quant_info.qp_scale;
205 float32_t qp_zp = raw_boxes_outputs[i]->vstream_info().quant_info.qp_zp;
207 auto output_b = common::get_xtensor(raw_boxes_outputs[i]);
208 int num_proposals = output_b.shape(0) * output_b.shape(1);
209 auto output_boxes = xt::view(output_b, xt::all(), xt::all(), xt::all());
210 xt::xarray<uint8_t> quantized_boxes = xt::reshape_view(output_boxes, {num_proposals, 4, regression_length + 1});
212 auto shape = {quantized_boxes.shape(1), quantized_boxes.shape(2)};
215 float32_t qp_scale_kpts = raw_keypoints[i]->vstream_info().quant_info.qp_scale;
216 float32_t qp_zp_kpts = raw_keypoints[i]->vstream_info().quant_info.qp_zp;
218 xt::xarray<uint16_t> output_keypoints = common::get_xtensor_uint16(raw_keypoints[i]);
219 int num_proposals_keypoints = output_keypoints.shape(0) * output_keypoints.shape(1);
220 xt::xarray<uint16_t> output_keypoints_quantized = xt::view(output_keypoints, xt::all(), xt::all(), xt::all());
221 xt::xarray<uint16_t> quantized_keypoints = xt::reshape_view(output_keypoints_quantized, {num_proposals_keypoints, 17, 3});
223 auto keypoints_shape = {quantized_keypoints.shape(1), quantized_keypoints.shape(2)};
226 for (uint j = 0; j < uint(num_proposals); j++) {
227 confidence = xt::row(scores, instance_index)(0);
229 if (confidence < score_threshold)
232 xt::xarray<float> box(shape);
233 xt::xarray<float> kpts_corrdinates_and_scores(keypoints_shape);
236 box.shape(0), box.shape(1),
238 common::softmax_2D(box.data(), box.shape(0), box.shape(1));
240 auto box_distance = box * regression_distance;
241 xt::xarray<float> reduced_distances = xt::sum(box_distance, {2});
242 auto strided_distances = reduced_distances * strides[i];
245 auto distance_view1 = xt::view(strided_distances, xt::all(), xt::range(_, 2)) * -1;
246 auto distance_view2 = xt::view(strided_distances, xt::all(), xt::range(2, _));
247 auto distance_view = xt::concatenate(xt::xtuple(distance_view1, distance_view2), 1);
248 auto decoded_box = centers[i] + distance_view;
250 HailoBBox bbox(decoded_box(j, 0) / network_dims[0],
251 decoded_box(j, 1) / network_dims[1],
252 (decoded_box(j, 2) - decoded_box(j, 0)) / network_dims[0],
253 (decoded_box(j, 3) - decoded_box(j, 1)) / network_dims[1]);
255 label = common::coco_eighty[class_index + 1];
256 HailoDetection detected_instance(bbox, class_index, label, confidence);
260 kpts_corrdinates_and_scores.shape(0), kpts_corrdinates_and_scores.shape(1),
261 qp_scale_kpts, qp_zp_kpts);
263 auto kpts_corrdinates = xt::view(kpts_corrdinates_and_scores, xt::all(), xt::range(0, 2));
264 auto keypoints_scores = xt::view(kpts_corrdinates_and_scores, xt::all(), xt::range(2, xt::placeholders::_));
266 kpts_corrdinates *= 2;
268 auto center = xt::view(centers[i], xt::all(), xt::range(0, 2));
269 auto center_values = xt::xarray<float>{(float)center(j,0), (float)center(j,1)};
271 kpts_corrdinates = strides[i] * (kpts_corrdinates - 0.5) + center_values;
274 auto sigmoided_scores = 1 / (1 + xt::exp(-keypoints_scores));
276 auto keypoint = std::make_pair(kpts_corrdinates, sigmoided_scores);
278 decodings.push_back(Decodings{detected_instance, keypoint, joint_pairs});
287 std::vector<HailoTensorPtr> outputs_boxes(tensors.size() / 3);
288 std::vector<HailoTensorPtr> outputs_keypoints(tensors.size() / 3);
291 int total_scores = 0;
292 for (uint i = 0; i < tensors.size(); i = i + 3) {
293 total_scores += tensors[i+1]->width() * tensors[i+1]->height();
296 std::vector<size_t> scores_shape = { (
long unsigned int)total_scores, (
long unsigned int)num_classes};
298 xt::xarray<float> scores(scores_shape);
300 int view_index_scores = 0;
302 for (uint i = 0; i < tensors.size(); i = i + 3)
305 outputs_boxes[i / 3] = tensors[i];
308 auto dequantized_output_s = common::dequantize(common::get_xtensor(tensors[i+1]), tensors[i+1]->vstream_info().quant_info.qp_scale, tensors[i+1]->vstream_info().quant_info.qp_zp);
309 int num_proposals_scores = dequantized_output_s.shape(0)*dequantized_output_s.shape(1);
312 auto output_scores = xt::view(dequantized_output_s, xt::all(), xt::all(), xt::all());
313 xt::view(scores, xt::range(view_index_scores, view_index_scores + num_proposals_scores), xt::all()) = xt::reshape_view(output_scores, {num_proposals_scores, num_classes});
314 view_index_scores += num_proposals_scores;
317 outputs_keypoints[i / 3] = tensors[i+2];
320 return Triple{outputs_boxes, scores, outputs_keypoints};
std::vector< Decodings > nms(std::vector< Decodings > &decodings, const float iou_thr, bool should_nms_cross_classes=false)
std::vector< Decodings > decode_boxes_and_keypoints(std::vector< HailoTensorPtr > raw_boxes_outputs, xt::xarray< float > scores, std::vector< HailoTensorPtr > raw_keypoints, std::vector< int > network_dims, std::vector< int > strides, int regression_length, float score_threshold)
void dequantize_box_values(xt::xarray< float > &dequantized_outputs, int index, xt::xarray< uint8_t > &quantized_outputs, size_t dim1, size_t dim2, float32_t qp_scale, float32_t qp_zp)
void dequantize_box_values16(xt::xarray< float > &dequantized_outputs, int index, xt::xarray< uint16_t > &quantized_outputs, size_t dim1, size_t dim2, float32_t qp_scale, float32_t qp_zp)
std::pair< std::vector< KeyPt >, std::vector< PairPairs > > filter_keypoints(std::vector< Decodings > filtered_decodings, std::vector< int > network_dims, float joint_threshold)
std::vector< Decodings > yolov8pose_postprocess(std::vector< HailoTensorPtr > &tensors, std::vector< int > network_dims, std::vector< int > strides, int regression_length, int num_classes, float score_threshold, float iou_threshold)
std::vector< xt::xarray< double > > get_centers(std::vector< int > &strides, std::vector< int > &network_dims, std::size_t boxes_num, int strided_width, int strided_height)