JeVoisBase  1.23
JeVois Smart Embedded Machine Vision Toolkit Base Modules
Share this page:
Loading...
Searching...
No Matches
create-yolo-jevois-npu-dataset.py
Go to the documentation of this file.
1#!/usr/bin/env python3
2
3import numpy as np
4import os
5from PIL import Image
6import random
7import subprocess
8import onnxruntime as ort
9
10def create_dataset(width, height, nc, numimages):
11 numtexts = 365 # number of category labels in the quantization dataset
12 imgdir='Objects365/images/val'
13 txtdir='Objects365/labels/val'
14
15 txt_embeds = np.ndarray((numtexts, 512), np.float32)
16
17 images = np.ndarray((numimages, 3, height, width), np.float32)
18 txt = np.ndarray((numimages, nc, 512), np.float32)
19
20 txt0 = np.ndarray((numimages, 4, 32, nc), np.float32)
21 txt1 = np.ndarray((numimages, 2, 32, nc), np.float32)
22 txt2 = np.ndarray((numimages, 4, 32, nc), np.float32)
23 txt3 = np.ndarray((numimages, 8, 32, nc), np.float32)
24 txt4 = np.ndarray((numimages, nc, 512), np.float32)
25
26
27
28 # read all the text embeddings into one array, for the 365 object labels
29 print("Loading all text embeddings...")
30 idx = 0
31 for i in range(numtexts):
32 fname = f"text_vec_{i}.npy"
33 if os.path.isfile(fname):
34 e = np.load(fname)
35 e /= np.linalg.norm(e[0,:]) # our model expects unit-norm text features
36 txt_embeds[idx, :] = e
37 idx += 1
38 else:
39 print(f"Cannot load {fname}")
40 exit(1)
41
42 # Read each image and get nc labels, in randomized order but including as many of that image's labels as possible:
43 print("Processing images...")
44 allimages = list(filter(lambda f: f.endswith('.jpg'), os.listdir(imgdir)))
45 selected_images = random.sample(allimages, numimages)
46
47 # load the oonx text processor
48 session = ort.InferenceSession(f"yolov8s-jevois-{width}x{height}-{nc}c-txt.onnx")
49
50
51 idx = 0
52 for imgname in selected_images:
53 fname = os.path.join(imgdir, imgname)
54 if not os.path.isfile(fname):
55 print(f"Cannot load {fname}")
56 exit(1)
57
58 image = Image.open(fname).resize((width, height));
59 arr = np.array(image).astype(np.float32)
60 arr = (arr - 0.0) / 255.0 # pre-processing. Here: mean=[0 0 0], scale=1/255 but varies by model
61 arr = np.transpose(arr, (2, 0, 1))
62 images[idx, :, :, :] = arr
63
64 # Get the categories present in that image:
65 labelpath = os.path.join(txtdir, imgname.replace('.jpg', '.txt'))
66 x = subprocess.run("cat " + str(labelpath) + " | awk '{ print $1 }' | sort -n | uniq",
67 shell=True, capture_output=True, text=True)
68 categs_str = x.stdout.splitlines()
69 categs = [int(c) for c in categs_str]
70
71 print(f"categs in {labelpath}: {categs}")
72
73 random.shuffle(categs)
74 extra = list(range(numtexts))
75 random.shuffle(extra)
76 categs += extra
77 categs = categs[0:nc]
78
79 print(f"selected categs in {labelpath}: {categs}")
80
81 for i in range(nc):
82 txt[idx, i, :] = txt_embeds[categs[i]]
83
84 # Run the text processing model:
85 txtin = txt[idx, :, :]
86 txtin = np.expand_dims(txtin, axis=0)
87
88 outs = session.run(None, { "txt_feats": txtin })
89 txt0[idx, :, :, :] = outs[0]
90 txt1[idx, :, :, :] = outs[1]
91 txt2[idx, :, :, :] = outs[2]
92 txt3[idx, :, :, :] = outs[3]
93 txt4[idx, :, :] = outs[4]
94
95 idx += 1
96
97 with open(f"objects365-images-{width}x{height}-{nc}.npy", 'wb') as f: np.save(f, images)
98 with open(f"objects365-texts-{width}x{height}-{nc}.npy", 'wb') as f: np.save(f, txt)
99 with open(f"objects365-txt0-{width}x{height}-{nc}.npy", 'wb') as f: np.save(f, txt0)
100 with open(f"objects365-txt1-{width}x{height}-{nc}.npy", 'wb') as f: np.save(f, txt1)
101 with open(f"objects365-txt2-{width}x{height}-{nc}.npy", 'wb') as f: np.save(f, txt2)
102 with open(f"objects365-txt3-{width}x{height}-{nc}.npy", 'wb') as f: np.save(f, txt3)
103 with open(f"objects365-txt4-{width}x{height}-{nc}.npy", 'wb') as f: np.save(f, txt4)
104
105
106
107classes=[1, 8, 16, 32, 64]
108
109numimages = 5000 # 20000 ok with 512x288; 80000 possible but crashes NPU converter
110for nc in classes:
111 create_dataset(512, 288, nc, numimages)
112 create_dataset(1024, 576, nc, numimages)
113
create_dataset(width, height, nc, numimages)