JeVoisBase  1.23
JeVois Smart Embedded Machine Vision Toolkit Base Modules
import pyjevois

# Pick the JeVois bindings that match the platform.
# NOTE(review): the condition was lost in the garbled listing ("if import ...");
# `pyjevois.pro` is the presumed flag for JeVois-Pro hardware - confirm.
if pyjevois.pro:
    import libjevoispro as jevois
else:
    import libjevois as jevois
import cv2 as cv
import numpy as np
import sys
8## Object detection and recognition using OpenCV Deep Neural Networks (DNN)
10# This module runs an object detection deep neural network using the OpenCV DNN
11# library. Detection networks analyze a whole scene and produce a number of
12# bounding boxes around detected objects, together with identity labels
13# and confidence scores for each detected box.
15# This module supports detection networks implemented in TensorFlow, Caffe,
16# Darknet, Torch, etc as supported by the OpenCV DNN module.
18# Included with the standard JeVois distribution are:
20# - OpenCV Face Detector, Caffe model
21# - MobileNet + SSD trained on Pascal VOC (20 object classes), Caffe model
22# - MobileNet + SSD trained on Coco (80 object classes), TensorFlow model
23# - MobileNet v2 + SSD trained on Coco (80 object classes), TensorFlow model
24# - Darknet Tiny YOLO v3 trained on Coco (80 object classes), Darknet model
25# - Darknet Tiny YOLO v2 trained on Pascal VOC (20 object classes), Darknet model
27# See the module's constructor (__init__) code and select a value for \b model to switch network. Object categories are
28# as follows:
# - The 80 COCO object categories are: person, bicycle, car, motorbike, aeroplane, bus, train, truck, boat,
#   traffic light, fire hydrant, stop sign, parking meter, bench, bird, cat, dog, horse, sheep, cow, elephant, bear,
#   zebra, giraffe, backpack, umbrella, handbag, tie, suitcase, frisbee, skis, snowboard, sports ball, kite,
#   baseball bat, baseball glove, skateboard, surfboard, tennis racket, bottle, wine glass, cup, fork, knife, spoon,
#   bowl, banana, apple, sandwich, orange, broccoli, carrot, hot dog, pizza, donut, cake, chair, sofa, pottedplant,
#   bed, diningtable, toilet, tvmonitor, laptop, mouse, remote, keyboard, cell phone, microwave, oven, toaster, sink,
#   refrigerator, book, clock, vase, scissors, teddy bear, hair drier, toothbrush.
36# - The 20 Pascal-VOC object categories are: aeroplane, bicycle, bird, boat, bottle, bus, car, cat, chair, cow,
37# diningtable, dog, horse, motorbike, person, pottedplant, sheep, sofa, train, tvmonitor.
39# Sometimes it will make mistakes! The performance of yolov3-tiny is about 33.1% correct (mean average precision) on
40# the COCO test set. The OpenCV Face Detector is quite fast and robust!
# This module is adapted from the OpenCV sample code samples/dnn/object_detection.py (in the OpenCV repository).
# More pre-trained models are available on GitHub in the opencv_extra repository (under testdata/dnn).
48# @author Laurent Itti
50# @videomapping YUYV 640 502 20.0 YUYV 640 480 20.0 JeVois PyDetectionDNN
51# @email
52# @address 880 W 1st St Suite 807, Los Angeles CA 90012, USA
53# @copyright Copyright (C) 2018 by Laurent Itti
54# @mainurl
55# @supporturl
56# @otherurl
57# @license GPL v3
58# @distribution Unrestricted
59# @restrictions None
60# @ingroup modules
62 # ####################################################################################################
63 ## Constructor
def __init__(self):
    """Set detection parameters, select a model, and load the network.

    Edit the ``model`` assignment below to switch networks. Each model
    branch sets the class-name file, the weights file and the config file,
    and overrides the pre-processing defaults (input size, scale, mean,
    channel order, NMS threshold) where that model needs different values.
    Any value of ``model`` that is not matched explicitly falls through to
    the OpenCV face detector.
    """
    # Defaults; individual models override some of these below.
    self.confThreshold = 0.5  # Confidence threshold (0..1), higher for stricter detection confidence.
    self.nmsThreshold = 0.4   # Non-maximum suppression threshold (0..1), higher to remove more duplicate boxes.
    self.inpWidth = 160       # Resized image width passed to network
    self.inpHeight = 120      # Resized image height passed to network
    self.scale = 2/255        # Value scaling factor applied to input pixels
    self.mean = [127.5, 127.5, 127.5]  # Mean BGR value subtracted from input image
    self.rgb = True           # True if model expects RGB inputs, otherwise it expects BGR

    # Select one of the models:
    model = 'Face'               # OpenCV Face Detector, Caffe model
    #model = 'MobileNetV2SSD'    # MobileNet v2 + SSD trained on Coco (80 object classes), TensorFlow model
    #model = 'MobileNetSSD'      # MobileNet + SSD trained on Pascal VOC (20 object classes), Caffe model
    #model = 'MobileNetSSDcoco'  # MobileNet + SSD trained on Coco (80 object classes), TensorFlow model
    #model = 'YOLOv3'            # Darknet Tiny YOLO v3 trained on Coco (80 object classes), Darknet model
    #model = 'YOLOv2'            # Darknet Tiny YOLO v2 trained on Pascal VOC (20 object classes), Darknet model

    # You should not have to edit anything beyond this point.
    backend = cv.dnn.DNN_BACKEND_OPENCV
    target = cv.dnn.DNN_TARGET_CPU
    self.classes = None
    classnames = None

    detection_dir = pyjevois.share + '/opencv-dnn/detection/'
    yolo_dir = pyjevois.share + '/darknet/yolo/'

    if model == 'MobileNetSSD':
        classnames = yolo_dir + 'data/voc.names'
        modelname = detection_dir + 'MobileNetSSD_deploy.caffemodel'
        configname = detection_dir + 'MobileNetSSD_deploy.prototxt'
        self.rgb = False
    elif model == 'MobileNetV2SSD':
        classnames = yolo_dir + 'data/coco.names'
        modelname = detection_dir + 'ssd_mobilenet_v2_coco_2018_03_29.pb'
        configname = detection_dir + 'ssd_mobilenet_v2_coco_2018_03_29.pbtxt'
    elif model == 'MobileNetSSDcoco':
        classnames = yolo_dir + 'data/coco.names'
        modelname = detection_dir + 'ssd_mobilenet_v1_coco_2017_11_17.pb'
        configname = detection_dir + 'ssd_mobilenet_v1_coco_2017_11_17.pbtxt'
        self.rgb = False
        self.nmsThreshold = 0.1
    elif model == 'YOLOv3':
        classnames = yolo_dir + 'data/coco.names'
        modelname = yolo_dir + 'weights/yolov3-tiny.weights'
        configname = yolo_dir + 'cfg/yolov3-tiny.cfg'
    elif model == 'YOLOv2':
        classnames = yolo_dir + 'data/voc.names'
        modelname = yolo_dir + 'weights/yolov2-tiny-voc.weights'
        configname = yolo_dir + 'cfg/yolov2-tiny-voc.cfg'
        self.inpWidth = 320
        self.inpHeight = 240
    else:
        # 'Face', and also any unrecognized model name.
        classnames = detection_dir + 'opencv_face_detector.classes'
        modelname = detection_dir + 'opencv_face_detector.caffemodel'
        configname = detection_dir + 'opencv_face_detector.prototxt'
        self.scale = 1.0
        self.mean = [104.0, 177.0, 123.0]
        self.rgb = False

    # Load names of classes: one name per line, ignoring trailing newlines.
    if classnames:
        with open(classnames, 'rt') as f:
            self.classes = f.read().rstrip('\n').split('\n')

    # Load a network.
    # NOTE(review): the `self.net` statements in this method were truncated in
    # the listing this file was recovered from; they are restored here to match
    # the surviving fragments and the otherwise unused `backend` / `target`
    # locals - confirm against the upstream module.
    self.net = cv.dnn.readNet(modelname, configname)
    self.net.setPreferableBackend(backend)
    self.net.setPreferableTarget(target)
    self.timer = jevois.Timer('Neural detection', 10, jevois.LOG_DEBUG)
    self.model = model
    # Names of the network's output layers; process() requests exactly these.
    self.outNames = self.net.getUnconnectedOutLayersNames()
132 # ####################################################################################################
133 ## Analyze and draw boxes, object names, and confidence scores
def postprocess(self, frame, outs):
    """Decode network outputs and draw boxes, class names and scores on frame.

    Args:
        frame: Image to draw on; modified in place. Its shape gives the
            height (axis 0) and width (axis 1) used to scale normalized boxes.
        outs: Output blobs of the network, one per entry of self.outNames.

    The decoding depends on the network type:
      * Faster-RCNN / R-FCN (network has an 'im_info' input): rows of
        [batchId, classId, confidence, left, top, right, bottom] whose
        coordinates are used as-is.
      * Last layer of type 'DetectionOutput': same row layout, coordinates
        multiplied by the frame size.
      * Last layer of type 'Region': rows of
        [center_x, center_y, width, height, <index 4 unused here>, scores...],
        box values multiplied by the frame size.
    Any other last-layer type is reported through jevois.LERROR and nothing
    is drawn.
    """
    frameHeight = frame.shape[0]
    frameWidth = frame.shape[1]

    def drawPred(classId, conf, left, top, right, bottom):
        # Draw a bounding box.
        cv.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)

        label = '%.2f' % (conf * 100)

        # Prefix the class name. Ids outside the table are flagged; this
        # includes negative ids, which would otherwise wrap around to the end
        # of the list and show a wrong name.
        if self.classes:
            if 0 <= classId < len(self.classes):
                label = '%s: %s' % (self.classes[classId], label)
            else:
                label = 'Oooops id=%d: %s' % (classId, label)

        # White background patch behind black text, kept inside the image top.
        labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.4, 1)
        top = max(top, labelSize[1])
        cv.rectangle(frame, (left, top - labelSize[1] - 2), (left + labelSize[0], top + baseLine),
                     (255, 255, 255), cv.FILLED)
        cv.putText(frame, label, (left, top), cv.FONT_HERSHEY_SIMPLEX, 0.4, (0, 0, 0))

    # NOTE(review): the `self.net` calls in this method were truncated in the
    # listing this file was recovered from and are restored here to match the
    # surviving fragments - confirm against the upstream module.
    layerNames = self.net.getLayerNames()
    lastLayerId = self.net.getLayerId(layerNames[-1])
    lastLayer = self.net.getLayer(lastLayerId)
    isRcnn = self.net.getLayer(0).outputNameToIndex('im_info') != -1  # Faster-RCNN or R-FCN

    classIds = []
    confidences = []
    boxes = []
    if isRcnn or lastLayer.type == 'DetectionOutput':
        # Network produces output blob with a shape 1x1xNx7 where N is a number of
        # detections and every detection is a vector of values
        # [batchId, classId, confidence, left, top, right, bottom].
        # Faster-RCNN / R-FCN coordinates are used unscaled; DetectionOutput
        # coordinates are scaled by the frame size.
        xScale, yScale = (1, 1) if isRcnn else (frameWidth, frameHeight)
        for out in outs:
            for detection in out[0, 0]:
                confidence = detection[2]
                if confidence > self.confThreshold:
                    left = int(detection[3] * xScale)
                    top = int(detection[4] * yScale)
                    right = int(detection[5] * xScale)
                    bottom = int(detection[6] * yScale)
                    classIds.append(int(detection[1]) - 1)  # Skip background label
                    confidences.append(float(confidence))
                    boxes.append([left, top, right - left + 1, bottom - top + 1])
    elif lastLayer.type == 'Region':
        # Network produces output blob with a shape NxC where N is a number of
        # detected objects. The first 4 numbers of each row are
        # [center_x, center_y, width, height]; class scores start at index 5.
        for out in outs:
            for detection in out:
                scores = detection[5:]
                classId = np.argmax(scores)
                confidence = scores[classId]
                if confidence > self.confThreshold:
                    center_x = int(detection[0] * frameWidth)
                    center_y = int(detection[1] * frameHeight)
                    width = int(detection[2] * frameWidth)
                    height = int(detection[3] * frameHeight)
                    left = int(center_x - width / 2)
                    top = int(center_y - height / 2)
                    classIds.append(int(classId))
                    confidences.append(float(confidence))
                    boxes.append([left, top, width, height])
    else:
        jevois.LERROR('Unknown output layer type: ' + lastLayer.type)
        return

    # NMS is used inside Region layer only on DNN_BACKEND_OPENCV; for other backends we need NMS here,
    # and NMS is also required if the number of outputs is > 1.
    if len(self.outNames) > 1:
        indices = []
        classIds = np.array(classIds)
        boxes = np.array(boxes)
        confidences = np.array(confidences)
        # Suppress duplicates independently within each class.
        for cl in np.unique(classIds):
            class_indices = np.where(classIds == cl)[0]
            kept = cv.dnn.NMSBoxes(boxes[class_indices].tolist(), confidences[class_indices].tolist(),
                                   self.confThreshold, self.nmsThreshold)
            # Flatten so that N, Nx1 and empty results all index class_indices
            # correctly (an empty tuple used directly would select every box).
            kept = np.asarray(kept, dtype=int).reshape(-1)
            indices.extend(class_indices[kept])
    else:
        indices = range(len(classIds))

    for i in indices:
        # Plain ints, whether boxes is still a list or became a numpy array.
        left, top, width, height = (int(v) for v in boxes[i])
        drawPred(int(classIds[i]), float(confidences[i]), left, top, left + width, top + height)
250 # ####################################################################################################
251 ## JeVois main processing function
def process(self, inframe, outframe):
    """JeVois main processing function: detect objects and send an annotated frame.

    Args:
        inframe: JeVois input frame; read through getCvBGR().
        outframe: JeVois output frame; receives the result through sendCv().

    The output is the input image with detections drawn on it and a
    22-pixel-tall status panel (timer string and inference time) stacked
    below it.
    """
    frame = inframe.getCvBGR()
    self.timer.start()

    # Create a 4D blob from a frame.
    blob = cv.dnn.blobFromImage(frame, self.scale, (self.inpWidth, self.inpHeight), self.mean, self.rgb,
                                crop=False)

    # Run a model.
    # NOTE(review): the `self.net` calls in this method were truncated in the
    # listing this file was recovered from and are restored here to match the
    # surviving fragments - confirm against the upstream module.
    self.net.setInput(blob)
    if self.net.getLayer(0).outputNameToIndex('im_info') != -1:  # Faster-RCNN or R-FCN
        # These networks report boxes in network-input pixels, so draw on a
        # frame of that same size and pass the size through 'im_info'.
        frame = cv.resize(frame, (self.inpWidth, self.inpHeight))
        self.net.setInput(np.array([self.inpHeight, self.inpWidth, 1.6], dtype=np.float32), 'im_info')
    outs = self.net.forward(self.outNames)

    self.postprocess(frame, outs)

    # Create dark-gray (value 80) image for the bottom panel, 22 pixels tall:
    msgbox = np.full((22, frame.shape[1], 3), 80, dtype=np.uint8)

    # Put efficiency information.
    cv.putText(frame, 'JeVois Python Object Detection DNN - ' + self.model, (3, 15),
               cv.FONT_HERSHEY_SIMPLEX, 0.4, (255, 255, 255), 1, cv.LINE_AA)
    t, _ = self.net.getPerfProfile()
    fps = self.timer.stop()
    label = fps + ' - Inference time: %.2fms' % (t * 1000.0 / cv.getTickFrequency())
    cv.putText(msgbox, label, (3, 15), cv.FONT_HERSHEY_SIMPLEX, 0.4, (255, 255, 255), 1, cv.LINE_AA)

    # Stack bottom panel below main image:
    frame = np.vstack((frame, msgbox))

    # Send output frame to host:
    outframe.sendCv(frame)
