JeVoisBase  1.22
JeVois Smart Embedded Machine Vision Toolkit Base Modules
Share this page:
Loading...
Searching...
No Matches
PyDetectionDNN.py
Go to the documentation of this file.
1import pyjevois
2if pyjevois.pro: import libjevoispro as jevois
3else: import libjevois as jevois
4import cv2 as cv
5import numpy as np
6import sys
7
8## Object detection and recognition using OpenCV Deep Neural Networks (DNN)
9#
10# This module runs an object detection deep neural network using the OpenCV DNN
11# library. Detection networks analyze a whole scene and produce a number of
12# bounding boxes around detected objects, together with identity labels
13# and confidence scores for each detected box.
14#
15# This module supports detection networks implemented in TensorFlow, Caffe,
16# Darknet, Torch, etc as supported by the OpenCV DNN module.
17#
18# Included with the standard JeVois distribution are:
19#
20# - OpenCV Face Detector, Caffe model
21# - MobileNet + SSD trained on Pascal VOC (20 object classes), Caffe model
22# - MobileNet + SSD trained on Coco (80 object classes), TensorFlow model
23# - MobileNet v2 + SSD trained on Coco (80 object classes), TensorFlow model
24# - Darknet Tiny YOLO v3 trained on Coco (80 object classes), Darknet model
25# - Darknet Tiny YOLO v2 trained on Pascal VOC (20 object classes), Darknet model
26#
27# See the module's constructor (__init__) code and select a value for \b model to switch network. Object categories are
28# as follows:
29# - The 80 COCO object categories are: person, bicycle, car, motorbike, aeroplane, bus, train, truck, boat,
30# traffic light, fire hydrant, stop sign, parking meter, bench, bird, cat, dog, horse, sheep, cow, elephant, bear,
31# zebra, giraffe, backpack, umbrella, handbag, tie, suitcase, frisbee, skis, snowboard, sports ball, kite, baseball bat,
32# baseball glove, skateboard, surfboard, tennis racket, bottle, wine glass, cup, fork, knife, spoon, bowl, banana, apple,
33# sandwich, orange, broccoli, carrot, hot dog, pizza, donut, cake, chair, sofa, pottedplant, bed, diningtable, toilet,
34# tvmonitor, laptop, mouse, remote, keyboard, cell phone, microwave, oven, toaster, sink, refrigerator, book, clock, vase, scissors, teddy bear, hair drier, toothbrush.
35#
36# - The 20 Pascal-VOC object categories are: aeroplane, bicycle, bird, boat, bottle, bus, car, cat, chair, cow,
37# diningtable, dog, horse, motorbike, person, pottedplant, sheep, sofa, train, tvmonitor.
38#
39# Sometimes it will make mistakes! The performance of yolov3-tiny is about 33.1% correct (mean average precision) on
40# the COCO test set. The OpenCV Face Detector is quite fast and robust!
41#
42# This module is adapted from the sample OpenCV code:
43# https://github.com/opencv/opencv/blob/master/samples/dnn/object_detection.py
44#
45# More pre-trained models are available on github in opencv_extra
46#
47#
48# @author Laurent Itti
49#
50# @videomapping YUYV 640 502 20.0 YUYV 640 480 20.0 JeVois PyDetectionDNN
51# @email itti@usc.edu
52# @address 880 W 1st St Suite 807, Los Angeles CA 90012, USA
53# @copyright Copyright (C) 2018 by Laurent Itti
54# @mainurl http://jevois.org
55# @supporturl http://jevois.org
56# @otherurl http://jevois.org
57# @license GPL v3
58# @distribution Unrestricted
59# @restrictions None
60# @ingroup modules
62 # ####################################################################################################
63 ## Constructor
def __init__(self):
    """Set detection parameters, select a model, and load the network.

    The model is chosen by editing the local ``model`` variable below. Any
    value that is not one of the known model names falls back to the OpenCV
    face detector files and settings.
    """
    self.confThreshold = 0.5  # Confidence threshold (0..1), higher for stricter detection confidence.
    # Non-maximum suppression threshold (0..1) handed to cv.dnn.NMSBoxes: boxes of the same class that overlap
    # a stronger box by more than this are dropped, so use a LOWER value to remove more duplicate boxes.
    self.nmsThreshold = 0.4
    self.inpWidth = 160  # Resized image width passed to network
    self.inpHeight = 120  # Resized image height passed to network
    self.scale = 2/255  # Value scaling factor applied to input pixels
    self.mean = [127.5, 127.5, 127.5]  # Mean BGR value subtracted from input image
    self.rgb = True  # True if model expects RGB inputs, otherwise it expects BGR

    # Select one of the models:
    model = 'Face'  # OpenCV Face Detector, Caffe model
    #model = 'MobileNetV2SSD' # MobileNet v2 + SSD trained on Coco (80 object classes), TensorFlow model
    #model = 'MobileNetSSD' # MobileNet + SSD trained on Pascal VOC (20 object classes), Caffe model
    #model = 'MobileNetSSDcoco' # MobileNet + SSD trained on Coco (80 object classes), TensorFlow model
    #model = 'YOLOv3' # Darknet Tiny YOLO v3 trained on Coco (80 object classes), Darknet model
    #model = 'YOLOv2' # Darknet Tiny YOLO v2 trained on Pascal VOC (20 object classes), Darknet model

    # You should not have to edit anything beyond this point.
    backend = cv.dnn.DNN_BACKEND_OPENCV
    target = cv.dnn.DNN_TARGET_CPU

    detdir = pyjevois.share + '/opencv-dnn/detection/'
    yolodir = pyjevois.share + '/darknet/yolo/'

    # model name -> (class names file, weights file, config file, attributes overriding the defaults above)
    zoo = {
        'MobileNetSSD': (
            yolodir + 'data/voc.names',
            detdir + 'MobileNetSSD_deploy.caffemodel',
            detdir + 'MobileNetSSD_deploy.prototxt',
            {'rgb': False},
        ),
        'MobileNetV2SSD': (
            yolodir + 'data/coco.names',
            detdir + 'ssd_mobilenet_v2_coco_2018_03_29.pb',
            detdir + 'ssd_mobilenet_v2_coco_2018_03_29.pbtxt',
            {},
        ),
        'MobileNetSSDcoco': (
            yolodir + 'data/coco.names',
            detdir + 'ssd_mobilenet_v1_coco_2017_11_17.pb',
            detdir + 'ssd_mobilenet_v1_coco_2017_11_17.pbtxt',
            {'rgb': False, 'nmsThreshold': 0.1},
        ),
        'YOLOv3': (
            yolodir + 'data/coco.names',
            yolodir + 'weights/yolov3-tiny.weights',
            yolodir + 'cfg/yolov3-tiny.cfg',
            {},
        ),
        'YOLOv2': (
            yolodir + 'data/voc.names',
            yolodir + 'weights/yolov2-tiny-voc.weights',
            yolodir + 'cfg/yolov2-tiny-voc.cfg',
            {'inpWidth': 320, 'inpHeight': 240},
        ),
        'Face': (
            detdir + 'opencv_face_detector.classes',
            detdir + 'opencv_face_detector.caffemodel',
            detdir + 'opencv_face_detector.prototxt',
            {'scale': 1.0, 'mean': [104.0, 177.0, 123.0], 'rgb': False},
        ),
    }

    # Unknown names use the face detector, as the original if/elif chain's final else did.
    classnames, modelname, configname, overrides = zoo.get(model, zoo['Face'])
    for attr, value in overrides.items():
        setattr(self, attr, value)

    # Load names of classes, one per line. The encoding is explicit so the result does not depend on the
    # locale configured on the device.
    self.classes = None
    if classnames:
        with open(classnames, 'rt', encoding='utf-8') as f:
            self.classes = f.read().rstrip('\n').split('\n')

    # Load a network
    self.net = cv.dnn.readNet(modelname, configname)
    self.net.setPreferableBackend(backend)
    self.net.setPreferableTarget(target)
    self.timer = jevois.Timer('Neural detection', 10, jevois.LOG_DEBUG)
    self.model = model  # Kept for the on-screen title drawn in process()
    self.outNames = self.net.getUnconnectedOutLayersNames()
131
132 # ####################################################################################################
133 ## Analyze and draw boxes, object names, and confidence scores
def postprocess(self, frame, outs):
    """Decode network outputs and draw boxes, class names and scores on ``frame``.

    ``frame`` is drawn on in place. ``outs`` is the sequence of output blobs
    returned by ``self.net.forward(self.outNames)``. How the blobs are decoded
    depends on the network: an 'im_info' input on layer 0, or a last layer of
    type 'DetectionOutput' or 'Region'. For any other last-layer type an error
    is logged through jevois.LERROR and nothing is drawn.
    """
    frameHeight = frame.shape[0]
    frameWidth = frame.shape[1]

    def drawPred(classId, conf, left, top, right, bottom):
        # Box coordinates may arrive as numpy integers (after the NMS step below); use plain ints for drawing.
        left, top, right, bottom = int(left), int(top), int(right), int(bottom)

        # Draw a bounding box.
        cv.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)

        label = '%.2f' % (conf * 100)

        # Print a label of class. The lower bound matters because the 'skip background' shift below can yield
        # -1, which would otherwise silently index the last class name.
        if self.classes:
            if 0 <= classId < len(self.classes):
                label = '%s: %s' % (self.classes[classId], label)
            else:
                label = 'Oooops id=%d: %s' % (classId, label)

        labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.4, 1)
        top = max(top, labelSize[1])  # Keep the label inside the image when the box touches the top edge
        cv.rectangle(frame, (left, top - labelSize[1]-2), (left + labelSize[0], top + baseLine),
                     (255, 255, 255), cv.FILLED)
        cv.putText(frame, label, (left, top), cv.FONT_HERSHEY_SIMPLEX, 0.4, (0, 0, 0))

    layerNames = self.net.getLayerNames()
    lastLayerId = self.net.getLayerId(layerNames[-1])
    lastLayer = self.net.getLayer(lastLayerId)

    classIds = []
    confidences = []
    boxes = []

    def collectDetections(xscale, yscale):
        # Network produces output blob with a shape 1x1xNx7 where N is a number of
        # detections and an every detection is a vector of values
        # [batchId, classId, confidence, left, top, right, bottom]
        for out in outs:
            for detection in out[0, 0]:
                confidence = detection[2]
                if confidence > self.confThreshold:
                    left = int(detection[3] * xscale)
                    top = int(detection[4] * yscale)
                    right = int(detection[5] * xscale)
                    bottom = int(detection[6] * yscale)
                    classIds.append(int(detection[1]) - 1)  # Skip background label
                    confidences.append(float(confidence))
                    boxes.append([left, top, right - left + 1, bottom - top + 1])

    if self.net.getLayer(0).outputNameToIndex('im_info') != -1:  # Faster-RCNN or R-FCN
        # Coordinates are used as-is (no scaling by the frame size).
        collectDetections(1, 1)
    elif lastLayer.type == 'DetectionOutput':
        # Coordinates are multiplied by the frame width and height.
        collectDetections(frameWidth, frameHeight)
    elif lastLayer.type == 'Region':
        # Network produces output blob with a shape NxC where N is a number of detected objects. Each row
        # starts with [center_x, center_y, width, height]; the per-class scores are read from index 5 onwards.
        for out in outs:
            for detection in out:
                scores = detection[5:]
                classId = np.argmax(scores)
                confidence = scores[classId]
                if confidence > self.confThreshold:
                    center_x = int(detection[0] * frameWidth)
                    center_y = int(detection[1] * frameHeight)
                    width = int(detection[2] * frameWidth)
                    height = int(detection[3] * frameHeight)
                    left = int(center_x - width / 2)
                    top = int(center_y - height / 2)
                    classIds.append(classId)
                    confidences.append(float(confidence))
                    boxes.append([left, top, width, height])
    else:
        jevois.LERROR('Unknown output layer type: ' + lastLayer.type)
        return

    # NMS is used inside Region layer only on DNN_BACKEND_OPENCV for another backends we need NMS in sample
    # or NMS is required if number of outputs > 1
    if len(self.outNames) > 1:
        indices = []
        classIds = np.array(classIds)
        boxes = np.array(boxes)
        confidences = np.array(confidences)
        # Suppress overlapping boxes separately for each class.
        for cl in set(classIds):
            class_indices = np.where(classIds == cl)[0]
            conf = confidences[class_indices]
            box = boxes[class_indices].tolist()
            nms_indices = cv.dnn.NMSBoxes(box, conf, self.confThreshold, self.nmsThreshold)
            # Depending on the OpenCV version the result may be a flat array, an Nx1 array, or an empty
            # tuple. Flatten to a 1-D integer array so that indexing selects exactly the kept boxes
            # (indexing with an empty tuple would select all of them).
            keep = np.asarray(nms_indices, dtype=int).reshape(-1)
            indices.extend(class_indices[keep])
    else:
        indices = range(len(classIds))

    for i in indices:
        left, top, width, height = boxes[i]
        drawPred(classIds[i], confidences[i], left, top, left + width, top + height)
249
250 # ####################################################################################################
251 ## JeVois main processing function
def process(self, inframe, outframe):
    """Run the detector on one input frame and send the annotated result.

    The BGR image obtained from ``inframe`` is passed through the network,
    detections are drawn on it by postprocess(), a 22-pixel status panel
    is stacked below it, and the stacked image is sent through
    ``outframe.sendCv``.
    """
    frame = inframe.getCvBGR()
    self.timer.start()

    # Create a 4D blob from a frame.
    blob = cv.dnn.blobFromImage(frame, self.scale, (self.inpWidth, self.inpHeight), self.mean, self.rgb,
                                crop=False)

    # Run a model
    self.net.setInput(blob)
    if self.net.getLayer(0).outputNameToIndex('im_info') != -1:  # Faster-RCNN or R-FCN
        # These networks take the image size as a second input; the frame that gets drawn on is resized to
        # the network input size here.
        frame = cv.resize(frame, (self.inpWidth, self.inpHeight))
        self.net.setInput(np.array([self.inpHeight, self.inpWidth, 1.6], dtype=np.float32), 'im_info')
    outs = self.net.forward(self.outNames)

    self.postprocess(frame, outs)

    # Create dark-gray (value 80) image for the bottom panel, 22 pixels tall:
    msgbox = np.full((22, frame.shape[1], 3), 80, dtype=np.uint8)

    # Title on the main image.
    cv.putText(frame, 'JeVois Python Object Detection DNN - ' + self.model, (3, 15),
               cv.FONT_HERSHEY_SIMPLEX, 0.4, (255, 255, 255), 1, cv.LINE_AA)

    # Put efficiency information in the bottom panel: the text returned by the timer followed by the
    # network's inference time converted from ticks to milliseconds.
    t, _ = self.net.getPerfProfile()
    fps = self.timer.stop()
    label = fps + ' - Inference time: %.2fms' % (t * 1000.0 / cv.getTickFrequency())
    cv.putText(msgbox, label, (3, 15), cv.FONT_HERSHEY_SIMPLEX, 0.4, (255, 255, 255), 1, cv.LINE_AA)

    # Stack bottom panel below main image:
    frame = np.vstack((frame, msgbox))

    # Send output frame to host:
    outframe.sendCv(frame)
Object detection and recognition using OpenCV Deep Neural Networks (DNN)
process(self, inframe, outframe)
JeVois main processing function.
postprocess(self, frame, outs)
Analyze and draw boxes, object names, and confidence scores.