JeVoisBase  1.10
JeVois Smart Embedded Machine Vision Toolkit Base Modules
Share this page:
PyDetectionDNN.py
Go to the documentation of this file.
1 import libjevois as jevois
2 import cv2 as cv
3 import numpy as np
4 import sys
5 
6 ## Object detection and recognition using OpenCV Deep Neural Networks (DNN)
7 #
8 # This module runs an object detection deep neural network using the OpenCV DNN
9 # library. Detection networks analyze a whole scene and produce a number of
10 # bounding boxes around detected objects, together with identity labels
11 # and confidence scores for each detected box.
12 #
13 # This module supports detection networks implemented in TensorFlow, Caffe,
14 # Darknet, Torch, etc as supported by the OpenCV DNN module.
15 #
16 # Included with the standard JeVois distribution are:
17 #
18 # - OpenCV Face Detector, Caffe model
19 # - MobileNet + SSD trained on Pascal VOC (20 object classes), Caffe model
20 # - MobileNet + SSD trained on Coco (80 object classes), TensorFlow model
21 # - MobileNet v2 + SSD trained on Coco (80 object classes), TensorFlow model
22 # - Darknet Tiny YOLO v3 trained on Coco (80 object classes), Darknet model
23 # - Darknet Tiny YOLO v2 trained on Pascal VOC (20 object classes), Darknet model
24 #
25 # See the module's constructor (__init__) code and select a value for \b model to switch network. Object categories are
26 # as follows:
27 # - The 80 COCO object categories are: person, bicycle, car, motorbike, aeroplane, bus, train, truck, boat,
28 # traffic light, fire hydrant, stop sign, parking meter, bench, bird, cat, dog, horse, sheep, cow, elephant,
29 # bear, zebra, giraffe, backpack, umbrella, handbag, tie, suitcase, frisbee, skis, snowboard, sports ball, kite,
30 # baseball bat, baseball glove, skateboard, surfboard, tennis racket, bottle, wine glass, cup, fork, knife, spoon,
31 # bowl, banana, apple, sandwich, orange, broccoli, carrot, hot dog, pizza, donut, cake, chair, sofa, pottedplant,
32 # bed, diningtable, toilet, tvmonitor, laptop, mouse, remote, keyboard, cell phone, microwave, oven, toaster,
33 # sink, refrigerator, book, clock, vase, scissors, teddy bear, hair drier, toothbrush.
33 #
34 # - The 20 Pascal-VOC object categories are: aeroplane, bicycle, bird, boat, bottle, bus, car, cat, chair, cow,
35 # diningtable, dog, horse, motorbike, person, pottedplant, sheep, sofa, train, tvmonitor.
36 #
37 # Sometimes it will make mistakes! The performance of yolov3-tiny is about 33.1% correct (mean average precision) on
38 # the COCO test set. The OpenCV Face Detector is quite fast and robust!
39 #
40 # This module is adapted from the sample OpenCV code:
41 # https://github.com/opencv/opencv/blob/master/samples/dnn/object_detection.py
42 #
43 # More pre-trained models are available on github in opencv_extra
44 #
45 #
46 # @author Laurent Itti
47 #
48 # @videomapping YUYV 640 502 20.0 YUYV 640 480 20.0 JeVois PyDetectionDNN
49 # @email itti@usc.edu
50 # @address 880 W 1st St Suite 807, Los Angeles CA 90012, USA
51 # @copyright Copyright (C) 2018 by Laurent Itti
52 # @mainurl http://jevois.org
53 # @supporturl http://jevois.org
54 # @otherurl http://jevois.org
55 # @license GPL v3
56 # @distribution Unrestricted
57 # @restrictions None
58 # @ingroup modules
60  # ####################################################################################################
61  ## Constructor
    def __init__(self):
        """Constructor.

        Selects one of several pre-trained object detection networks shipped with
        JeVois (Caffe, TensorFlow, or Darknet formats), sets the pre-processing
        parameters that model expects (input size, pixel scaling, mean subtraction,
        channel order), loads its class-name list, and loads the network with
        OpenCV's DNN module on the default backend and CPU target.
        """
        # Default pre-processing parameters; some are overridden per-model below:
        self.confThreshold = 0.5 # Confidence threshold (0..1), higher for stricter detection confidence.
        self.nmsThreshold = 0.4 # Non-maximum suppression threshold (0..1), higher to remove more duplicate boxes.
        self.inpWidth = 160 # Resized image width passed to network
        self.inpHeight = 120 # Resized image height passed to network
        self.scale = 2/255 # Value scaling factor applied to input pixels
        self.mean = [127.5, 127.5, 127.5] # Mean BGR value subtracted from input image
        self.rgb = True # True if model expects RGB inputs, otherwise it expects BGR

        # Select one of the models (uncomment exactly one line):
        model = 'Face' # OpenCV Face Detector, Caffe model
        #model = 'MobileNetV2SSD' # MobileNet v2 + SSD trained on Coco (80 object classes), TensorFlow model
        #model = 'MobileNetSSD' # MobileNet + SSD trained on Pascal VOC (20 object classes), Caffe model
        #model = 'MobileNetSSDcoco' # MobileNet + SSD trained on Coco (80 object classes), TensorFlow model
        #model = 'YOLOv3' # Darknet Tiny YOLO v3 trained on Coco (80 object classes), Darknet model
        #model = 'YOLOv2' # Darknet Tiny YOLO v2 trained on Pascal VOC (20 object classes), Darknet model

        # You should not have to edit anything beyond this point.
        backend = cv.dnn.DNN_BACKEND_DEFAULT
        target = cv.dnn.DNN_TARGET_CPU
        self.classes = None  # list of class-name strings, or None if no class file
        classnames = None
        if (model == 'MobileNetSSD'):
            classnames = '/jevois/share/darknet/yolo/data/voc.names'
            modelname = '/jevois/share/opencv-dnn/detection/MobileNetSSD_deploy.caffemodel'
            configname = '/jevois/share/opencv-dnn/detection/MobileNetSSD_deploy.prototxt'
            self.rgb = False # this Caffe model expects BGR inputs
        elif (model == 'MobileNetV2SSD'):
            classnames = '/jevois/share/darknet/yolo/data/coco.names'
            modelname = '/jevois/share/opencv-dnn/detection/ssd_mobilenet_v2_coco_2018_03_29.pb'
            configname = '/jevois/share/opencv-dnn/detection/ssd_mobilenet_v2_coco_2018_03_29.pbtxt'
        elif (model == 'MobileNetSSDcoco'):
            classnames = '/jevois/share/darknet/yolo/data/coco.names'
            modelname = '/jevois/share/opencv-dnn/detection/ssd_mobilenet_v1_coco_2017_11_17.pb'
            configname = '/jevois/share/opencv-dnn/detection/ssd_mobilenet_v1_coco_2017_11_17.pbtxt'
            self.rgb = False
            self.nmsThreshold = 0.1 # this model emits many overlapping boxes; suppress more aggressively
        elif (model == 'YOLOv3'):
            classnames = '/jevois/share/darknet/yolo/data/coco.names'
            modelname = '/jevois/share/darknet/yolo/weights/yolov3-tiny.weights'
            configname = '/jevois/share/darknet/yolo/cfg/yolov3-tiny.cfg'
        elif (model == 'YOLOv2'):
            classnames = '/jevois/share/darknet/yolo/data/voc.names'
            modelname = '/jevois/share/darknet/yolo/weights/yolov2-tiny-voc.weights'
            configname = '/jevois/share/darknet/yolo/cfg/yolov2-tiny-voc.cfg'
            self.inpWidth = 320 # YOLOv2 needs a larger input size
            self.inpHeight = 240
        else: # default: OpenCV Face Detector, Caffe model
            classnames = '/jevois/share/opencv-dnn/detection/opencv_face_detector.classes'
            modelname = '/jevois/share/opencv-dnn/detection/opencv_face_detector.caffemodel'
            configname = '/jevois/share/opencv-dnn/detection/opencv_face_detector.prototxt'
            self.scale = 1.0 # face detector takes raw pixel values, mean-subtracted only
            self.mean = [104.0, 177.0, 123.0]
            self.rgb = False

        # Load names of classes (one name per line):
        if classnames:
            with open(classnames, 'rt') as f:
                self.classes = f.read().rstrip('\n').split('\n')

        # Load the network:
        self.net = cv.dnn.readNet(modelname, configname)
        self.net.setPreferableBackend(backend)
        self.net.setPreferableTarget(target)
        self.timer = jevois.Timer('Neural detection', 10, jevois.LOG_DEBUG)
        self.model = model
128 
129  # ####################################################################################################
130  ## Get names of the network's output layers
131  def getOutputsNames(self, net):
132  layersNames = self.net.getLayerNames()
133  return [layersNames[i[0] - 1] for i in net.getUnconnectedOutLayers()]
134 
135  # ####################################################################################################
136  ## Analyze and draw boxes, object names, and confidence scores
137  def postprocess(self, frame, outs):
138  frameHeight = frame.shape[0]
139  frameWidth = frame.shape[1]
140 
141  def drawPred(classId, conf, left, top, right, bottom):
142  # Draw a bounding box.
143  cv.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)
144 
145  label = '%.2f' % (conf * 100)
146 
147  # Print a label of class.
148  if self.classes:
149  if (classId >= len(self.classes)):
150  label = 'Oooops id=%d: %s' % (classId, label)
151  else:
152  label = '%s: %s' % (self.classes[classId], label)
153 
154  labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.4, 1)
155  top = max(top, labelSize[1])
156  cv.rectangle(frame, (left, top - labelSize[1]-2), (left + labelSize[0], top + baseLine),
157  (255, 255, 255), cv.FILLED)
158  cv.putText(frame, label, (left, top), cv.FONT_HERSHEY_SIMPLEX, 0.4, (0, 0, 0))
159 
160  layerNames = self.net.getLayerNames()
161  lastLayerId = self.net.getLayerId(layerNames[-1])
162  lastLayer = self.net.getLayer(lastLayerId)
163 
164  classIds = []
165  confidences = []
166  boxes = []
167  if self.net.getLayer(0).outputNameToIndex('im_info') != -1: # Faster-RCNN or R-FCN
168  # Network produces output blob with a shape 1x1xNx7 where N is a number of
169  # detections and an every detection is a vector of values
170  # [batchId, classId, confidence, left, top, right, bottom]
171  for out in outs:
172  for detection in out[0, 0]:
173  confidence = detection[2]
174  if confidence > self.confThreshold:
175  left = int(detection[3])
176  top = int(detection[4])
177  right = int(detection[5])
178  bottom = int(detection[6])
179  width = right - left + 1
180  height = bottom - top + 1
181  classIds.append(int(detection[1]) - 1) # Skip background label
182  confidences.append(float(confidence))
183  boxes.append([left, top, width, height])
184  elif lastLayer.type == 'DetectionOutput':
185  # Network produces output blob with a shape 1x1xNx7 where N is a number of
186  # detections and an every detection is a vector of values
187  # [batchId, classId, confidence, left, top, right, bottom]
188  for out in outs:
189  for detection in out[0, 0]:
190  confidence = detection[2]
191  if confidence > self.confThreshold:
192  left = int(detection[3] * frameWidth)
193  top = int(detection[4] * frameHeight)
194  right = int(detection[5] * frameWidth)
195  bottom = int(detection[6] * frameHeight)
196  width = right - left + 1
197  height = bottom - top + 1
198  classIds.append(int(detection[1]) - 1) # Skip background label
199  confidences.append(float(confidence))
200  boxes.append([left, top, width, height])
201  elif lastLayer.type == 'Region':
202  # Network produces output blob with a shape NxC where N is a number of
203  # detected objects and C is a number of classes + 4 where the first 4
204  # numbers are [center_x, center_y, width, height]
205  classIds = []
206  confidences = []
207  boxes = []
208  for out in outs:
209  for detection in out:
210  scores = detection[5:]
211  classId = np.argmax(scores)
212  confidence = scores[classId]
213  if confidence > self.confThreshold:
214  center_x = int(detection[0] * frameWidth)
215  center_y = int(detection[1] * frameHeight)
216  width = int(detection[2] * frameWidth)
217  height = int(detection[3] * frameHeight)
218  left = int(center_x - width / 2)
219  top = int(center_y - height / 2)
220  classIds.append(classId)
221  confidences.append(float(confidence))
222  boxes.append([left, top, width, height])
223  else:
224  jevois.LERROR('Unknown output layer type: ' + lastLayer.type)
225  return
226 
227  indices = cv.dnn.NMSBoxes(boxes, confidences, self.confThreshold, self.nmsThreshold)
228  for i in indices:
229  i = i[0]
230  box = boxes[i]
231  left = box[0]
232  top = box[1]
233  width = box[2]
234  height = box[3]
235  drawPred(classIds[i], confidences[i], left, top, left + width, top + height)
236 
237  # ####################################################################################################
238  ## JeVois main processing function
239  def process(self, inframe, outframe):
240  frame = inframe.getCvBGR()
241  self.timer.start()
242 
243  frameHeight = frame.shape[0]
244  frameWidth = frame.shape[1]
245 
246  # Create a 4D blob from a frame.
247  blob = cv.dnn.blobFromImage(frame, self.scale, (self.inpWidth, self.inpHeight), self.mean, self.rgb, crop=False)
248 
249  # Run a model
250  self.net.setInput(blob)
251  if self.net.getLayer(0).outputNameToIndex('im_info') != -1: # Faster-RCNN or R-FCN
252  frame = cv.resize(frame, (self.inpWidth, self.inpHeight))
253  self.net.setInput(np.array([inpHeight, inpWidth, 1.6], dtype=np.float32), 'im_info')
254  outs = self.net.forward(self.getOutputsNames(self.net))
255 
256  self.postprocess(frame, outs)
257 
258  # Create dark-gray (value 80) image for the bottom panel, 22 pixels tall:
259  msgbox = np.zeros((22, frame.shape[1], 3), dtype = np.uint8) + 80
260 
261  # Put efficiency information.
262  cv.putText(frame, 'JeVois Python Object Detection DNN - ' + self.model, (3, 15),
263  cv.FONT_HERSHEY_SIMPLEX, 0.4, (255, 255, 255), 1, cv.LINE_AA)
264  t, _ = self.net.getPerfProfile()
265  fps = self.timer.stop()
266  label = fps + ' - Inference time: %.2fms' % (t * 1000.0 / cv.getTickFrequency())
267  cv.putText(msgbox, label, (3, 15), cv.FONT_HERSHEY_SIMPLEX, 0.4, (255, 255, 255), 1, cv.LINE_AA)
268 
269  # Stack bottom panel below main image:
270  frame = np.vstack((frame, msgbox))
271 
272  # Send output frame to host:
273  outframe.sendCv(frame)
def process(self, inframe, outframe)
JeVois main processing function.
Object detection and recognition using OpenCV Deep Neural Networks (DNN)
def postprocess(self, frame, outs)
Analyze and draw boxes, object names, and confidence scores.
def __init__(self)
Constructor.
def getOutputsNames(self, net)
Get names of the network's output layers.
std::vector< std::string > split(std::string const &input, std::string const &regex="\")