JeVoisBase  1.20
JeVois Smart Embedded Machine Vision Toolkit Base Modules
Share this page:
PySceneText.py
Go to the documentation of this file.
1 import pyjevois
2 if pyjevois.pro: import libjevoispro as jevois
3 else: import libjevois as jevois
4 import cv2
5 import numpy as np
6 
7 from crnn import CRNN
8 from db import DB
9 
10 ## Detect and decode English or Chinese text on NPU using TIM-VX
11 #
12 # This module runs on the JeVois-Pro NPU using a quantized deep neural network. It is derived from
13 # https://github.com/opencv/opencv_zoo/tree/master/models/text_recognition_crnn
14 # See LICENSE for license information.
15 #
16 # By default, English is used, but you can change that to Chinese in the constructor
17 #
18 # This module is mainly intended as a tutorial for how to run quantized int8 DNNs on the NPU using OpenCV and TIM-VX,
19 # here achieved through only small modifications of code from https://github.com/opencv/opencv_zoo - in particular,
20 # the core classes for this model, CRNN and DB, were not modified at all, and only the demo code was edited to use the
21 # JeVois GUI Helper for fast OpenGL drawing as opposed to slow drawings into OpenCV images.
22 #
23 # @author Laurent Itti
24 #
25 # @displayname PySceneText
26 # @videomapping JVUI 0 0 30.0 CropScale=RGB24@512x288:YUYV 1920 1080 30.0 JeVois PySceneText
27 # @email itti\@usc.edu
28 # @address University of Southern California, HNB-07A, 3641 Watt Way, Los Angeles, CA 90089-2520, USA
29 # @copyright Copyright (C) 2022 by Laurent Itti, iLab and the University of Southern California
30 # @mainurl http://jevois.org
31 # @supporturl http://jevois.org/doc
32 # @otherurl http://iLab.usc.edu
33 # @license GPL v3
34 # @distribution Unrestricted
35 # @restrictions None
36 # @ingroup modules
38  # ###################################################################################################
39  ## Constructor
40  def __init__(self):
41  # Select a language here:
42  lang = "English"
43  #lang = "Chinese"
44 
45  # Instantiate the model
46  if lang == "English":
47  self.modelname = "text_recognition_CRNN_EN_2021sep-act_int8-wt_int8-quantized.onnx"
48  self.charset = "charset_36_EN.txt"
49  self.tdname = "text_detection_DB_IC15_resnet18_2021sep.onnx"
50  elif lang == "Chinese":
51  self.modelname = "text_recognition_CRNN_CN_2021nov-act_int8-wt_int8-quantized.onnx"
52  self.charset = "charset_3944_CN.txt"
53  self.tdname = "text_detection_DB_TD500_resnet18_2021sep.onnx"
54  else:
55  jevois.LFATAL("Invalid language selected")
56 
57  root = "/jevoispro/share/npu/other/"
58 
59  self.recognizer = CRNN(modelPath = root + self.modelname, charsetPath = root + self.charset)
60 
61  # Instantiate DB for text detection
62  self.detector = DB(modelPath = root + self.tdname,
63  inputSize = [512, 288],
64  binaryThreshold = 0.3,
65  polygonThreshold = 0.5,
66  maxCandidates = 200,
67  unclipRatio = 2.0,
68  backendId = cv2.dnn.DNN_BACKEND_TIMVX,
69  targetId = cv2.dnn.DNN_TARGET_NPU)
70 
71  # Instantiate a timer for framerate:
72  self.timer = jevois.Timer('PySceneText', 10, jevois.LOG_DEBUG)
73 
74  # Keep track of frame size:
75  self.w, self.h = (0, 0)
76 
77  # ####################################################################################################
78  ## Process function with GUI output
79  def processGUI(self, inframe, helper):
80  # Start a new display frame, gets its size and also whether mouse/keyboard are idle:
81  idle, winw, winh = helper.startFrame()
82 
83  # Draw full-resolution input frame from camera:
84  dx, dy, dw, dh = helper.drawInputFrame("c", inframe, False, False)
85 
86  # Get the next camera image at processing resolution (may block until it is captured):
87  frame = inframe.getCvBGRp()
88  h, w, _ = frame.shape
89 
90  # Start measuring image processing time:
91  self.timer.start()
92 
93  w,h = (512,288)
94  frame = cv2.resize(frame, [w, h])
95 
96  # Input size must be a multiple of 32 in this model
97  if (w % 32) != 0 or (h % 32) != 0:
98  jevois.LFATAL("Input width and height must be multiples of 32")
99 
100  # Resize model if needed:
101  if self.w != w or self.h != h:
102  self.detector.setInputSize([w, h])
103  self.w, self.h = (w, h)
104 
105  # Inference
106  results = self.detector.infer(frame)
107  texts = []
108  for box, score in zip(results[0], results[1]):
109  texts.append(self.recognizer.infer(frame, box.reshape(8)))
110 
111  # Draw results:
112  pts = np.array(results[0]).astype(np.single)
113  helper.drawPoly(pts, 0xff0000ff, True)
114  for box, text in zip(results[0], texts):
115  helper.drawText(float(box[1][0]), float(box[1][1]), text, 0xff0000ff);
116 
117  # Write frames/s info from our timer:
118  fps = self.timer.stop()
119  helper.iinfo(inframe, fps, winw, winh);
120  helper.itext("JeVois-Pro - Scene text detection and decoding", 0, -1)
121  helper.itext("Detection: " + self.tdname, 0, -1)
122  helper.itext("Charset: " + self.charset, 0, -1)
123  helper.itext("Recognition: " + self.modelname, 0, -1)
124 
125  # End of frame:
126  helper.endFrame()
PySceneText.PySceneText.detector
detector
Definition: PySceneText.py:62
crnn.CRNN
Definition: crnn.py:10
db.DB
Definition: db.py:10
PySceneText.PySceneText.tdname
tdname
Definition: PySceneText.py:49
PySceneText.PySceneText.__init__
def __init__(self)
Constructor.
Definition: PySceneText.py:40
PySceneText.PySceneText.processGUI
def processGUI(self, inframe, helper)
Process function with GUI output.
Definition: PySceneText.py:79
PySceneText.PySceneText.modelname
modelname
Definition: PySceneText.py:47
PySceneText.PySceneText
Detect and decode English or Chinese text on NPU using TIM-VX.
Definition: PySceneText.py:37
PySceneText.PySceneText.recognizer
recognizer
Definition: PySceneText.py:59
demo.float
float
Definition: demo.py:39
PySceneText.PySceneText.timer
timer
Definition: PySceneText.py:72
PySceneText.PySceneText.charset
charset
Definition: PySceneText.py:48
PySceneText.PySceneText.h
h
Definition: PySceneText.py:75
jevois::Timer