JeVoisBase  1.22
JeVois Smart Embedded Machine Vision Toolkit Base Modules
Share this page:
Loading...
Searching...
No Matches
PySceneText.py
Go to the documentation of this file.
1import pyjevois
2if pyjevois.pro: import libjevoispro as jevois
3else: import libjevois as jevois
4import cv2
5import numpy as np
6
7from crnn import CRNN
8from db import DB
9
10## Detect and decode English or Chinese text on NPU using TIM-VX
11#
12# This module runs on the JeVois-Pro NPU using a quantized deep neural network. It is derived from
13# https://github.com/opencv/opencv_zoo/tree/master/models/text_recognition_crnn
14# See LICENSE for license information.
15#
16# By default, English is used, but you can change that to Chinese in the constructor
17#
18# This module is mainly intended as a tutorial for how to run quantized int8 DNNs on the NPU using OpenCV and TIM-VX,
19# here achieved through only small modifications of code from https://github.com/opencv/opencv_zoo - in particular,
20# the core classes for this model, CRNN and DB, were not modified at all, and only the demo code was edited to use the
21# JeVois GUI Helper for fast OpenGL drawing as opposed to slow drawings into OpenCV images.
22#
23# @author Laurent Itti
24#
25# @displayname PySceneText
26# @videomapping JVUI 0 0 30.0 CropScale=RGB24@512x288:YUYV 1920 1080 30.0 JeVois PySceneText
27# @email itti\@usc.edu
28# @address University of Southern California, HNB-07A, 3641 Watt Way, Los Angeles, CA 90089-2520, USA
29# @copyright Copyright (C) 2022 by Laurent Itti, iLab and the University of Southern California
30# @mainurl http://jevois.org
31# @supporturl http://jevois.org/doc
32# @otherurl http://iLab.usc.edu
33# @license GPL v3
34# @distribution Unrestricted
35# @restrictions None
36# @ingroup modules
38 # ###################################################################################################
39 ## Constructor
def __init__(self):
    """Load the text recognition and text detection networks and set up per-frame state.

    The language is chosen by editing the local variable `lang` below. It selects which
    recognition model, character set and detection model files are loaded from the NPU
    model directory.
    """
    # Select a language here ("English" or "Chinese"):
    lang = "English"

    # Files for each supported language: (recognition model, charset, detection model)
    files_by_language = {
        "English": ("text_recognition_CRNN_EN_2021sep-act_int8-wt_int8-quantized.onnx",
                    "charset_36_EN.txt",
                    "text_detection_DB_IC15_resnet18_2021sep.onnx"),
        "Chinese": ("text_recognition_CRNN_CN_2021nov-act_int8-wt_int8-quantized.onnx",
                    "charset_3944_CN.txt",
                    "text_detection_DB_TD500_resnet18_2021sep.onnx"),
    }

    if lang not in files_by_language:
        jevois.LFATAL("Invalid language selected")

    # The file names are kept as attributes because processGUI() displays them
    self.modelname, self.charset, self.tdname = files_by_language[lang]

    # Directory that holds all model and charset files
    root = "/jevoispro/share/npu/other/"

    # Instantiate CRNN for text recognition
    recognition_model = root + self.modelname
    recognition_charset = root + self.charset
    self.recognizer = CRNN(modelPath = recognition_model, charsetPath = recognition_charset)

    # Instantiate DB for text detection, using the TIM-VX backend with the NPU as target
    detector_options = dict(inputSize = [512, 288],
                            binaryThreshold = 0.3,
                            polygonThreshold = 0.5,
                            maxCandidates = 200,
                            unclipRatio = 2.0,
                            backendId = cv2.dnn.DNN_BACKEND_TIMVX,
                            targetId = cv2.dnn.DNN_TARGET_NPU)
    self.detector = DB(modelPath = root + self.tdname, **detector_options)

    # Timer used by processGUI() to report the framerate
    self.timer = jevois.Timer('PySceneText', 10, jevois.LOG_DEBUG)

    # Last input size given to the detector; (0, 0) means none has been set from processGUI() yet
    self.w = 0
    self.h = 0
76
77 # ####################################################################################################
78 ## Process function with GUI output
def processGUI(self, inframe, helper):
    """Detect and decode text in one camera frame and draw the results through the GUI helper.

    inframe -- JeVois input frame from which the camera image is obtained
    helper  -- JeVois GUI helper used for all drawing and on-screen text

    The image is resized to the fixed processing size, the detector finds text boxes, the
    recognizer decodes each box, and boxes plus decoded strings are drawn over the displayed
    input frame.
    """
    # Start a new display frame and get the window size:
    _, winw, winh = helper.startFrame()

    # Draw full-resolution input frame from camera:
    helper.drawInputFrame("c", inframe, False, False)

    # Get the next camera image at processing resolution (may block until it is captured):
    frame = inframe.getCvBGRp()

    # Start measuring image processing time:
    self.timer.start()

    # Fixed processing size; the image is always resized to it before inference
    w, h = (512, 288)
    frame = cv2.resize(frame, (w, h))

    # Input size must be a multiple of 32 in this model. Kept as a guard in case the
    # processing size above gets edited.
    if (w % 32) != 0 or (h % 32) != 0:
        jevois.LFATAL("Input width and height must be multiples of 32")

    # Update the detector input size only when it differs from the one last set here:
    if (self.w, self.h) != (w, h):
        self.detector.setInputSize([w, h])
        self.w, self.h = (w, h)

    # Inference: results[0] holds the detected boxes; each one is decoded by the recognizer
    results = self.detector.infer(frame)
    boxes = results[0]
    texts = [self.recognizer.infer(frame, box.reshape(8)) for box in boxes]

    # Draw results, using one color for both box outlines and decoded strings:
    color = 0xff0000ff
    pts = np.array(boxes).astype(np.single)
    helper.drawPoly(pts, color, True)
    for box, text in zip(boxes, texts):
        # Each string is anchored at the second vertex of its box
        helper.drawText(float(box[1][0]), float(box[1][1]), text, color)

    # Write frames/s info from our timer:
    fps = self.timer.stop()
    helper.iinfo(inframe, fps, winw, winh)
    helper.itext("JeVois-Pro - Scene text detection and decoding")
    helper.itext("Detection: " + self.tdname)
    helper.itext("Charset: " + self.charset)
    helper.itext("Recognition: " + self.modelname)

    # End of frame:
    helper.endFrame()
Detect and decode English or Chinese text on NPU using TIM-VX.
__init__(self)
Constructor.
processGUI(self, inframe, helper)
Process function with GUI output.
Definition db.py:10