-
Notifications
You must be signed in to change notification settings - Fork 2.1k
/
Copy pathmodule.py
209 lines (188 loc) · 8.88 KB
/
module.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
import argparse
import os
import ast
import paddle
import paddle.static
import paddle2onnx
import paddle2onnx as p2o
from paddleocr import PaddleOCR
from paddleocr.ppocr.utils.logging import get_logger
from paddleocr.tools.infer.utility import base64_to_cv2
from paddlehub.module.module import moduleinfo, runnable, serving
from .utils import read_images, save_result_image, mkdir
@moduleinfo(
    name="multi_languages_ocr_db_crnn",
    version="1.1.0",
    summary="ocr service",
    author="PaddlePaddle",
    type="cv/text_recognition")
class MultiLangOCR:
    """
    OCR module built on top of a ``PaddleOCR`` engine.

    The module can be used from Python (``recognize_text``), as a service
    (``serving_method``) or from the command line (``run_cmd``), and can export
    the underlying inference models to ONNX (``export_onnx_model``).

    NOTE(review): ``self.name`` and ``self.directory`` are read below but never
    assigned in this class; presumably they are injected by ``@moduleinfo`` --
    confirm against the PaddleHub version in use.
    """

    def __init__(self,
                 lang="ch",
                 det=True,
                 rec=True,
                 use_angle_cls=False,
                 enable_mkldnn=False,
                 use_gpu=False,
                 box_thresh=0.6,
                 angle_classification_thresh=0.9):
        """
        initialize with the necessary elements
        Args:
            lang(str): the selection of languages
            det(bool): Whether to use text detector.
            rec(bool): Whether to use text recognizer.
            use_angle_cls(bool): Whether to use text orientation classifier.
            enable_mkldnn(bool): Whether to enable mkldnn.
            use_gpu (bool): Whether to use gpu.
            box_thresh(float): the threshold of the detected text box's confidence
            angle_classification_thresh(float): the threshold of the angle classification confidence
        """
        self.lang = lang
        self.logger = get_logger()
        self.det = det
        self.rec = rec
        self.use_angle_cls = use_angle_cls
        self._build_engine(
            enable_mkldnn=enable_mkldnn,
            use_gpu=use_gpu,
            box_thresh=box_thresh,
            angle_classification_thresh=angle_classification_thresh)

    def _build_engine(self, enable_mkldnn, use_gpu, box_thresh, angle_classification_thresh):
        """
        Create ``self.engine`` from the current ``lang``/``det``/``rec``/``use_angle_cls``
        settings and refresh the cached model directories.

        Shared by ``__init__`` and ``run_cmd`` so that the model directories used by
        ``export_onnx_model`` always belong to the engine that is currently active.
        Args:
            enable_mkldnn(bool): Whether to enable mkldnn.
            use_gpu (bool): Whether to use gpu.
            box_thresh(float): the threshold of the detected text box's confidence
            angle_classification_thresh(float): the threshold of the angle classification confidence
        """
        self.engine = PaddleOCR(
            lang=self.lang,
            det=self.det,
            rec=self.rec,
            use_angle_cls=self.use_angle_cls,
            enable_mkldnn=enable_mkldnn,
            use_gpu=use_gpu,
            det_db_box_thresh=box_thresh,
            cls_thresh=angle_classification_thresh)
        # All three directories are read from the detector's ``args`` object,
        # exactly as the original code did.
        engine_args = self.engine.text_detector.args
        self.det_model_dir = engine_args.det_model_dir
        self.rec_model_dir = engine_args.rec_model_dir
        self.cls_model_dir = engine_args.cls_model_dir

    def _format_line(self, line):
        """
        Convert one raw entry returned by ``self.engine.ocr`` into a result dict.

        The layout of ``line`` depends on which stages are enabled:
        detection + recognition yields ``(boxes, (text, score))``, detection only
        yields the boxes, and without detection a ``(label, score)`` pair is expected.
        Args:
            line: one element of the list returned by ``self.engine.ocr``.
        Returns:
            dict: the formatted entry.
        """
        if self.det and self.rec:
            boxes = line[0]
            text, score = line[1]
            return {'text': text, 'confidence': float(score), 'text_box_position': boxes}
        if self.det:
            return {'text_box_position': line}
        if self.use_angle_cls and not self.rec:
            orientation, score = line
            return {'orientation': orientation, 'score': float(score)}
        text, score = line
        return {'text': text, 'confidence': float(score)}

    def recognize_text(self, images=None, paths=None, output_dir='ocr_result', visualization=False):
        """
        Get the text in the predicted images.
        Args:
            images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. If images not paths
            paths (list[str]): The paths of images. If paths not images
            output_dir (str): The directory to store output images.
            visualization (bool): Whether to save image or not.
        Returns:
            res (list): The result of text detection box and save path of images.
        Raises:
            TypeError: if not exactly one of ``images`` / ``paths`` is a non-empty list.
            AssertionError: if there is no image to predict after loading.
        """
        # ``None`` defaults replace the former mutable ``[]`` defaults; passing
        # ``[]`` explicitly still behaves as before.
        images = [] if images is None else images
        paths = [] if paths is None else paths
        images_is_list = isinstance(images, list)
        paths_is_list = isinstance(paths, list)

        if images_is_list and images and paths_is_list and not paths:
            predicted_data = images
        elif paths_is_list and paths and images_is_list and not images:
            predicted_data = read_images(paths)
        else:
            raise TypeError("The input data is inconsistent with expectations.")

        # Raised explicitly (instead of ``assert``) so the check survives ``python -O``
        # while keeping the exception type callers may already rely on.
        if len(predicted_data) == 0:
            raise AssertionError("There is not any image to be predicted. Please check the input data.")

        all_results = []
        for img in predicted_data:
            result = {'save_path': ''}
            if img is None:
                # Keep a placeholder entry so results stay aligned with the inputs.
                result['data'] = []
                all_results.append(result)
                continue
            # Copy before inference so the visualization uses an untouched image.
            original_image = img.copy()
            rec_results = self.engine.ocr(img, det=self.det, rec=self.rec, cls=self.use_angle_cls)
            # NOTE(review): some PaddleOCR versions appear to return None when
            # nothing is detected; treat that as "no results" instead of crashing.
            if rec_results is None:
                rec_results = []
            result['data'] = [self._format_line(line) for line in rec_results]
            if visualization and result['data']:
                result['save_path'] = save_result_image(original_image, rec_results, output_dir, self.directory,
                                                        self.lang, self.det, self.rec, self.logger)
            all_results.append(result)
        return all_results

    @serving
    def serving_method(self, images, **kwargs):
        """
        Run as a service.
        Args:
            images (list): encoded images; each one is decoded with ``base64_to_cv2``.
            **kwargs: forwarded unchanged to ``recognize_text``.
        Returns:
            res (list): the value returned by ``recognize_text``.
        """
        images_decode = [base64_to_cv2(image) for image in images]
        results = self.recognize_text(images_decode, **kwargs)
        return results

    @runnable
    def run_cmd(self, argvs):
        """
        Run as a command
        Args:
            argvs (list[str]): command line arguments, see ``arg_parser``.
        Returns:
            res (list): the value returned by ``recognize_text`` for ``--input_path``.
        """
        parser = self.arg_parser()
        args = parser.parse_args(argvs)
        # ``--lang`` defaults to None, in which case the language chosen at
        # construction time is kept.
        if args.lang is not None:
            self.lang = args.lang
        self.det = args.det
        self.rec = args.rec
        self.use_angle_cls = args.use_angle_cls
        # Rebuild the engine with the command line settings; this also refreshes
        # the cached model directories so they match the new engine.
        self._build_engine(
            enable_mkldnn=args.enable_mkldnn,
            use_gpu=args.use_gpu,
            box_thresh=args.box_thresh,
            angle_classification_thresh=args.angle_classification_thresh)
        results = self.recognize_text(
            paths=[args.input_path], output_dir=args.output_dir, visualization=args.visualization)
        return results

    def arg_parser(self):
        """
        Build the command line parser used by ``run_cmd``.

        Boolean options are parsed with ``ast.literal_eval``, so they are given as
        Python literals on the command line (e.g. ``--use_gpu True``).
        Returns:
            argparse.ArgumentParser: the configured parser.
        """
        parser = argparse.ArgumentParser(
            description="Run the %s module." % self.name,
            prog='hub run %s' % self.name,
            usage='%(prog)s',
            add_help=True)
        parser.add_argument('--input_path', type=str, default=None, help="diretory to image. Required.", required=True)
        parser.add_argument('--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not")
        parser.add_argument('--output_dir', type=str, default='ocr_result', help="The directory to save output images.")
        parser.add_argument(
            '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.")
        parser.add_argument('--lang', type=str, default=None, help="the selection of languages")
        parser.add_argument('--det', type=ast.literal_eval, default=True, help="whether use text detector or not")
        parser.add_argument('--rec', type=ast.literal_eval, default=True, help="whether use text recognizer or not")
        parser.add_argument(
            '--use_angle_cls', type=ast.literal_eval, default=False, help="whether text orientation classifier or not")
        parser.add_argument('--enable_mkldnn', type=ast.literal_eval, default=False, help="whether use mkldnn or not")
        parser.add_argument(
            "--box_thresh", type=float, default=0.6, help="set the threshold of the detected text box's confidence")
        parser.add_argument(
            "--angle_classification_thresh",
            type=float,
            default=0.9,
            help="set the threshold of the angle classification confidence")
        return parser

    def export_onnx_model(self, dirname: str, input_shape_dict=None, opset_version=10):
        '''
        Export the model to ONNX format.

        One file named ``<module name>_<key>.onnx`` is written into ``dirname`` for
        each of the ``det``, ``rec`` and ``cls`` model directories.
        Args:
            dirname(str): The directory to save the onnx model.
            input_shape_dict: dictionary ``{ input_name: input_value }, eg. {'x': [-1, 3, -1, -1]}``
            opset_version(int): operator set
        Raises:
            ImportError: if the installed paddle2onnx is older than 0.9.0.
            Exception: if ``input_shape_dict`` is not a dict or ``opset_version`` is below 10.
        '''
        # Only major/minor matter for the check; slicing avoids an unpacking error
        # on version strings that do not have exactly three components.
        major, minor = paddle2onnx.__version__.split('.')[:2]
        if int(major) == 0 and int(minor) < 9:
            raise ImportError("paddle2onnx>=0.9.0 is required")

        if input_shape_dict is not None and not isinstance(input_shape_dict, dict):
            raise Exception("input_shape_dict should be dict, eg. {'x': [-1, 3, -1, -1]}.")

        if opset_version <= 9:
            raise Exception("opset_version <= 9 is not surpported, please try with higher opset_version >=10.")

        path_dict = {"det": self.det_model_dir, "rec": self.rec_model_dir, "cls": self.cls_model_dir}
        # The executor does not depend on the model being converted, so it is
        # created once instead of once per iteration.
        exe = paddle.static.Executor(paddle.CPUPlace())
        for key, path in path_dict.items():
            save_file = os.path.join(dirname, '{}_{}.onnx'.format(self.name, key))
            # Only the program is needed for conversion; feed names and fetch vars are unused.
            program, _, _ = paddle.static.load_inference_model(os.path.join(path, 'inference'), exe)
            onnx_proto = p2o.run_convert(program, input_shape_dict=input_shape_dict, opset_version=opset_version)
            # presumably creates the parent directory of save_file -- verify against .utils.mkdir
            mkdir(save_file)
            with open(save_file, "wb") as f:
                f.write(onnx_proto.SerializeToString())