-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathminerva.wsgi
executable file
·89 lines (71 loc) · 2.25 KB
/
minerva.wsgi
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/python
# -*- coding: utf-8 -*-
##
# This script serves as the main WSGI entry point
# to minerva.
#
# @author Yuan Jin
# @created Oct. 27, 2012
# @updated Oct. 29, 2012
#
import sys
# Python 2 idiom: reload(sys) makes sys.setdefaultencoding() callable again so
# the process-wide default string encoding can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding('UTF-8')
import cgi
from collections import OrderedDict
import string
import tesseract
import threading
import Queue
# Work queue shared by the whole module: read_http() puts one key per uploaded
# image on it, and the OcrThread workers started by ocr() take keys off it.
queue = Queue.Queue()
class OcrThread(threading.Thread):
    """Worker thread that runs OCR on queued screenshots.

    Every queue item is a key into the module-level ``screenshots`` mapping.
    The worker replaces the raw image buffer stored under that key with the
    value returned by ``tesseract.ProcessPagesBuffer``.

    The worker drains the queue and then exits, so the queue must already be
    filled when the thread is started. Exiting (instead of blocking forever
    on an empty queue) keeps finished requests from leaving idle threads
    behind that would later pick up new work with an outdated ``api`` handle.
    """

    def __init__(self, api, queue):
        """Store the collaborators used by ``run``.

        api   -- handle passed through to ``tesseract.ProcessPagesBuffer``
        queue -- queue holding the ``screenshots`` keys still to be processed
        """
        threading.Thread.__init__(self)
        self.api = api
        self.queue = queue

    def run(self):
        """Process queued screenshot keys until the queue is empty."""
        while True:
            try:
                image_number = self.queue.get_nowait()
            except Queue.Empty:
                # nothing left to do for this batch
                return
            try:
                image = screenshots[image_number]
                screenshots[image_number] = tesseract.ProcessPagesBuffer(
                    image, len(image), self.api)
            except Exception as exc:
                # Keep the worker alive for the remaining items and make sure
                # the unprocessed image buffer never ends up in the OCR text.
                screenshots.pop(image_number, None)
                sys.stderr.write(
                    'OCR failed for %r: %s\n' % (image_number, exc))
            finally:
                # Always acknowledge the item; otherwise queue.join() in the
                # caller would wait forever after a failure.
                self.queue.task_done()
def ocr(language):
    """Run OCR over all queued screenshots and return the combined text.

    language -- language code handed to ``TessBaseAPI.Init``

    Returns the values of the module-level ``screenshots`` mapping, taken in
    reverse insertion order and joined with single spaces.

    Raises RuntimeError when Tesseract cannot be initialised for *language*.
    """
    api = tesseract.TessBaseAPI()
    # TessBaseAPI::Init returns 0 on success and -1 on failure; running OCR
    # on an engine that failed to initialise must be avoided.
    init_status = api.Init(".", language, tesseract.OEM_DEFAULT)
    if init_status == -1:
        # Throw away the keys queued for this request so they are not
        # processed as part of a later one.
        while True:
            try:
                queue.get_nowait()
            except Queue.Empty:
                break
            queue.task_done()
        raise RuntimeError(
            'Cannot initialise OCR for language %r' % language)

    # one worker per image, capped at five workers
    number_of_threads = min(5, len(screenshots))
    for _ in range(number_of_threads):
        worker = OcrThread(api, queue)
        worker.setDaemon(True)
        worker.start()

    # wait until every queued image has been acknowledged by a worker
    queue.join()
    return ' '.join(reversed(screenshots.values()))
def read_http(environ):
    """Parse the posted form into the module-level work state.

    Rebinds the global ``screenshots`` to a fresh ``OrderedDict``. Every form
    field except ``'Language type'`` is stored there under its field name and
    the same name is put on the module-level ``queue``.

    environ -- WSGI environment; the request body is read from
               ``environ['wsgi.input']``

    Returns the value of the ``'Language type'`` field, or ``'eng'`` when the
    form does not contain one.
    """
    global screenshots
    screenshots = OrderedDict()
    form = cgi.FieldStorage(fp=environ['wsgi.input'], environ=environ)
    # fall back to English when the client does not name a language
    language = 'eng'
    # NOTE(review): fields are visited in whatever order FieldStorage.keys()
    # reports them -- confirm that this matches the intended page order.
    for field_name in form.keys():
        field_value = form[field_name].value
        if field_name == 'Language type':
            language = field_value
            continue
        slot = '%s' % field_name
        screenshots[slot] = field_value
        queue.put(slot)
    return language
def _send_plain_text(start_response, body):
    """Start a ``200 OK`` text/plain response and return its WSGI body."""
    header = [('Content-type', 'text/plain'),
              ('Content-Length', str(len(body)))]
    start_response("200 OK", header)
    return [body]


def application(environ, start_response):
    """WSGI entry point: OCR the posted images and answer with plain text.

    environ        -- WSGI environment of the request
    start_response -- WSGI callable used to send status and headers

    Returns a one-element list holding the response body: the OCR text on
    success, or the exception message when anything fails.
    """
    try:
        language = read_http(environ)
        output = ocr(language)
        if output is None:
            raise Exception('Void output!')
        return _send_plain_text(start_response, output)
    except Exception as e:
        # Record the full traceback on the server side as well; the client
        # only ever receives the short message.
        errors = environ.get('wsgi.errors')
        if errors is not None:
            import traceback
            traceback.print_exc(file=errors)
        # NOTE(review): failures are still reported with status "200 OK" to
        # keep the response contract unchanged -- confirm whether clients
        # could handle a 5xx status instead.
        return _send_plain_text(start_response, str(e))