This repository has been archived by the owner on Mar 3, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathapp.py
executable file
·112 lines (104 loc) · 3.85 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#!/usr/bin/python3
from PIL import ImageFont, ImageDraw, Image
import tamil
import numpy as np
import time
import copy
from pprint import pprint
#import unicodedata
import sys, os
import random
from fontdb import get_font_names, FONTDB
# Number of rows (samples) in the generated MNIST-style dataset.
MNIST_R = 1000

# Width/height, in pixels, of the canvas each glyph is drawn on.
W = H = 28

# Width/height, in pixels, of each stored (flattened) sample image.
Wtgt = Htgt = 28
def img2array(img):
    """Binarize an image and return its pixels as a flat float array.

    The image is converted to 8-bit grayscale (mode ``'L'``); every non-zero
    pixel becomes 255.0 and every zero pixel stays 0.0.

    Parameters
    ----------
    img
        A PIL-style image: ``img.convert('L')`` must return an object whose
        ``tobytes()`` yields one byte per pixel.

    Returns
    -------
    numpy.ndarray
        1-D float64 array with one entry per pixel, in ``tobytes()`` order.
        The length follows the image size, so a 28x28 image yields 784
        values.
    """
    # Vectorized thresholding; avoids a per-pixel Python loop and avoids
    # shadowing the builtin ``bytes``.
    pixels = np.frombuffer(img.convert('L').tobytes(), dtype=np.uint8)
    return (pixels > 0) * 255.0
# 0) Set up the font db and the regular/small variants across available fonts.
#    TAM and TAB fonts are skipped.

# 1) Letters to be built: the uyir letters followed by the aytham letter.
uyir_plus_ayutham = list(tamil.utf8.uyir_letters) + [tamil.utf8.ayudha_letter]

# 1.1) MNIST-style storage: one flattened image per row, plus its label.
data_image = np.zeros((MNIST_R, Wtgt * Htgt))
data_label = np.zeros((MNIST_R, 1))
n_rows = 0
def print_completion():
    """Print the module-level ``n_rows`` as a percentage of ``MNIST_R``."""
    percent = 100.0 * (n_rows / float(MNIST_R))
    print("Completed %g%%" % percent)
# 2) Build set given a font specification and return an array of 13-row images and Labels
def build_letter_set(fontobj, rotate=False, translate=False):
    """Render every letter of ``uyir_plus_ayutham`` once, in random order.

    Parameters
    ----------
    fontobj
        Object exposing ``L`` and ``M`` font attributes. ``M`` is used for
        the last uyir letter, ``L`` for every other letter.
    rotate : bool
        When true, rotate each glyph by a random whole angle drawn from
        ``range(-15, 15)`` degrees. The last uyir letter and the aytham
        letter are never rotated.
    translate : bool
        When true, shift each glyph by a random whole-pixel offset in
        [-2, 2] on both axes. The shift is applied whether or not the glyph
        is also rotated.

    Returns
    -------
    tuple
        ``(data_img, data_lbl)``: ``data_img`` has one flattened, binarized
        image per row (``len(uyir_plus_ayutham)`` x ``Wtgt * Htgt``) and
        ``data_lbl`` holds, per row, the letter's index in
        ``uyir_plus_ayutham``.
    """
    n_letters = len(uyir_plus_ayutham)
    data_img = np.zeros((n_letters, Wtgt * Htgt))
    data_lbl = np.zeros((n_letters, 1))

    last_uyir = tamil.utf8.uyir_letters[-1]
    never_rotated = (last_uyir, tamil.utf8.ayudha_letter)

    order = list(range(n_letters))
    random.shuffle(order)
    for pos, idx in enumerate(order):
        letter = uyir_plus_ayutham[idx]
        # Image.new() already fills the canvas with opaque black.
        image = Image.new('RGBA', (W, H), (0, 0, 0, 255))
        draw = ImageDraw.Draw(image)

        # The last uyir letter (au) is over-rendered with fontobj.L.
        font = fontobj.M if letter == last_uyir else fontobj.L
        text_w = min(_text_width(draw, letter, font), W)
        draw.text(((W - text_w) / 4, 0), letter, font=font,
                  fill=(255, 255, 255, 255))

        if translate:
            # floor(rand * 5 - 2) yields whole offsets in [-2, 2].
            shift = np.floor(np.random.random(2) * 5 - 2)
            offset = (float(shift[0]), float(shift[1]))
        else:
            offset = (0.0, 0.0)

        theta = 0
        if rotate and letter not in never_rotated:
            # These letters cannot be rotated within the 28x28 square.
            theta = random.choice(range(-15, 15))

        # rotate() keeps the canvas size, so no crop is needed afterwards.
        # Using theta == 0 lets a translation apply without any rotation.
        if theta or any(offset):
            image = image.rotate(theta, translate=offset)

        data_img[pos, :] = img2array(image)
        data_lbl[pos] = idx
    return data_img, data_lbl


def _text_width(draw, text, font):
    """Return the rendered width of *text* in pixels across Pillow versions."""
    # ImageDraw.textsize() was removed in Pillow 10; textbbox() is the
    # supported replacement and is preferred whenever it exists.
    if hasattr(draw, "textbbox"):
        left, _top, right, _bottom = draw.textbbox((0, 0), text, font=font)
        return right - left
    return draw.textsize(text, font=font)[0]
def main():
    """Generate ``MNIST_R`` labelled glyph images and save them under ./data.

    Repeatedly picks a random font from ``FONTDB``, renders one letter set
    with ``build_letter_set`` and copies it into the module-level
    ``data_image`` / ``data_label`` arrays until they are full. The images
    and the one-hot encoded labels are then written as uint8 ``.npy`` files
    into ``<cwd>/data``; both file names carry the same timestamp so the
    pair can be matched up later.
    """
    n_rows = 0
    font_names = list(FONTDB.keys())
    # NOTE(review): with the current MNIST_R (1000) these row thresholds are
    # never reached, so no rotation/translation is ever requested - confirm
    # whether they should scale with MNIST_R.
    rotate_after = 30000
    translate_after = 50000

    while n_rows < MNIST_R:
        # Pick a font.
        fontobj = FONTDB[random.choice(font_names)]
        rotate = n_rows > rotate_after
        translate = n_rows > translate_after
        data_img, data_lbl = build_letter_set(fontobj, rotate, translate)

        # Copy as many rows of this batch as still fit.
        take = min(len(data_lbl), MNIST_R - n_rows)
        data_image[n_rows:n_rows + take, :] = data_img[:take, :]
        data_label[n_rows:n_rows + take] = data_lbl[:take]
        n_rows += take
        print("Added %d rows (total %d / %d)" % (take, n_rows, MNIST_R))

    # One-hot encode with integer indexing; calling int() on a 1-element
    # array row is deprecated in recent NumPy releases.
    labels = data_label[:, 0].astype(int)
    data_label_onehot = np.zeros((len(labels), len(uyir_plus_ayutham)))
    data_label_onehot[np.arange(len(labels)), labels] = 1.0

    out_dir = os.path.join(os.getcwd(), 'data')
    os.makedirs(out_dir, exist_ok=True)
    # Take the timestamp once so the image and label files share it.
    stamp = str(time.time())
    np.save(os.path.join(out_dir, 'train-image-' + stamp),
            data_image.astype(np.uint8))
    np.save(os.path.join(out_dir, 'train-label-' + stamp + '-onehot'),
            data_label_onehot.astype(np.uint8))
def draw_composite():
    """Show a contact sheet with 16 random samples for each of 13 labels.

    Reads the module-level ``data_image`` / ``data_label`` arrays, so it is
    meant to run after ``main()``. Each label gets one column
    (x = label * W) with 16 randomly chosen samples stacked vertically.
    A label with no samples leaves its column black.
    """
    n_classes, n_samples = 13, 16
    im = Image.new('RGBA', (W * n_classes, H * n_samples), (0, 0, 0, 255))
    labels = data_label[:, 0]
    for label in range(n_classes):
        # Collect matching rows up front; retrying random picks until one
        # matches would never terminate for a label that has no samples.
        candidates = np.flatnonzero(labels == label).tolist()
        if not candidates:
            continue
        for slot in range(n_samples):
            pick = random.choice(candidates)
            # Cast to uint8 so fromarray() yields an 8-bit grayscale image.
            letter = data_image[pick].reshape(H, W).astype(np.uint8)
            im.paste(Image.fromarray(letter), (label * W, slot * H))
    im.show()
if __name__ == "__main__":
    # Dump the font database, then generate and save the dataset.
    pprint(FONTDB)
    main()