forked from ypsu/latex-to-unicode
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconvert.py
executable file
·149 lines (120 loc) · 3.8 KB
/
convert.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#!/usr/bin/env python3
import re
from pathlib import Path
# Absolute path of this script file, with symlinks resolved.
source_path = Path(__file__).resolve()
# Directory containing this script; the data-file paths opened by
# load_data() and load_symbols() are built from it.
source_dir = source_path.parent
# Convert the string s to its unicode representation.
#
# The lookup tables are loaded from disk on the first call only. If s as a
# whole names a single latex symbol (written without the leading
# backslash, e.g. "alpha"), that symbol's value is returned directly.
# Otherwise every latex symbol code in s is replaced, a leading
# "bb ", "it ", ... prefix is turned into the matching modifier, and all
# modifiers (^, _, \bb, \bf, ...) are applied.
def convert(s):
    global data_loaded
    if not data_loaded:
        load_data()
        # Only mark the tables as loaded once load_data() returned normally.
        data_loaded = True

    ss = convert_single_symbol(s)
    if ss is not None:
        return ss

    s = convert_latex_symbols(s)
    s = process_starting_modifiers(s)
    s = apply_all_modifiers(s)
    return s
# When s is a bare latex symbol name such as "alpha" or "beta" (no
# leading backslash), return the value of the first matching entry in
# latex_symbols. Returns None when no entry matches.
def convert_single_symbol(s):
    wanted = "\\" + s
    return next((val for code, val in latex_symbols if code == wanted), None)
# Substitute every occurrence of each known latex symbol code
# ("\alpha", "\beta", ...) in s with its value. The substitutions are
# applied one after another, in the order the entries appear in
# latex_symbols.
def convert_latex_symbols(s):
    converted = s
    for latex_code, replacement in latex_symbols:
        converted = converted.replace(latex_code, replacement)
    return converted
# If s starts with "bb ", "bf ", "it ", "cal ", "frak " or "mono ", rewrite
# that prefix into the corresponding modifier with an opening brace
# ("it abc" -> "\it{abc"). The brace is left unclosed on purpose so the
# modifier covers the rest of the string.
def process_starting_modifiers(s):
    # (pattern, replacement) pairs; "\\" in a replacement template
    # produces a single literal backslash.
    prefix_rules = (
        ("^bb ", r"\\bb{"),
        ("^bf ", r"\\bf{"),
        ("^it ", r"\\it{"),
        ("^cal ", r"\\cal{"),
        ("^frak ", r"\\frak{"),
        ("^mono ", r"\\mono{"),
    )
    for pattern, replacement in prefix_rules:
        s = re.sub(pattern, replacement, s)
    return s
# Run apply_modifier over s once for every supported modifier, in a fixed
# order: superscripts and subscripts first, then the font styles.
def apply_all_modifiers(s):
    passes = (
        ("^", superscripts),
        ("_", subscripts),
        ("\\bb", textbb),
        ("\\bf", textbf),
        ("\\it", textit),
        ("\\cal", textcal),
        ("\\frak", textfrak),
        ("\\mono", textmono),
    )
    for marker, table in passes:
        s = apply_modifier(s, marker, table)
    return s
# Example: modifier = "^", D = superscripts
# Every occurrence of the modifier is first rewritten to "^". Then the
# character following each "^" (or, when it is followed by "{", every
# character up to the next "}") is replaced by its entry in D; characters
# without an entry are kept as they are. The "^" marker and the braces of
# a group are not copied to the result. A group that is never closed
# extends to the end of the text, and a "^" at the very end is dropped.
def apply_modifier(text, modifier, D):
    NORMAL, PENDING, BRACED = "normal", "pending", "braced"
    state = NORMAL
    pieces = []
    for ch in text.replace(modifier, "^"):
        if state == NORMAL:
            if ch == "^":
                state = PENDING
            else:
                pieces.append(ch)
        elif state == PENDING:
            # Directly after the marker: either a group opens or exactly
            # one character gets translated.
            if ch == "{":
                state = BRACED
            else:
                pieces.append(D.get(ch, ch))
                state = NORMAL
        else:
            # Inside a braced group: translate until the first "}".
            if ch == "}":
                state = NORMAL
            else:
                pieces.append(D.get(ch, ch))
    return "".join(pieces)
# Fill all lookup tables from the files in the "data" directory next to
# this script: first the symbol list, then one dictionary per modifier.
def load_data():
    load_symbols()
    sources = (
        (f"{source_dir}/data/subscripts", subscripts),
        (f"{source_dir}/data/superscripts", superscripts),
        (f"{source_dir}/data/textbb", textbb),
        (f"{source_dir}/data/textbf", textbf),
        (f"{source_dir}/data/textit", textit),
        (f"{source_dir}/data/textcal", textcal),
        (f"{source_dir}/data/textfrak", textfrak),
        (f"{source_dir}/data/textmono", textmono),
    )
    for filename, table in sources:
        load_dict(filename, table)
# Read a two-column table from filename into the dictionary D.
#
# Each non-blank line is split on whitespace; the first field becomes the
# key and the second the value (any further fields are ignored). Blank
# lines are skipped. A non-blank line with fewer than two fields raises
# IndexError.
def load_dict(filename, D):
    # The tables hold non-ASCII characters, so do not depend on the
    # platform's default encoding (assumes the data files are UTF-8).
    with open(filename, "r", encoding="utf-8") as f:
        for line in f:
            words = line.split()
            if not words:
                # Tolerate empty / whitespace-only lines, e.g. at end of file.
                continue
            D[words[0]] = words[1]
# Read the symbol table from "data/symbols" next to this script and append
# one (code, value) tuple per non-blank line to latex_symbols.
#
# Each line is split on whitespace; the first field is the code and the
# second the value (any further fields are ignored). Blank lines are
# skipped. A non-blank line with fewer than two fields raises IndexError.
def load_symbols():
    # The table holds non-ASCII characters, so do not depend on the
    # platform's default encoding (assumes the data file is UTF-8).
    with open(f"{source_dir}/data/symbols", "r", encoding="utf-8") as f:
        for line in f:
            words = line.split()
            if not words:
                # Tolerate empty / whitespace-only lines, e.g. at end of file.
                continue
            latex_symbols.append((words[0], words[1]))
# Module-level state, filled in by load_data() on the first convert() call.

# List of (code, value) tuples appended by load_symbols().
latex_symbols = list()

# One character-translation table per modifier, filled by load_dict().
superscripts = dict()
subscripts = dict()
textbb = dict()
textbf = dict()
textit = dict()
textcal = dict()
textfrak = dict()
textmono = dict()

# Set to True by convert() once the tables above have been loaded.
data_loaded = False
if __name__ == "__main__":
    # Script mode: read one line from stdin, convert it, and print the
    # result followed by a single space instead of a newline.
    print(convert(input()), end=" ")