-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSP-Morph-decode.py
235 lines (208 loc) · 6.77 KB
/
SP-Morph-decode.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
# ---------------------------------------------------------------------------
# Lookup tables: tag fragment -> human-readable description.
# ---------------------------------------------------------------------------

# Part-of-speech prefixes.  Order matters: decodeTag() uses the first key the
# tag starts with, so the specific "N-PRI"/"N-LI"/"N-OI" and "A-NUI" entries
# must stay ahead of the generic "N-" and "A-" ones.
posMap = {
    "N-PRI": "Proper Noun Indeclinable",
    "N-LI": "Letter Indeclinable",
    "N-OI": "Noun Other Type Indeclinable",
    "N-": "Noun",
    "A-NUI": "Numeral Indeclinable",
    "A-": "Adjective",
    "T-": "Article",
    "V-": "Verb",
    "P-": "Personal Pronoun",
    "R-": "Relative Pronoun",
    "C-": "Reciprocal Pronoun",
    "D-": "Demonstrative Pronoun",
    "K-": "Correlative Pronoun",
    "I-": "Interrogative Pronoun",
    "X-": "Indefinite Pronoun",
    "Q-": "Correlative/Interrogative Pronoun",
    "F-": "Reflexive Pronoun",
    "S-": "Possessive Pronoun",
    "ADV": "Adverb",
    "CONJ": "Conjunction",
    "COND": "Conditional",
    "PRT": "Particle",
    "PREP": "Preposition",
    "INJ": "Interjection",
    "ARAM": "Aramaic",
    "HEB": "Hebrew",
    "PUNCT": "Punctuation",
}

# Case letter -> grammatical case.
caseMap = {
    "V": "Vocative",
    "N": "Nominative",
    "G": "Genitive",
    "D": "Dative",
    "A": "Accusative",
}

# Number letter -> grammatical number.
numberMap = {
    "S": "Singular",
    "P": "Plural",
}

# Gender letter -> grammatical gender.
genderMap = {
    "M": "Masculine",
    "F": "Feminine",
    "N": "Neuter",
}

# Tense code -> verb tense.  The "second" forms carry a leading "2".
tenseMap = {
    "P": "Present",
    "I": "Imperfect",
    "F": "Future",
    "2F": "Second Future",
    "A": "Aorist",
    "2A": "Second Aorist",
    "R": "Perfect",
    "2R": "Second Perfect",
    "L": "Pluperfect",
    "2L": "Second Pluperfect",
    "X": "No Tense Stated",
}

# Voice letter -> verb voice.
voiceMap = {
    "A": "Active",
    "M": "Middle",
    "P": "Passive",
    "E": "Middle or Passive",
    "D": "Middle Deponent",
    "O": "Passive Deponent",
    "N": "Middle or Passive Deponent",
    "Q": "Impersonal Active",
    "X": "No Voice",
}

# Mood letter -> verb mood.
moodMap = {
    "I": "Indicative",
    "S": "Subjunctive",
    "O": "Optative",
    "M": "Imperative",
    "N": "Infinitive",
    "P": "Participle",
    "R": "Imperative Participle",
}

# Person digit -> grammatical person.
personMap = {
    "1": "First Person",
    "2": "Second Person",
    "3": "Third Person",
}

# Verb-only trailers.  Keys include the leading dash.
verbExtraMap = {
    "-M": "Middle significance",
    "-C": "Contracted form",
    "-T": "Transitive",
    "-A": "Aeolic",
    "-ATT": "Attic",
    "-AP": "Apocopated form",
    "-IRR": "Irregular or impure form",
}

# General tag suffixes.  Keys include the leading dash.
suffixMap = {
    "-K": "Crasis",
    "-N": "Negative",
    "-S": "Superlative",
    "-C": "Comparative",
    "-ABB": "Abbreviated",
    "-I": "Interrogative",
    "-ATT": "Attic",
    "-P": "Particle Attached",
}
def _decodeCaseNumberGender(code, output):
    """Store Case, Number and Gender read from the first three letters of code."""
    output["Case"] = caseMap.get(code[0], "Unknown")
    output["Number"] = numberMap.get(code[1], "Unknown")
    output["Gender"] = genderMap.get(code[2], "Unknown")


def _decodeVerbTrailer(segment, output):
    """Decode one dash-separated verb trailer; return True if it was recognised."""
    # The verb tag is split on "-", which strips the dash, but verbExtraMap
    # and suffixMap are keyed WITH the leading dash ("-ATT", "-C", ...).
    # Put it back before looking the segment up, otherwise nothing matches.
    key = "-" + segment
    if key in verbExtraMap:
        # Checked first so that codes present in both tables ("-C", "-ATT")
        # get their verb meaning rather than the generic suffix meaning.
        output["Verb Extra"] = verbExtraMap[key]
        return True
    if key in suffixMap:
        output["Suffix"] = suffixMap[key]
        return True
    return False


def _decodeVerb(rest, output):
    """Decode the part of a verb tag that follows the "V-" prefix."""
    segments = rest.split("-")  # always yields at least one element
    head = segments[0]

    # Tense is the first tenseMap key the head starts with.  The two-character
    # "second" tenses begin with "2", so they never clash with the
    # one-letter codes regardless of iteration order.
    tenseKey = next((code for code in tenseMap if head.startswith(code)), None)
    if tenseKey is None:
        output["Tense"] = "Unknown"
    else:
        output["Tense"] = tenseMap[tenseKey]
        voiceMood = head[len(tenseKey):]
        if len(voiceMood) >= 2:
            output["Voice"] = voiceMap.get(voiceMood[0], "Unknown")
            output["Mood"] = moodMap.get(voiceMood[1], "Unknown")

    for index, segment in enumerate(segments[1:], start=1):
        # A trailer may follow the tense/voice/mood group directly (no
        # person/number segment in between), so test every segment for it.
        if _decodeVerbTrailer(segment, output):
            continue
        if index == 1:
            # Agreement segment: participles carry case/number/gender, every
            # other mood is read as person/number.
            if output.get("Mood") == "Participle" and len(segment) >= 3:
                _decodeCaseNumberGender(segment, output)
            elif len(segment) >= 2:
                output["Person"] = personMap.get(segment[0], "Unknown")
                output["Number"] = numberMap.get(segment[1], "Unknown")
        else:
            # Unrecognised trailer: flag it, but do not clobber a suffix that
            # another segment already decoded.
            output.setdefault("Suffix", "Unknown")


def decodeTag(tagInput):
    """Decode a morphological parsing tag into labelled grammatical fields.

    The tag is trimmed and upper-cased, its part-of-speech prefix is matched
    against posMap (the first key, in insertion order, that the tag starts
    with), and the remainder is interpreted according to that part of speech.

    Args:
        tagInput: The tag text, e.g. "N-NSM" or "V-PAI-3S".  None is treated
            like an empty string.

    Returns:
        A dict mapping field names to descriptions:

        * blank input -> only {"Error": "Please enter a parsing tag."}
        * unknown prefix -> only {"Part of Speech": "Unknown or Unsupported"}
        * otherwise "Part of Speech" plus whichever of "Case", "Number",
          "Gender", "Person", "Tense", "Voice", "Mood", "Verb Extra",
          "Suffix" and the possessive-pronoun fields apply.

        A letter that is not in the relevant table is reported as "Unknown";
        a remainder too short for its part of speech adds no fields.
    """
    tag = (tagInput or "").strip().upper()
    output = {}
    if not tag:
        output["Error"] = "Please enter a parsing tag."
        return output

    # First match wins, which is why posMap lists "N-PRI" before "N-".
    pos = next((prefix for prefix in posMap if tag.startswith(prefix)), None)
    if pos is None:
        output["Part of Speech"] = "Unknown or Unsupported"
        return output
    output["Part of Speech"] = posMap[pos]
    rest = tag[len(pos):]

    if pos == "V-":
        # Verb trailers are decoded segment by segment inside _decodeVerb.
        # The generic suffix pass below must not run for verbs: it would
        # report e.g. "-C" as "Comparative" instead of "Contracted form".
        _decodeVerb(rest, output)
        return output

    if pos in ("N-", "A-", "T-"):
        if len(rest) >= 3:
            _decodeCaseNumberGender(rest, output)
    elif pos == "F-":
        # Reflexive pronoun: person, case, number, gender.
        if len(rest) >= 4:
            output["Person"] = personMap.get(rest[0], "Unknown")
            output["Case"] = caseMap.get(rest[1], "Unknown")
            output["Number"] = numberMap.get(rest[2], "Unknown")
            output["Gender"] = genderMap.get(rest[3], "Unknown")
    elif pos == "S-" and len(rest) >= 5:
        # Possessive pronoun: possessor (person, number) then the thing
        # possessed (case, number, gender).
        output["Person of Possessor"] = personMap.get(rest[0], "Unknown")
        output["Number of Possessor"] = numberMap.get(rest[1], "Unknown")
        output["Case of Possessed"] = caseMap.get(rest[2], "Unknown")
        output["Number of Possessed"] = numberMap.get(rest[3], "Unknown")
        output["Gender of Possessed"] = genderMap.get(rest[4], "Unknown")
    elif pos == "P-" and len(rest) >= 3 and rest[0] in personMap:
        # NOTE(review): assumes the Robinson-style layout person + case +
        # number for personal pronouns that lead with a digit (e.g. "P-1GS");
        # confirm against the tag source.  Reading such a tag as
        # case/number/gender shifts every letter by one and can only ever
        # produce three "Unknown" values.
        output["Person"] = personMap[rest[0]]
        output["Case"] = caseMap.get(rest[1], "Unknown")
        output["Number"] = numberMap.get(rest[2], "Unknown")
    elif pos in ("P-", "R-", "C-", "D-", "K-", "I-", "X-", "Q-", "S-"):
        # Remaining pronouns (and a possessive too short for the branch
        # above): case, number, gender.
        if len(rest) >= 3:
            _decodeCaseNumberGender(rest, output)

    # Generic trailing suffix, e.g. "A-NSM-S" or "ADV-K".
    suffix = next((code for code in suffixMap if rest.endswith(code)), None)
    if suffix is not None:
        output["Suffix"] = suffixMap[suffix]
    return output
# Example usage: decode one sample tag and print the resulting dict.
# "N-PRS" does not start with "N-PRI", so it matches the generic "N-" (Noun)
# prefix; the remaining letters P, R and S are not keys of caseMap, numberMap
# and genderMap respectively, so Case, Number and Gender are all "Unknown".
# Runs at module level, i.e. every time this file is executed or imported.
result = decodeTag("N-PRS")
print(result)