-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpos_cli.py
57 lines (48 loc) · 1.78 KB
/
pos_cli.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import argparse
from pos_model import POSModel
def main() -> None:
    """Run the Northern Kurdish POS tagger on one sentence from the command line.

    Parses the CLI options, builds a ``POSModel`` for each requested model
    name, loads it, and prints whatever ``predict_pos_tags`` returns for the
    given sentence. When ``--pos_model All`` is selected, every entry of
    ``POSModel.POS_MODELS`` is run in turn and each result is preceded by a
    header line naming the model.
    """
    args = _build_arg_parser().parse_args()
    run_all = args.pos_model == 'All'

    # A single explicit model is handled as a one-element sequence so both
    # cases share the same construct -> load -> predict steps.
    selected = POSModel.POS_MODELS if run_all else (args.pos_model,)

    for name in selected:
        tagger = POSModel(name, args.training_data_type)
        tagger.load_pos_model()
        if run_all:
            # Only the multi-model run labels its output.
            print(f"{name} :")
        print(tagger.predict_pos_tags(args.sentence, args.tokenization_method))


def _build_arg_parser() -> argparse.ArgumentParser:
    """Return the argument parser declaring every option of the tagger CLI."""
    cli = argparse.ArgumentParser(
        description="Command line interface for Northern Kurdish POS tagging.",
    )

    # Which tagger to run; "All" is a pseudo-choice handled by main().
    cli.add_argument(
        "--pos_model",
        type=str,
        default="Baseline",
        choices=[
            "All",
            "Baseline",
            "HMM",
            "AveragedPerceptron",
            "CRF",
            "ExtraTrees",
            "BiLSTM",
            "NK-XLMR",
        ],
        help="Name of the POS model to use.",
    )

    # Passed straight through to POSModel as its second argument.
    cli.add_argument(
        "--training_data_type",
        type=str,
        default="augmented",
        choices=["augmented", "original"],
        help="Type of training data used for training the model.",
    )

    # The only mandatory option: the text to tag.
    cli.add_argument(
        "--sentence",
        type=str,
        required=True,
        help="The sentence to perform POS tagging on.",
    )

    # Forwarded to predict_pos_tags alongside the sentence.
    cli.add_argument(
        "--tokenization_method",
        type=str,
        default="KLPT",
        choices=["KLPT", "manual", "NLTK"],
        help=(
            "The tokenization method to use. Use manual in case you manually "
            "tokenize the sentence by splitting off "
            "the IZAFE, Oblique and indefinite case markers from the nouns."
        ),
    )
    return cli
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()