From c1f3cbccc944d2fa7042b7527a0a9cd77f855231 Mon Sep 17 00:00:00 2001 From: BLKSerene <blkserene@gmail.com> Date: Fri, 12 Jan 2024 11:36:32 +0800 Subject: [PATCH] =?UTF-8?q?Menu:=20Allow=20editing=20of=20tagset=20mapping?= =?UTF-8?q?=20of=20spaCy's=20Catalan,=20Danish,=20French,=20Greek=20(Moder?= =?UTF-8?q?n),=20Macedonian,=20Norwegian=20(Bokm=C3=A5l),=20Portuguese,=20?= =?UTF-8?q?Russian,=20Spanish,=20and=20Ukrainian=20part-of-speech=20tagger?= =?UTF-8?q?s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 1 + wordless/wl_nlp/wl_pos_tagging.py | 16 +++- wordless/wl_settings/wl_settings_default.py | 84 +++++++++++++++---- .../wl_settings/wl_settings_pos_tagging.py | 27 ++++-- wordless/wl_tagsets/wl_tagset_bod_botok.py | 2 +- .../wl_tagsets/wl_tagset_cat_universal.py | 40 +++++++++ .../wl_tagsets/wl_tagset_dan_universal.py | 40 +++++++++ .../wl_tagsets/wl_tagset_ell_universal.py | 40 +++++++++ .../wl_tagsets/wl_tagset_eng_penn_treebank.py | 2 +- .../wl_tagsets/wl_tagset_eng_universal.py | 40 +++++++++ .../wl_tagsets/wl_tagset_fra_universal.py | 40 +++++++++ wordless/wl_tagsets/wl_tagset_jpn_unidic.py | 2 +- wordless/wl_tagsets/wl_tagset_khm_alt.py | 2 +- wordless/wl_tagsets/wl_tagset_kor_mecab.py | 2 +- .../wl_tagsets/wl_tagset_lao_seqlabeling.py | 2 +- .../wl_tagset_lao_yunshan_cup_2020.py | 2 +- .../wl_tagsets/wl_tagset_nor_universal.py | 40 +++++++++ .../wl_tagsets/wl_tagset_por_universal.py | 40 +++++++++ .../wl_tagsets/wl_tagset_rus_open_corpora.py | 2 +- .../wl_tagset_rus_russian_national_corpus.py | 2 +- .../wl_tagsets/wl_tagset_rus_universal.py | 40 +++++++++ .../wl_tagsets/wl_tagset_spa_universal.py | 40 +++++++++ .../wl_tagsets/wl_tagset_tha_blackboard.py | 2 +- wordless/wl_tagsets/wl_tagset_tha_orchid.py | 2 +- .../wl_tagsets/wl_tagset_ukr_universal.py | 40 +++++++++ wordless/wl_tagsets/wl_tagset_universal.py | 40 --------- .../wl_tagsets/wl_tagset_vie_underthesea.py | 2 +- 
wordless/wl_widgets/wl_item_delegates.py | 18 +--- 28 files changed, 517 insertions(+), 93 deletions(-) create mode 100644 wordless/wl_tagsets/wl_tagset_cat_universal.py create mode 100644 wordless/wl_tagsets/wl_tagset_dan_universal.py create mode 100644 wordless/wl_tagsets/wl_tagset_ell_universal.py create mode 100644 wordless/wl_tagsets/wl_tagset_eng_universal.py create mode 100644 wordless/wl_tagsets/wl_tagset_fra_universal.py create mode 100644 wordless/wl_tagsets/wl_tagset_nor_universal.py create mode 100644 wordless/wl_tagsets/wl_tagset_por_universal.py create mode 100644 wordless/wl_tagsets/wl_tagset_rus_universal.py create mode 100644 wordless/wl_tagsets/wl_tagset_spa_universal.py create mode 100644 wordless/wl_tagsets/wl_tagset_ukr_universal.py delete mode 100644 wordless/wl_tagsets/wl_tagset_universal.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 480e79d4e..e9a68dd0d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ - Work Area: Add Profiler - Lexical Diversity - Brunét's Index / Honoré's statistic ### ✨ Improvements +- Menu: Allow editing of tagset mapping of spaCy's Catalan, Danish, French, Greek (Modern), Macedonian, Norwegian (Bokmål), Portuguese, Russian, Spanish, and Ukrainian part-of-speech taggers - Utils: Update custom stop word lists ### 📌 Bugfixes diff --git a/wordless/wl_nlp/wl_pos_tagging.py b/wordless/wl_nlp/wl_pos_tagging.py index 707867519..0173d5133 100644 --- a/wordless/wl_nlp/wl_pos_tagging.py +++ b/wordless/wl_nlp/wl_pos_tagging.py @@ -26,6 +26,11 @@ from wordless.wl_nlp import wl_nlp_utils, wl_word_tokenization from wordless.wl_utils import wl_conversion +UNIVERSAL_TAGSETS_SPACY = [ + 'spacy_cat', 'spacy_dan', 'spacy_fra', 'spacy_ell', 'spacy_mkd', + 'spacy_nob', 'spacy_por', 'spacy_rus', 'spacy_spa', 'spacy_ukr' +] + def wl_pos_tag(main, inputs, lang, pos_tagger = 'default', tagset = 'default'): tokens_tagged = [] @@ -168,7 +173,16 @@ def wl_pos_tag(main, inputs, lang, pos_tagger = 'default', tagset = 'default'): 
tokens_tagged.insert(empty_offset, ('', '')) # Convert to universal POS tags - if not pos_tagger.startswith('spacy_') and not pos_tagger.startswith('stanza_') and tagset == 'universal': + if ( + tagset == 'universal' + and ( + ( + not pos_tagger.startswith('spacy_') + and not pos_tagger.startswith('stanza_') + ) + or pos_tagger in UNIVERSAL_TAGSETS_SPACY + ) + ): mappings = { tag: tag_universal for tag, tag_universal, _, _ in main.settings_custom['pos_tagging']['tagsets']['mapping_settings'][lang][pos_tagger] diff --git a/wordless/wl_settings/wl_settings_default.py b/wordless/wl_settings/wl_settings_default.py index ae6211bf0..31e2d6857 100644 --- a/wordless/wl_settings/wl_settings_default.py +++ b/wordless/wl_settings/wl_settings_default.py @@ -16,24 +16,35 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. # ---------------------------------------------------------------------- +import copy + import networkx from PyQt5.QtCore import QCoreApplication from PyQt5.QtWidgets import QDesktopWidget from wordless.wl_settings import wl_settings_global from wordless.wl_tagsets import ( - wl_tagset_universal, + wl_tagset_cat_universal, + wl_tagset_dan_universal, wl_tagset_eng_penn_treebank, + wl_tagset_eng_universal, + wl_tagset_ell_universal, + wl_tagset_fra_universal, wl_tagset_jpn_unidic, wl_tagset_khm_alt, wl_tagset_kor_mecab, wl_tagset_lao_seqlabeling, wl_tagset_lao_yunshan_cup_2020, + wl_tagset_nor_universal, + wl_tagset_por_universal, wl_tagset_rus_open_corpora, wl_tagset_rus_russian_national_corpus, + wl_tagset_rus_universal, + wl_tagset_spa_universal, wl_tagset_tha_blackboard, wl_tagset_tha_orchid, wl_tagset_bod_botok, + wl_tagset_ukr_universal, wl_tagset_vie_underthesea ) from wordless.wl_utils import wl_misc, wl_paths @@ -1580,51 +1591,90 @@ def init_settings_default(main): }, 'mapping_settings': { + 'cat': { + 'spacy_cat': copy.deepcopy(wl_tagset_cat_universal.tagset_mapping) + }, + + 'dan': { + 'spacy_dan': 
copy.deepcopy(wl_tagset_dan_universal.tagset_mapping) + }, + 'eng_gb': { - 'nltk_perceptron_eng': wl_tagset_eng_penn_treebank.MAPPINGS, + 'nltk_perceptron_eng': wl_tagset_eng_penn_treebank.tagset_mapping, }, + 'eng_us': { - 'nltk_perceptron_eng': wl_tagset_eng_penn_treebank.MAPPINGS, + 'nltk_perceptron_eng': wl_tagset_eng_penn_treebank.tagset_mapping, + }, + + 'ell': { + 'spacy_ell': copy.deepcopy(wl_tagset_ell_universal.tagset_mapping) + }, + + 'fra': { + 'spacy_fra': copy.deepcopy(wl_tagset_fra_universal.tagset_mapping) }, 'jpn': { - 'sudachipy_jpn': wl_tagset_jpn_unidic.MAPPINGS + 'sudachipy_jpn': wl_tagset_jpn_unidic.tagset_mapping }, 'khm': { - 'khmer_nltk_khm': wl_tagset_khm_alt.MAPPINGS + 'khmer_nltk_khm': wl_tagset_khm_alt.tagset_mapping }, 'kor': { - 'python_mecab_ko_mecab': wl_tagset_kor_mecab.MAPPINGS + 'python_mecab_ko_mecab': wl_tagset_kor_mecab.tagset_mapping }, 'lao': { - 'laonlp_seqlabeling': wl_tagset_lao_seqlabeling.MAPPINGS, - 'laonlp_yunshan_cup_2020': wl_tagset_lao_yunshan_cup_2020.MAPPINGS + 'laonlp_seqlabeling': wl_tagset_lao_seqlabeling.tagset_mapping, + 'laonlp_yunshan_cup_2020': wl_tagset_lao_yunshan_cup_2020.tagset_mapping + }, + + 'mkd': { + 'spacy_mkd': copy.deepcopy(wl_tagset_eng_universal.tagset_mapping) + }, + + 'nob': { + 'spacy_nob': copy.deepcopy(wl_tagset_nor_universal.tagset_mapping) + }, + + 'por_br': { + 'spacy_por': copy.deepcopy(wl_tagset_por_universal.tagset_mapping) + }, + + 'por_pt': { + 'spacy_por': copy.deepcopy(wl_tagset_por_universal.tagset_mapping) }, 'rus': { - 'nltk_perceptron_rus': wl_tagset_rus_russian_national_corpus.MAPPINGS, - 'pymorphy3_morphological_analyzer': wl_tagset_rus_open_corpora.MAPPINGS + 'nltk_perceptron_rus': wl_tagset_rus_russian_national_corpus.tagset_mapping, + 'pymorphy3_morphological_analyzer': wl_tagset_rus_open_corpora.tagset_mapping, + 'spacy_rus': copy.deepcopy(wl_tagset_rus_universal.tagset_mapping) + }, + + 'spa': { + 'spacy_spa': copy.deepcopy(wl_tagset_spa_universal.tagset_mapping) }, 
'tha': { - 'pythainlp_perceptron_blackboard': wl_tagset_tha_blackboard.MAPPINGS, - 'pythainlp_perceptron_orchid': wl_tagset_tha_orchid.MAPPINGS, - 'pythainlp_perceptron_pud': wl_tagset_universal.MAPPINGS + 'pythainlp_perceptron_blackboard': wl_tagset_tha_blackboard.tagset_mapping, + 'pythainlp_perceptron_orchid': wl_tagset_tha_orchid.tagset_mapping, + 'pythainlp_perceptron_pud': copy.deepcopy(wl_tagset_eng_universal.tagset_mapping) }, 'bod': { - 'botok_bod': wl_tagset_bod_botok.MAPPINGS + 'botok_bod': wl_tagset_bod_botok.tagset_mapping }, 'ukr': { - 'pymorphy3_morphological_analyzer': wl_tagset_rus_open_corpora.MAPPINGS + 'pymorphy3_morphological_analyzer': wl_tagset_rus_open_corpora.tagset_mapping, + 'spacy_ukr': copy.deepcopy(wl_tagset_ukr_universal.tagset_mapping) }, 'vie': { - 'underthesea_vie': wl_tagset_vie_underthesea.MAPPINGS + 'underthesea_vie': wl_tagset_vie_underthesea.tagset_mapping } } } @@ -2342,7 +2392,7 @@ def init_settings_default(main): } # Tagsets - settings_default['pos_tagging']['tagsets']['preview_settings']['preview_pos_tagger'] = settings_default['pos_tagging']['pos_tagger_settings']['pos_taggers'] + settings_default['pos_tagging']['tagsets']['preview_settings']['preview_pos_tagger'] = settings_default['pos_tagging']['pos_tagger_settings']['pos_taggers'].copy() # Custom stop word lists for lang in wl_settings_global.SETTINGS_GLOBAL['langs'].values(): diff --git a/wordless/wl_settings/wl_settings_pos_tagging.py b/wordless/wl_settings/wl_settings_pos_tagging.py index 2b8942179..da70d3ccf 100644 --- a/wordless/wl_settings/wl_settings_pos_tagging.py +++ b/wordless/wl_settings/wl_settings_pos_tagging.py @@ -21,8 +21,8 @@ from PyQt5.QtCore import pyqtSignal, Qt from PyQt5.QtGui import QStandardItem from PyQt5.QtWidgets import ( - QCheckBox, QGroupBox, QLabel, QPushButton, QStackedWidget, - QTextEdit, QWidget + QCheckBox, QGroupBox, QLabel, QPlainTextEdit, QPushButton, + QStackedWidget, QTextEdit ) from wordless.wl_dialogs import wl_dialogs_misc, 
wl_msg_boxes @@ -271,8 +271,6 @@ def __init__(self, main): self.pos_tag_mappings_loaded = False - self.settings_tagsets = QWidget(self) - # Preview Settings self.group_box_preview_settings = QGroupBox(self.tr('Preview Settings:'), self) @@ -316,7 +314,8 @@ def __init__(self, main): self.stacked_widget_num_pos_tags.addWidget(self.label_tagsets_num_pos_tags) self.stacked_widget_num_pos_tags.addWidget(self.label_tagsets_uneditable) - self.table_mappings.setItemDelegate(wl_item_delegates.Wl_Item_Delegate_Combo_Box( + self.table_mappings.setItemDelegateForColumn(0, wl_item_delegates.Wl_Item_Delegate_Uneditable(self.table_mappings)) + self.table_mappings.setItemDelegateForColumn(1, wl_item_delegates.Wl_Item_Delegate_Combo_Box( parent = self.table_mappings, items = [ 'ADJ', @@ -338,9 +337,10 @@ def __init__(self, main): 'SYM', 'X' ], - col = 1, editable = True )) + self.table_mappings.setItemDelegateForColumn(2, wl_item_delegates.Wl_Item_Delegate(self.table_mappings, QPlainTextEdit)) + self.table_mappings.setItemDelegateForColumn(3, wl_item_delegates.Wl_Item_Delegate(self.table_mappings, QPlainTextEdit)) self.button_tagsets_reset.setMinimumWidth(100) self.button_tagsets_reset_all.setMinimumWidth(100) @@ -399,7 +399,12 @@ def preview_pos_tagger_changed(self): preview_pos_tagger = self.settings_custom['preview_settings']['preview_pos_tagger'][preview_lang] - if not preview_pos_tagger.startswith('spacy_') and not preview_pos_tagger.startswith('stanza_'): + if ( + ( + not preview_pos_tagger.startswith('spacy_') + and not preview_pos_tagger.startswith('stanza_') + ) or preview_pos_tagger in wl_pos_tagging.UNIVERSAL_TAGSETS_SPACY + ): self.combo_box_tagsets_lang.setEnabled(False) self.combo_box_tagsets_pos_tagger.setEnabled(False) self.button_tagsets_reset.setEnabled(False) @@ -464,6 +469,8 @@ def reset_currently_shown_table(self): for i in range(self.table_mappings.model().rowCount()): self.table_mappings.model().item(i, 1).setText(mappings[i][1]) + 
self.table_mappings.model().item(i, 2).setText(mappings[i][2]) + self.table_mappings.model().item(i, 3).setText(mappings[i][3]) self.table_mappings.enable_updates() @@ -520,10 +527,12 @@ def apply_settings(self): # Mapping Settings preview_lang = self.settings_custom['preview_settings']['preview_lang'] preview_pos_tagger = self.settings_custom['preview_settings']['preview_pos_tagger'][preview_lang] + mapping = self.settings_custom['mapping_settings'][preview_lang][preview_pos_tagger] for i in range(self.table_mappings.model().rowCount()): - if not preview_pos_tagger.startswith('spacy_'): - self.settings_custom['mapping_settings'][preview_lang][preview_pos_tagger][i][1] = self.table_mappings.model().item(i, 1).text() + mapping[i][1] = self.table_mappings.model().item(i, 1).text() + mapping[i][2] = self.table_mappings.model().item(i, 2).text() + mapping[i][3] = self.table_mappings.model().item(i, 3).text() return True diff --git a/wordless/wl_tagsets/wl_tagset_bod_botok.py b/wordless/wl_tagsets/wl_tagset_bod_botok.py index 2bf56adce..42e4104a5 100644 --- a/wordless/wl_tagsets/wl_tagset_bod_botok.py +++ b/wordless/wl_tagsets/wl_tagset_bod_botok.py @@ -17,7 +17,7 @@ # ---------------------------------------------------------------------- # Reference: https://github.com/Esukhia/botok/blob/master/botok/vars.py -MAPPINGS = [ +tagset_mapping = [ ['ADJ', 'ADJ', 'Adjectives', ''], ['ADP', 'ADP', 'Adposition', ''], ['ADV', 'ADV', 'Adverb', ''], diff --git a/wordless/wl_tagsets/wl_tagset_cat_universal.py b/wordless/wl_tagsets/wl_tagset_cat_universal.py new file mode 100644 index 000000000..7251f73de --- /dev/null +++ b/wordless/wl_tagsets/wl_tagset_cat_universal.py @@ -0,0 +1,40 @@ +# ---------------------------------------------------------------------- +# Wordless: Tagsets - Universal POS tags - Catalan +# Copyright (C) 2018-2024 Ye Lei (叶磊) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as 
published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# ---------------------------------------------------------------------- + +# Reference: https://universaldependencies.org/ca/pos/ +tagset_mapping = [ + ['ADJ', 'ADJ', 'Adjective', 'gran, vell, verd, incomprensible\nprimer, segon, tercer'], + ['ADP', 'ADP', 'Adposition', '[English] in, to, during'], + ['ADV', 'ADV', 'Adverb', 'molt, bé, exactament, demà, dalt, baix\nInterrogative or exclamative adverbs: on, quan, com, per què\nDemonstrative adverbs: aquí, allí, ara, després\nTotality adverbs: sempre\nNegative adverbs: mai'], + ['AUX', 'AUX', 'Auxiliary', 'Tense auxiliaries: [English] has (done), is (doing), will (do)\nPassive auxiliaries: [English] was (done), got (done)\nModal auxiliaries: [English] should (do), must (do)\nVerbal copulas: [English] (He) is (a teacher.)\nAgreement auxiliaries: [K’iche’] la (2nd person singular formal), alaq (2nd person plural formal)'], + ['CONJ', 'CONJ', 'Coordinating/subordinating conjunction', 'See CCONJ and SCONJ'], + ['CCONJ', 'CCONJ', 'Coordinating conjunction', '[English] and, or, but'], + ['SCONJ', 'SCONJ', 'Subordinating conjunction', '[English] (I believe) that (he will come.), if, while'], + ['DET', 'DET', 'Determiner', 'Articles (a closed class indicating definiteness, specificity or givenness): [English] a, an, the\nPossessive determiners (which modify a nominal; note that some languages use PRON for similar words): [Czech] můj, tvůj, jeho, její, náš, váš, jejich\nDemonstrative determiners: [English] (I saw) this 
(car yesterday.)\nInterrogative determiners: [English] Which (car do you like?)\nRelative determiners: [English] (I wonder) which (car you like.)\nQuantity determiners (quantifiers):\n\tIndefinite: [English] any\n\tUniversal: [English] all\n\tNegative: [English] (We have) no (cars available.)'], + ['INTJ', 'INTJ', 'Interjection', 'psst, ai, bravo, hola, Sí(, perque…), No(, no ho crec.)'], + ['NOUN', 'NOUN', 'Noun', 'noia, gat, arbre, aire, bellesa'], + ['PROPN', 'PROPN', 'Proper noun', '[English] Mary, John, London, NATO, HBO, john.doe@universal.org, http://universaldependencies.org/, 1-800-COMPANY'], + ['NUM', 'NUM', 'Numeral', '0, 1, 2, 3, 4, 5, 2014, 1000000, 3.14159265359\n11/11/1918, 11:00\n[English] one, two, three, seventy-seven\nk (abbreviation for thousand), m (abbreviation for million)\nI, II, III, IV, V, MMXIV'], + ['PART', 'PART', 'Particle', 'Possessive marker: [English] ’s\nNegation particle: [English] not; [German] nicht\nQuestion particle: [Japanese] か/ka (adding this particle to the end of a clause turns the clause into a question); [Turkish] mu\nSentence modality: [Czech] ať, kéž, nechť'], + ['PRON', 'PRON', 'Pronoun', 'Personal pronouns: [English] I, you, he, she, it, we, they\nReflexive pronouns: [English] myself, yourself, himself, herself, itself, ourselves, yourselves, theirselves\nInterrogative pronouns: who, What (do you think?)\nRelative pronouns (unlike SCONJ relativizers, relative pronouns play a nominal role in the relative clause): [English] (a cat) who (eats fish), that, which, (I wonder) what (you think.)\nIndefinite pronouns: [English] somebody, something, anybody, anything\nTotal pronouns: [English] everybody, everything\nNegative pronouns: [English] nobody, nothing\nPossessive pronouns (which usually stand alone as a nominal): [English] mine, yours, his, hers, its, ours, theirs\nAttributive possessive pronouns (in some languages; others use DET for similar words): [English] my, your'], + ['VERB', 'VERB', 'Verb', '[English] run, 
eat\n[English] runs, ate\n[English] running, eating'], + + ['PUNCT', 'PUNCT', 'Punctuation', 'Period: .\nComma: ,\nParentheses: ()'], + ['SYM', 'SYM', 'Symbol', '$, %, §, ©\n+, −, ×, ÷, =, <, >\n:), ♥‿♥, 😝'], + ['X', 'X', 'Other', '[English] (And then he just) xfgh pdl jklw'] +] diff --git a/wordless/wl_tagsets/wl_tagset_dan_universal.py b/wordless/wl_tagsets/wl_tagset_dan_universal.py new file mode 100644 index 000000000..2c7083e8d --- /dev/null +++ b/wordless/wl_tagsets/wl_tagset_dan_universal.py @@ -0,0 +1,40 @@ +# ---------------------------------------------------------------------- +# Wordless: Tagsets - Universal POS tags - Danish +# Copyright (C) 2018-2024 Ye Lei (叶磊) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# ---------------------------------------------------------------------- + +# Reference: https://universaldependencies.org/da/pos/ +tagset_mapping = [ + ['ADJ', 'ADJ', 'Adjective', 'gammel/gammelt/gamle, grøn/grønt/grønne, ufatlig/ufatligt/ufatlige'], + ['ADP', 'ADP', 'Adposition', 'i, på, gennem'], + ['ADV', 'ADV', 'Adverb', 'meget (vigtigt), væk, (jeg spiser) ikke (rejer), pludselig'], + ['AUX', 'AUX', 'Auxiliary', 'Tense auxiliary: har (købt)\nModal auxiliary: kunne (tænke)\nPassive auxiliary: blev (fundet)\nCopula: var (grøn), er (en løsning)'], + ['CONJ', 'CONJ', 'Coordinating/subordinating conjunction', 'See CCONJ and SCONJ'], + ['CCONJ', 'CCONJ', 'Coordinating conjunction', 'og, eller, men'], + ['SCONJ', 'SCONJ', 'Subordinating conjunction', 'da, hvis, (konstatere) at (manden har søgt hjælp)'], + ['DET', 'DET', 'Determiner', 'Articles: en, et, den, det, de\nPossessive determiners: min (bil), deres (holdninger), dit (job)\nNegative determiners: (han har) ingen (empati)'], + ['INTJ', 'INTJ', 'Interjection', 'Hmm!, Åh!, Hej!'], + ['NOUN', 'NOUN', 'Noun', 'pige, kat, træ, luft, skønhed'], + ['PROPN', 'PROPN', 'Proper noun', 'Anna, Otto\nSkåne, USA\nTexaco, Pirelli'], + ['NUM', 'NUM', 'Numeral', '0, 1, 2, 3, 2014, 1 000 000, 3.000,15, 3,14159265359\net, to, tre, nitten\nI, II, III, IV, V, MMXIV'], + ['PART', 'PART', 'Particle', '(det er muligt) at (ændre det)'], + ['PRON', 'PRON', 'Pronoun', 'Personal (subject) pronouns: jeg, du, han, hun, det/den, vi, I, de\nPlaceholder personal pronoun: man (kan gå)\nPersonal (object)/reflexive pronouns: mig, dig, ham, henne, sig, os, hinanden\nDemonstrative pronouns: dette (er et svært spørgsmål)\nPossessive pronouns: vores\nInterrogative pronouns: hvad\nRelative pronouns: hvis\nIndefinite pronouns: nogen, noget\nTotality pronouns: alting\nNegative pronouns: ingen (af os)'], + ['VERB', 'VERB', 'Verb', 'at vise, jeg viser, han viste\nat flyve, vi flyver, de fløj'], + + ['PUNCT', 'PUNCT', 'Punctuation', 'Period: .\nComma: 
,\nParentheses: ()'], + ['SYM', 'SYM', 'Symbol', '§'], + ['X', 'X', 'Other', 'musik(- og billedprogrammer)'] +] diff --git a/wordless/wl_tagsets/wl_tagset_ell_universal.py b/wordless/wl_tagsets/wl_tagset_ell_universal.py new file mode 100644 index 000000000..a49a06ff1 --- /dev/null +++ b/wordless/wl_tagsets/wl_tagset_ell_universal.py @@ -0,0 +1,40 @@ +# ---------------------------------------------------------------------- +# Wordless: Tagsets - Universal POS tags - Greek (Modern) +# Copyright (C) 2018-2024 Ye Lei (叶磊) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# ---------------------------------------------------------------------- + +# Reference: https://universaldependencies.org/el/pos/ +tagset_mapping = [ + ['ADJ', 'ADJ', 'Adjective', 'μεγάλος/megalos, πράσινος/prasinos, ακατάληπτος/akataliptos\nπρώτος/protos, δεύτερος/defteros, τρίτος/tritos\n(Η Ελένη είναι) ίδια (με την μητέρα της). It is also assigned the tag DET.'], + ['ADP', 'ADP', 'Adposition', 'σε, από, με, για, ως, εκ/εξ / se, apo, me, gia, os, ek/ex\n(μέσα) σε, (ενάντια) σε ενάντια\nμετά/εντός + ΝΟUN[Case=Gen]: μετά ληστείας, εντός δευτερολέπτων. 
In all other environments μετά, εντός are tagged ADV.'], + ['ADV', 'ADV', 'Adverb', 'Locative adverbs: απέξω/apekso, εδώ/edo, εκεί/eki, πάνω/pano, κάτω/kato, δεξιά/deksia, αριστερά/aristera, κάπου/kapou, παντού/pantou, πουθενά/pouthena, πού/pou\nManner adverbs: ακριβώς/akrivos, γιατί/yiati (when it is on its own / it introduces direct questions), εντάξει/endaksi, καλά/kala, κατανάγκη/katanagki, πώς/pos, υπόψη/ipopsi\nTemporal adverbs: αύριο/avrio, κάποτε/kapote, καταρχήν/katarchin, πάντα/panta, πέρσυ/persi, πότε/pote, ποτέ/pote, σήμερα/simera, τότε/tote, τώρα/tora, χθες/chthes\nQuantity adverbs: άπαξ/apaks, καθόλου/katholou, λίγο/ligo, μόνο/mono, τόσο/toso'], + ['AUX', 'AUX', 'Auxiliary', 'Tense auxiliaries: έχει φύγει / echi figi, θα φύγει / tha figi\nAuxiliaries with passive verb forms: έχει γραφτεί / echi grafti, θα γραφτεί / tha grafti\nAuxiliary use of είμαι: το τριαντάφυλο είναι λουλούδι / to triantafilo ine louloudi, ο Αλέξανδρος είναι ψηλός / o Alexandros ine psilos, το γράμμα είναι γραμμένο με σκούρο μελάνι / to grama ine grameno me skouro melani\nAuxiliary use of να: Να προσέχεις\nAuxiliary use of ας: Καλύτερα ας έχουμε το κεφάλι μας ήσυχο.'], + ['CONJ', 'CONJ', 'Coordinating/subordinating conjunction', 'See CCONJ and SCONJ'], + ['CCONJ', 'CCONJ', 'Coordinating conjunction', 'και, ή, αλλά, όμως, ωστόσο, είτε (εσύ) είτε (ο Παύλος), ούτε (εσύ) ούτε (ο Παύλος)'], + ['SCONJ', 'SCONJ', 'Subordinating conjunction', 'Complementizers: αν/an, άρα, αφού/ara, afou, γιατί/giati, ενώ/eno, καθώς/kathos, μήπως, ότι, πως/mipos, oti, pos, ώστε/oste, ώσπου/ospou\nAdverbial clause introducers: όταν/otan, αφότου/afotou, πριν/prin, μόλις/molis (when introducing a clause, not a nominal), μια (και) / mia (ke) (with the dependency fixed between them)\nInterrogative pronouns:\n\tποιος/pios, πόσος/posos\n\tτι/ti that introduce an indirect question that serves as an argument of a verb or as a clausal modifier of a deverbal noun\n\t\tVERB: ρώτησα ποιο/πόσο/τι θέλεις; / rotisa 
pio/poso/ti theleis\n\t\tNOUN: (έχω μείνει με την) απορία τί (στο καλό της είπες).'], + ['DET', 'DET', 'Determiner', 'Definite article: ο, η, το / o, i, to\nIndefinite article: ένας, μία, ένα / enas, mia, ena\nAdjectives denoting quantities and their comparatives:\n\tαρκετός/arketos, λίγος/ligos (ελάχιστος/elachistos), μερικός/merikos, μισός/misos, μόνος/monos, πολύς/polis\n\tολόκληρος/olokliros, όλος/olos, πας/άπας / pas/apas\nDemonstrative pronouns: αυτός/aftos, εκείνος/ekinos, (ε)τούτος/(e)toutos, τέτοιος/tetios, τόσος/tosos, ίδιος/idios\nIndefinite pronouns:\n\tάλλος/alos, κάποιος/kapios, κάτι/kati\n\tκάθε/kathe, καθένας/kathenas\n\tκανείς (κανένας) / kanis (kanenas), τίποτα/ε / tipota/e\nInterrogative pronouns when followed by a noun: ποιος/pios, πόσος/posos, τι/ti, e.g., ποιο/πόσο/τι φαγητό θέλεις; / pio/poso/ti fagito thelis?\nRelative pronouns when followed by a noun: όποιος/-δήποτε / opios/-dipote, όσος/-δήποτε / osos/-dipote, ό,τι/-δήποτε / oti/-dipote\nίδιος/idios'], + ['INTJ', 'INTJ', 'Interjection', 'αμήν, άντε, βρε, καλέ, ναι/ne, όχι/ochi, ορίστε, μα, λοιπόν/lipon, καλημέρα/kalimera, καληνύχτα/kalinichta, Καλά(, πώς ήρθες εδώ;)'], + ['NOUN', 'NOUN', 'Noun', 'γυναίκα/gineka, σκύλος/skilos, τραπέζι/trapezi, επανάσταση/epanastasi, ελευθερία/freedom\nProfessions: αστυνομικός/astinomikos, στρατιωτικός/stratiotikos. 
When the same words cooccur with another noun, such as αστυνομικός σκύλος / astinomikos skilos, they are assigned the tag ADJ.\nακουστικό/akoustiko, (καρτο-)κινητό / (karto-)kinito, ενδότερα/endotera, πρωϊνό/proino, μεσημεριανό/mesimeriano, βραδινό/vradino, λαδερά/ladera, λαϊκή, περιπολικό/peripoliko'], + ['PROPN', 'PROPN', 'Proper noun', 'Κύριε/kirie\nPlace names: Ανατολή/Anatoli, Δύση/Disi, Όλυμπος/olibos\nDay names: Τρίτη/Triti, Σαββατοκύριακο/Savatokiriako\nCountries: Eλλάδα/Elada, Κύπρος/Kipros\nDiminutives productively formed by a proper noun and a suffix such as –άκι, –ίτσα, –ούλης, -άκης/-aki, -itsa, -oulis, -akis: Mαράκι/Maraki, Γιαννάκης/Gianakis\nAugmentatives (μεγεθυντικά) productively formed by a proper noun and a suffix such as -άρας/-aras: Στελάρας/Stelaras, Σουλάρα/Soulara\nNames of anniversaries, bank holidays: Ανάσταση/Anastasi, Επιτάφιος/Epitafios, Μεγάλη Εβδομάδα / Megali Vdomada, Πάσχα/Pascha, Χριστούγεννα/Christougena, Πρωτοχρονιά/Protochronia\nPlace names:\n\tStreet names in the genitive case where the noun οδός/odos is omitted: (οδός) Ερμού / (odos) Ermou .\n\tAvenue/motorway names consisting of two place names in the genitive case (starting-ending places): Αθηνών-Κορίνθου. These are productive compounds. 
Each part of the compound is assigned the tag PROPN and the second proper noun depends on the first one with the relation compound; the first proper noun is considered the head of the compound.'], + ['NUM', 'NUM', 'Numeral', '0, 1, 2, 3, 4, 5, 2014, 1000000, 3.14159265359\nένα, δύο, τρία, εβδομήντα πέντε\nI, II, III, IV, V, MMXIV\nδωδεκάμιση'], + ['PART', 'PART', 'Particle', 'ας, δεν, καν, μπας (και), μην, να, όχι, πάρα, μακάρι'], + ['PRON', 'PRON', 'Pronoun', 'Interrogative pronouns in direct questions: Ποιο/Πόσο/Τι (θέλεις;)\nPersonal pronouns: both strong and weak types (clitics): του το (έδωσα) / (tou to) edosa\nPossessive pronouns: (το σπίτι) μου / (to spiti) mou\nReflexive pronouns: εαυτός/eaftos\nRelative pronouns: οποίος/opios'], + ['VERB', 'VERB', 'Verb', 'τρέχω/trecho, τρώει/troi\nτρέχοντας/trechodas, τρώγοντας/trogodas'], + + ['PUNCT', 'PUNCT', 'Punctuation', 'Period: .\nComma: ,\nParentheses: ()'], + ['SYM', 'SYM', 'Symbol', '$, %, §, ©\n+, −, ×, ÷, =, <, >\n:), ♥‿♥, 😝'], + ['X', 'X', 'Other', '(H αναπαραγωγή δεν θα είναι εντελώς) Lossless.\n(Κάντο) φοργουόρντ (σε μένα.)'] +] diff --git a/wordless/wl_tagsets/wl_tagset_eng_penn_treebank.py b/wordless/wl_tagsets/wl_tagset_eng_penn_treebank.py index aac0e09ff..dd37e32eb 100644 --- a/wordless/wl_tagsets/wl_tagset_eng_penn_treebank.py +++ b/wordless/wl_tagsets/wl_tagset_eng_penn_treebank.py @@ -19,7 +19,7 @@ # References: # https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html # https://github.com/nltk/nltk_data/blob/gh-pages/packages/taggers/universal_tagset.zip -MAPPINGS = [ +tagset_mapping = [ ['CC', 'CCONJ', 'Coordinating conjunction', ''], ['CD', 'NUM', 'Cardinal number', ''], ['CD|RB', 'X', '', ''], diff --git a/wordless/wl_tagsets/wl_tagset_eng_universal.py b/wordless/wl_tagsets/wl_tagset_eng_universal.py new file mode 100644 index 000000000..87d4fdb16 --- /dev/null +++ b/wordless/wl_tagsets/wl_tagset_eng_universal.py @@ -0,0 +1,40 @@ +# 
---------------------------------------------------------------------- +# Wordless: Tagsets - Universal POS tags - English +# Copyright (C) 2018-2024 Ye Lei (叶磊) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# ---------------------------------------------------------------------- + +# Universal POS Tags: https://universaldependencies.org/u/pos/ +tagset_mapping = [ + ['ADJ', 'ADJ', 'Adjective', 'big, old, green, African, incomprehensible, first, second, third'], + ['ADP', 'ADP', 'Adposition', 'in, to, during'], + ['ADV', 'ADV', 'Adverb', 'very, well, exactly, tomorrow, up, down\nInterrogative/relative adverbs (including when used to mark a clause that is circumstantial, not interrogative or relative): where, when, how, why, whenever, wherever\nDemonstrative adverbs: here, there, now, then\nIndefinite adverbs: somewhere, sometime, anywhere, anytime\nTotality adverbs: everywhere, always\nNegative adverbs: nowhere, never; [German] usw.'], + ['AUX', 'AUX', 'Auxiliary', 'Tense auxiliaries: has (done), is (doing), will (do)\nPassive auxiliaries: was (done), got (done)\nModal auxiliaries: should (do), must (do)\nVerbal copulas: (He) is (a teacher.)\nAgreement auxiliaries: [K’iche’] la (2nd person singular formal), alaq (2nd person plural formal)'], + ['CONJ', 'CONJ', 'Coordinating/subordinating conjunction', 'See CCONJ and SCONJ'], + ['CCONJ', 'CCONJ', 'Coordinating conjunction', 
'and, or, but'], + ['SCONJ', 'SCONJ', 'Subordinating conjunction', '(I believe) that (he will come.), if, while'], + ['DET', 'DET', 'Determiner', 'Articles (a closed class indicating definiteness, specificity or givenness): a, an, the\nPossessive determiners (which modify a nominal; note that some languages use PRON for similar words): [Czech] můj, tvůj, jeho, její, náš, váš, jejich\nDemonstrative determiners: (I saw) this (car yesterday.)\nInterrogative determiners: Which (car do you like?)\nRelative determiners: (I wonder) which (car you like.)\nQuantity determiners (quantifiers):\n\tIndefinite: any\n\tUniversal: all\n\tNegative: (We have) no (cars available.)'], + ['INTJ', 'INTJ', 'Interjection', 'psst, ouch, bravo, hello'], + ['NOUN', 'NOUN', 'Noun', 'girl, tree, etc., beauty, decision'], + ['PROPN', 'PROPN', 'Proper noun', 'Mary, John, London, NATO, HBO, john.doe@universal.org, http://universaldependencies.org/, 1-800-COMPANY'], + ['NUM', 'NUM', 'Numeral', '0, 1, 2, 3, 4, 5, 2014, 1000000, 3.14159265359\n11/11/1918, 11:00\none, two, three, seventy-seven\nk (abbreviation for thousand), m (abbreviation for million)\nI, II, III, IV, V, MMXIV'], + ['PART', 'PART', 'Particle', 'Possessive marker: ’s\nNegation particle: not; [German] nicht\nQuestion particle: [Japanese] か/ka (adding this particle to the end of a clause turns the clause into a question); [Turkish] mu\nSentence modality: [Czech] ať, kéž, nechť'], + ['PRON', 'PRON', 'Pronoun', 'Personal pronouns: I, you, he, she, it, we, they\nReflexive pronouns: myself, yourself, himself, herself, itself, ourselves, yourselves, theirselves\nInterrogative pronouns: who, What (do you think?)\nRelative pronouns (unlike SCONJ relativizers, relative pronouns play a nominal role in the relative clause): (a cat) who (eats fish), that, which, (I wonder) what (you think.)\nIndefinite pronouns: somebody, something, anybody, anything\nTotal pronouns: everybody, everything\nNegative pronouns: nobody, nothing\nPossessive pronouns 
(which usually stand alone as a nominal): mine, yours, his, hers, its, ours, theirs\nAttributive possessive pronouns (in some languages; others use DET for similar words): my, your'], + ['VERB', 'VERB', 'Verb', 'run, eat\nruns, ate\nrunning, eating'], + + ['PUNCT', 'PUNCT', 'Punctuation', 'Period: .\nComma: ,\nParentheses: ()'], + ['SYM', 'SYM', 'Symbol', '$, %, §, ©\n+, −, ×, ÷, =, <, >\n:), ♥‿♥, 😝'], + ['X', 'X', 'Other', '(And then he just) xfgh pdl jklw'] +] diff --git a/wordless/wl_tagsets/wl_tagset_fra_universal.py b/wordless/wl_tagsets/wl_tagset_fra_universal.py new file mode 100644 index 000000000..88739f979 --- /dev/null +++ b/wordless/wl_tagsets/wl_tagset_fra_universal.py @@ -0,0 +1,40 @@ +# ---------------------------------------------------------------------- +# Wordless: Tagsets - Universal POS tags - French +# Copyright (C) 2018-2024 Ye Lei (叶磊) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# ---------------------------------------------------------------------- + +# Universal POS Tags: https://universaldependencies.org/fr/pos/ +tagset_mapping = [ + ['ADJ', 'ADJ', 'Adjective', 'grand/grande/grands/grandes, vieux/vieille/vieilles'], + ['ADP', 'ADP', 'Adposition', 'pour, de, à, dans'], + ['ADV', 'ADV', 'Adverb', 'très (joli), (fondues) ensemble'], + ['AUX', 'AUX', 'Auxiliary', 'être, avoir, faire'], + ['CONJ', 'CONJ', 'Coordinating/subordinating conjunction', 'See CCONJ and SCONJ'], + ['CCONJ', 'CCONJ', 'Coordinating conjunction', 'mais, ou, et, or, ni, car'], + ['SCONJ', 'SCONJ', 'Subordinating conjunction', 'quand\nMultiword subordinating conjunction: (parce) que, (afin) que, (avant) que'], + ['DET', 'DET', 'Determiner', 'Articles (a closed class indicating definiteness, specificity or givenness): le, la, les\nPossessive determiners: mon, ton, son, ma, ta, sa, mes, tes, ses, notre, votre, leur, nos, vos, leurs\nDemonstrative determiners: (J’ai vu) ce (vélo hier.), cet, cette\nInterrogative determiners: quel, Quelle (couleur aimez-vous?)\nRelative determiners: quel, (Je me demande) quelle (couleur vous aimez.)\nQuantity/quantifier determiners: aucun'], + ['INTJ', 'INTJ', 'Interjection', 'bref, bon, enfin'], + ['NOUN', 'NOUN', 'Noun', 'fille, chat, arbre, air, beauté'], + ['PROPN', 'PROPN', 'Proper noun', 'Pierre, ONU, Mexique'], + ['NUM', 'NUM', 'Numeral', 'quatre, 4, IV'], + ['PART', 'PART', 'Particle', 'Negation particle: ne'], + ['PRON', 'PRON', 'Pronoun', 'Personal pronouns: je, tu, il\nDemonstrative pronouns: ceux\nReflexive pronouns: me, se\nInterrogative/relative pronouns: qui, que'], + ['VERB', 'VERB', 'Verb', '(je) vois, (à) lire, (en) marchant'], + + ['PUNCT', 'PUNCT', 'Punctuation', 'Period: .\nComma: ,\nParentheses: ()'], + ['SYM', 'SYM', 'Symbol', '$, %, §, ©\n+, −, ×, ÷, =, <, >\n:), ♥‿♥, 😝\njohn.doe@universal.org, http://universaldependencies.org/, 1-800-COMPANY'], + ['X', 'X', 'Other', 'etc'] +] diff --git
a/wordless/wl_tagsets/wl_tagset_jpn_unidic.py b/wordless/wl_tagsets/wl_tagset_jpn_unidic.py index bcc089b0b..38153aea4 100644 --- a/wordless/wl_tagsets/wl_tagset_jpn_unidic.py +++ b/wordless/wl_tagsets/wl_tagset_jpn_unidic.py @@ -19,7 +19,7 @@ # References: # UniDic: https://gist.github.com/masayu-a/e3eee0637c07d4019ec9 # spaCy: https://github.com/explosion/spaCy/blob/master/spacy/lang/ja/tag_map.py -MAPPINGS = [ +tagset_mapping = [ ['代名詞', 'PRON', 'Pronoun', ''], ['副詞', 'ADV', 'Adverb', ''], ['助動詞', 'AUX', 'Auxiliary verb', ''], diff --git a/wordless/wl_tagsets/wl_tagset_khm_alt.py b/wordless/wl_tagsets/wl_tagset_khm_alt.py index 7bb98f51f..e3820626c 100644 --- a/wordless/wl_tagsets/wl_tagset_khm_alt.py +++ b/wordless/wl_tagsets/wl_tagset_khm_alt.py @@ -17,7 +17,7 @@ # ---------------------------------------------------------------------- # Reference: https://www2.nict.go.jp/astrec-att/member/mutiyama/ALT/Khmer-annotation-guideline.pdf -MAPPINGS = [ +tagset_mapping = [ ['n', 'NOUN', 'General nouns, can be subjects or objects of tokens tagged by v', ''], ['v', 'VERB', 'General verbs, can take tokens tagged by n as arguments', ''], ['a', 'ADJ', 'General adjectives, can directly describe or modify tokens tagged by n', ''], diff --git a/wordless/wl_tagsets/wl_tagset_kor_mecab.py b/wordless/wl_tagsets/wl_tagset_kor_mecab.py index 73744eed7..a9c6c1e44 100644 --- a/wordless/wl_tagsets/wl_tagset_kor_mecab.py +++ b/wordless/wl_tagsets/wl_tagset_kor_mecab.py @@ -19,7 +19,7 @@ # References: # MeCab: https://docs.google.com/spreadsheets/u/0/d/1-9blXKjtjeKZqsf4NzHeYJCrr49-nXeRF6D80udfcwY/edit?usp=sharing # spaCy: https://github.com/explosion/spaCy/blob/2ce9a220dbd30d3a79c2a232230204a102fb3f1d/spacy/lang/ko/tag_map.py -MAPPINGS = [ +tagset_mapping = [ ['NNG', 'NOUN', '일반 명사', ''], ['NNP', 'PROPN', '고유 명사', ''], ['NNB', 'NOUN', '의존 명사', ''], diff --git a/wordless/wl_tagsets/wl_tagset_lao_seqlabeling.py b/wordless/wl_tagsets/wl_tagset_lao_seqlabeling.py index a8ae25150..2d27a7bd2 
100644 --- a/wordless/wl_tagsets/wl_tagset_lao_seqlabeling.py +++ b/wordless/wl_tagsets/wl_tagset_lao_seqlabeling.py @@ -17,7 +17,7 @@ # ---------------------------------------------------------------------- # Reference: https://github.com/FoVNull/SeqLabeling/blob/main/reference/Lao_POS.tsv -MAPPINGS = [ +tagset_mapping = [ ['N', 'NOUN', '名词', ''], ['TTL', 'NOUN', '称号名词', ''], ['PRN', 'PROPN', '专有名词', ''], diff --git a/wordless/wl_tagsets/wl_tagset_lao_yunshan_cup_2020.py b/wordless/wl_tagsets/wl_tagset_lao_yunshan_cup_2020.py index 805165c57..69b465775 100644 --- a/wordless/wl_tagsets/wl_tagset_lao_yunshan_cup_2020.py +++ b/wordless/wl_tagsets/wl_tagset_lao_yunshan_cup_2020.py @@ -17,7 +17,7 @@ # ---------------------------------------------------------------------- # Reference: https://github.com/FoVNull/SeqLabeling/blob/main/reference/Lao_POS.tsv -MAPPINGS = [ +tagset_mapping = [ ['N', 'NOUN', '名词', ''], ['TTL', 'NOUN', '称号名词', ''], ['PRN', 'PROPN', '专有名词', ''], diff --git a/wordless/wl_tagsets/wl_tagset_nor_universal.py b/wordless/wl_tagsets/wl_tagset_nor_universal.py new file mode 100644 index 000000000..3f5511fad --- /dev/null +++ b/wordless/wl_tagsets/wl_tagset_nor_universal.py @@ -0,0 +1,40 @@ +# ---------------------------------------------------------------------- +# Wordless: Tagsets - Universal POS tags - Norwegian +# Copyright (C) 2018-2024 Ye Lei (叶磊) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. 
If not, see <http://www.gnu.org/licenses/>. +# ---------------------------------------------------------------------- + +# Universal POS Tags: https://universaldependencies.org/no/pos/ +tagset_mapping = [ + ['ADJ', 'ADJ', 'Adjective', 'stor, gammel, grønn'], + ['ADP', 'ADP', 'Adposition', 'i, på, utenfor'], + ['ADV', 'ADV', 'Adverb', '(Han kom) nettopp, Derfor (kom han), nesten (ferdig)'], + ['AUX', 'AUX', 'Auxiliary', 'Temporal: har (spist), er (kommet)\nPassive: blir (spist)\nModal: kan/skal/vil/må/bør (spise)\nCopula: er (god)'], + ['CONJ', 'CONJ', 'Coordinating/subordinating conjunction', 'See CCONJ and SCONJ'], + ['CCONJ', 'CCONJ', 'Coordinating conjunction', 'og, eller, men'], + ['SCONJ', 'SCONJ', 'Subordinating conjunction', 'Complementizers: at, om\nAdverbial clause introducers: når, siden, fordi'], + ['DET', 'DET', 'Determiner', 'Possessive: mitt (barn), våre (barn), (barnet) vårt\nDemonstrative: dette (barnet), det (barnet), den (bilen), (det) samme (barnet), (det) andre (barnet), hvilken (bil), hvilket (hus)\nQuantifying: en (bil), et (barn), ei (jente), noen (biler), alle (biler), begge (bilene)'], + ['INTJ', 'INTJ', 'Interjection', 'ja, nei, hei, hallo, heisan, å, ok, piip'], + ['NOUN', 'NOUN', 'Noun', 'jente, katt, tre, luft, skjønnhet'], + ['PROPN', 'PROPN', 'Proper noun', 'Kari, Ola\nOslo, Bergen'], + ['NUM', 'NUM', 'Numeral', '0, 1, 2, 3, 4, 5, 2014, 1000000, 3.14159265359\ntre, femtito, fire-fem, tusen'], + ['PART', 'PART', 'Particle', '(Han liker) ikke å (spise is)'], + ['PRON', 'PRON', 'Pronoun', 'Personal: han, hun, det, ham, henne\nDemonstrative: dette\nReflexive: seg\nReciprocal: hverandre\nInterrogative: hvem, hva, hvilken\nTotality: alle\nIndefinite: noen\nRelative: som'], + ['VERB', 'VERB', 'Verb', 'løpe, løper, løp, (har) løpt\nspise, spiser, spiste, (har) spist'], + + ['PUNCT', 'PUNCT', 'Punctuation', 'Period: .\nComma: ,\nParentheses: ()'], + ['SYM', 'SYM', 'Symbol', '/, * *, *'], + ['X', 'X', 'Other', '[English] (And then he just)
xfgh pdl jklw'] +] diff --git a/wordless/wl_tagsets/wl_tagset_por_universal.py b/wordless/wl_tagsets/wl_tagset_por_universal.py new file mode 100644 index 000000000..f5cc89115 --- /dev/null +++ b/wordless/wl_tagsets/wl_tagset_por_universal.py @@ -0,0 +1,40 @@ +# ---------------------------------------------------------------------- +# Wordless: Tagsets - Universal POS tags - Portuguese +# Copyright (C) 2018-2024 Ye Lei (叶磊) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# ---------------------------------------------------------------------- + +# Universal POS Tags: https://universaldependencies.org/pt/pos/ +tagset_mapping = [ + ['ADJ', 'ADJ', 'Adjective', 'grande, velho, verde, incompreensível, primeiro, segundo, terceiro'], + ['ADP', 'ADP', 'Adposition', 'em, de, para, a, durante'], + ['ADV', 'ADV', 'Adverb', 'muito, bem, exatamente, amanhã, acima, abaixo\nInterrogative or exclamative adverbs: onde, quando, como, por que\nDemonstrative adverbs: aqui, ali, agora, depois\nTotality adverbs: sempre\nNegative adverbs: nunca, sem'], + ['AUX', 'AUX', 'Auxiliary', 'Tense auxiliary: ir (futuro perifrástico)\nModal auxiliary (+ infinitive): poder, dever, continuar\nPassive auxiliary: ser, ter, ir'], + ['CONJ', 'CONJ', 'Coordinating/subordinating conjunction', 'See CCONJ and SCONJ'], + ['CCONJ', 'CCONJ', 'Coordinating conjunction', 'e, ou, mas'], + ['SCONJ', 'SCONJ', 'Subordinating conjunction', '(que as in Ele disse) que (ele viria.), se, porque'], + ['DET', 'DET', 'Determiner', 'Articles (a closed class indicating definiteness, specificity or givenness): o, a, os, as\nPossessive determiners: meu, teu, seu, minha, meus, dele, nosso\nDemonstrative determiners: este, isto, esta, aquele\nInterrogative determiners: qual\nRelative determiners: que\nQuantity/quantifier determiners: nenhum, todos'], + ['INTJ', 'INTJ', 'Interjection', 'bingo, claro, pronto, é'], + ['NOUN', 'NOUN', 'Noun', 'menina, gato, árvore, ar, beleza'], + ['PROPN', 'PROPN', 'Proper noun', 'Maria, João\nLondres, Goiânia\nONG, EUA'], + ['NUM', 'NUM', 'Numeral', '0, 1, 2, 3, 4, 5, 2014, 1000000, 3.14159265359\num, dois, três, trinta e sete\nI, II, III, IV, V, MMXIV'], + ['PART', 'PART', 'Particle', 'Negative particles: não, nem\nPrefixes: anti-, ex-, pós-, vice-, primeiro-, pró-, infra-'], + ['PRON', 'PRON', 'Pronoun', 'Clitic pronouns (including reflexive pronouns): se, me, te, lhe\nDemonstrative pronouns: isto, esse, aquilo\nPersonal pronouns: eu, tu, ele, vocês\nIndefinite 
pronouns: um, outro, qualquer\nPossessive pronouns: meu, seu, dele\nInterrogative pronouns: que, quanto, qual\nRelative pronouns: que, cujo, qual\nTotality pronouns: todo, todas\nNegative pronouns: nenhum, ninguém'], + ['VERB', 'VERB', 'Verb', 'correr, comer\ncorreu, comia\ncorrendo, comendo'], + + ['PUNCT', 'PUNCT', 'Punctuation', 'Period: .\nComma: ,\nParentheses: ()\nQuotes: «, », “'], + ['SYM', 'SYM', 'Symbol', '$, %, §, ©\n+, −, ×, ÷, =, <, >\n:), ♥‿♥, 😝\njohn.doe@universal.org, http://universaldependencies.org/, 1-800-COMPANY'], + ['X', 'X', 'Other', '[English] (And then he just) xfgh pdl jklw'] +] diff --git a/wordless/wl_tagsets/wl_tagset_rus_open_corpora.py b/wordless/wl_tagsets/wl_tagset_rus_open_corpora.py index d57a1128e..a47da68c9 100644 --- a/wordless/wl_tagsets/wl_tagset_rus_open_corpora.py +++ b/wordless/wl_tagsets/wl_tagset_rus_open_corpora.py @@ -17,7 +17,7 @@ # ---------------------------------------------------------------------- # Reference: https://pymorphy2.readthedocs.io/en/latest/user/grammemes.html -MAPPINGS = [ +tagset_mapping = [ ['NOUN', 'NOUN', 'Noun', 'хомяк'], ['ADJF', 'ADJ', 'Adjective (full)', 'хороший'], ['ADJS', 'ADJ', 'Adjective (short)', 'хорош'], diff --git a/wordless/wl_tagsets/wl_tagset_rus_russian_national_corpus.py b/wordless/wl_tagsets/wl_tagset_rus_russian_national_corpus.py index aff0f132b..67318e0bc 100644 --- a/wordless/wl_tagsets/wl_tagset_rus_russian_national_corpus.py +++ b/wordless/wl_tagsets/wl_tagset_rus_russian_national_corpus.py @@ -19,7 +19,7 @@ # References: # [Dead] http://www.ruscorpora.ru/en/corpora-morph.html # https://github.com/nltk/nltk/pull/2152 -MAPPINGS = [ +tagset_mapping = [ ['A', 'ADJ', 'Adjective', ''], ['A=m', 'ADJ', 'Adjective (masculine)', ''], diff --git a/wordless/wl_tagsets/wl_tagset_rus_universal.py b/wordless/wl_tagsets/wl_tagset_rus_universal.py new file mode 100644 index 000000000..de74b0a1b --- /dev/null +++ b/wordless/wl_tagsets/wl_tagset_rus_universal.py @@ -0,0 +1,40 @@ +# 
---------------------------------------------------------------------- +# Wordless: Tagsets - Universal POS tags - Russian +# Copyright (C) 2018-2024 Ye Lei (叶磊) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# ---------------------------------------------------------------------- + +# Universal POS Tags: https://universaldependencies.org/ru/pos/ +tagset_mapping = [ + ['ADJ', 'ADJ', 'Adjective', 'большой, старый, зеленый\nPossessive adjectives: студенческий, учительский\nпервый, второй, третий\nPassive participial adjective: сделанный\nPresent participial adjective, derived from present transgressive: делающий\nPast participial adjective, derived from past transgressive: сделавший'], + ['ADP', 'ADP', 'Adposition', 'в, к, на'], + ['ADV', 'ADV', 'Adverb', 'очень, хорошо, точно, завтра, вниз, наверх\nOrdinal numeral adverbs: впервые\nMultiplicative numeral adverbs: однажды, дважды, трижды\nInterrogative adverbs: где, куда, когда, как, почему\nDemonstrative adverbs: здесь, там, сейчас, потом, так\nIndefinite adverbs: где-то, куда-то, когда-то, как-то\nTotal adverbs: везде, всегда\nNegative adverbs: нигде, никогда'], + ['AUX', 'AUX', 'Auxiliary', 'Future tense. Finite future form of быть is combined with infinitive of the lexical verb. The auxiliary expresses person, number and tense: буду делать, будешь делать, будут делать. 
Note that a limited set of verbs can form future morphologically, without the auxiliary.\nConditional mood. Conditional form (historically aorist) of быть is combined with past participle of the lexical verb. The auxiliary expresses person and number, the participle expresses gender and number: сделал бы, сделала бы, сделали бы.\nPassive voice. A form of быть (in various tenses and moods or in the infinitive) is combined with passive participle of the lexical verb. The auxiliary expresses person, number, tense (past and future) and mood, the participle expresses gender, number and voice: будет сделан, был сделан, был бы сделан.'], + ['CONJ', 'CONJ', 'Coordinating/subordinating conjunction', 'See CCONJ and SCONJ'], + ['CCONJ', 'CCONJ', 'Coordinating conjunction', 'и, или, но'], + ['SCONJ', 'SCONJ', 'Subordinating conjunction', 'что, если, как, чем'], + ['DET', 'DET', 'Determiner', 'Possessive determiners: мой, твой, его, её, наш, ваш, их\nReflexive possessive determiner: свой\nDemonstrative determiners: (Я видела) эту (машину вчера.)\nInterrogative determiners: Какая (машина тебе нравится?)\nRelative determiners: (Мне интересно,) которая (машина тебе нравится.)\nRelative possessive determiner: чей\nIndefinite determiners: некоторый\nTotal determiners: каждый\nNegative determiners: (У нас не осталось) никаких (машин.)'], + ['INTJ', 'INTJ', 'Interjection', 'ах, ого, ну, ради бога'], + ['NOUN', 'NOUN', 'Noun', 'девочка, кошка, дерево, воздух, красота, плавание'], + ['PROPN', 'PROPN', 'Proper noun', 'ООН'], + ['NUM', 'NUM', 'Numeral', '0, 1, 2, 3, 4, 5, 2014, 1000000, 3.14159265359\nI, II, III, IV, V, MMXIV\nодин, два, три, четыре, пять, семьдесят\nDenominators of fractions constitute a separate class of cardinal numerals: половина, треть, четверть\nCollective numerals (see specific-syntax on their morphosyntactic behavior): двое, трое, четверо, пятеро\nPronominal quantifiers of imprecise quantity: сколько, столько, предостаточно'], + ['PART', 'PART', 'Particle',
'Sentence modality: пусть\nже\n(Мне сегодня) аж (пять писем пришло.)'], + ['PRON', 'PRON', 'Pronoun', 'Personal pronouns: я, ты, он, она, оно, мы, вы, они\nReflexive pronouns: себе, сам\nDemonstrative pronouns: (Я видел) это (вчера.)\nInterrogative pronouns: кто, Что (ты думаешь?)\nRelative pronouns: кто, (Мне интересно,) что (ты думаешь.)\nIndefinite pronouns: кто-то, что-то\nTotal pronouns: каждый, все\nNegative pronouns: никто, ничто'], + ['VERB', 'VERB', 'Verb', 'Infinitive: рисовать\nFinite indicative: рисую, рисуешь, рисует, рисуем, рисуете, рисуют, рисовал, рисовала, рисовало, рисовали\nFinite imperative: рисуй, рисуйте\nShort passive participle in different tenses: (на)рисован, рисуем\nParticiple in different tenses and voices, full forms: рисующий, рисовавший, рисуемый, рисованный\nConverb: рисуя, рисовав'], + + ['PUNCT', 'PUNCT', 'Punctuation', 'Period: .\nComma: ,\nParentheses: ()'], + ['SYM', 'SYM', 'Symbol', '$, %, §, ©\n+, −, ×, ÷, =, <, >\n:), ♥‿♥, 😝\njohn.doe@universal.org, http://universaldependencies.org/, 1-800-COMPANY'], + ['X', 'X', 'Other', '(И потом он просто) xfgh pdl jklw'] +] diff --git a/wordless/wl_tagsets/wl_tagset_spa_universal.py b/wordless/wl_tagsets/wl_tagset_spa_universal.py new file mode 100644 index 000000000..7b68f828e --- /dev/null +++ b/wordless/wl_tagsets/wl_tagset_spa_universal.py @@ -0,0 +1,40 @@ +# ---------------------------------------------------------------------- +# Wordless: Tagsets - Universal POS tags - Spanish +# Copyright (C) 2018-2024 Ye Lei (叶磊) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# ---------------------------------------------------------------------- + +# Universal POS Tags: https://universaldependencies.org/es/pos/ +tagset_mapping = [ + ['ADJ', 'ADJ', 'Adjective', 'grande, viejo, verde, incomprensible, primero, segundo, tercero'], + ['ADP', 'ADP', 'Adposition', 'a, ante, bajo, cabe, con, contra, de, desde, en, entre, hacia, hasta, para, por, según, sin, sobre, tras'], + ['ADV', 'ADV', 'Adverb', 'muy, bien, exactamente, mañana, arriba, abajo\nInterrogative adverbs: dónde, cuándo\nRelative adverbs (depending on context, these can be also subordinating conjunctions): donde, cuando\nDemonstrative adverbs: aquí, allí, ahora, después\nTotality adverbs: siempre\nNegative adverbs: nunca'], + ['AUX', 'AUX', 'Auxiliary', 'Copulas: ser, estar\npassive: ser (la sentencia fue publicada)\nProgressive: estar (mis hijos están estudiando inglés)\nPerfect tenses: haber (ha venido hoy)'], + ['CONJ', 'CONJ', 'Coordinating/subordinating conjunction', 'See CCONJ and SCONJ'], + ['CCONJ', 'CCONJ', 'Coordinating conjunction', '(María) y (Juan están estudiando.), (Quiero ir al cine,) pero (no tengo tiempo.), (Puedes estudiar inglés) o (francés.)\n(padre) e (hijo), (siete) u (ocho)'], + ['SCONJ', 'SCONJ', 'Subordinating conjunction', 'Complementizers: como, que, mientras, si\nAdverbial clause introducers when introducing a clause, not a nominal: como, cuando, ya que / porque'], + ['DET', 'DET', 'Determiner', 'Articles: definite - el, la, los, las; indefinite - un, una, unos, unas\nDemonstratives: este, esta, estos, estas, ese, esa, esos, esas, aquel, aquella, aquellos, aquellas\nPossessives: mi, mis, tu, tus, su, sus, nuestro, nuestra, nuestros, nuestras, vuestro, vuestra, vuestros, vuestras\nQuantifiers: todo, toda, todos, todas, mucho, mucha, muchos, muchas, poco, 
poca, pocos, pocas, algún, alguna, algunos, algunas, ningún, ninguna, bastantes, varios, varias\nStressed possessives: mío, mía, míos, mías, tuyo, tuya, tuyos, tuyas, suyo, suya, suyos, suyas, nuestro, nuestra, nuestros, nuestras, vuestro, vuestra, vuestros, vuestras'], + ['INTJ', 'INTJ', 'Interjection', 'psst, ay, bravo, hola, Sí(, porque…), No(, no lo creo)'], + ['NOUN', 'NOUN', 'Noun', 'chica, gato, árbol, aire, belleza'], + ['PROPN', 'PROPN', 'Proper noun', 'Madrid, Antonio, Los Ángeles'], + ['NUM', 'NUM', 'Numeral', 'Definite cardinal numerals: uno, dos, tres\nFractions: media, tercio'], + ['PART', 'PART', 'Particle', 'Possessive marker: [English] ’s\nNegation particle: [English] not; [German] nicht\nQuestion particle: [Japanese] か/ka (adding this particle to the end of a clause turns the clause into a question); [Turkish] mu\nSentence modality: [Czech] ať, kéž, nechť'], + ['PRON', 'PRON', 'Pronoun', 'Personal: yo, tú, él\nReflexive: me, te, se\nDemonstrative: este, ese, aquel\nRelative: que, quien, cual\nInterrogative/exclamatory: quién, qué, cuál\nIndefinite: alguien, algo, ninguno\nPossessive: mío, tuyo, suyo'], + ['VERB', 'VERB', 'Verb', '[English] run, eat\n[English] runs, ate\n[English] running, eating'], + + ['PUNCT', 'PUNCT', 'Punctuation', 'Period: .\nComma: ,\nParentheses: ()'], + ['SYM', 'SYM', 'Symbol', '$, +'], + ['X', 'X', 'Other', '[English] (And then he just) xfgh pdl jklw'] +] diff --git a/wordless/wl_tagsets/wl_tagset_tha_blackboard.py b/wordless/wl_tagsets/wl_tagset_tha_blackboard.py index af37070f3..807c50d7e 100644 --- a/wordless/wl_tagsets/wl_tagset_tha_blackboard.py +++ b/wordless/wl_tagsets/wl_tagset_tha_blackboard.py @@ -20,7 +20,7 @@ # https://github.com/PyThaiNLP/pythainlp/blob/dev/docs/api/tag.rst#pythainlptag # https://github.com/PyThaiNLP/pythainlp/blob/dev/pythainlp/tag/blackboard.py # https://bitbucket.org/kaamanita/blackboard-treebank/src/master/Blackboard-Treebank.pdf -MAPPINGS = [ +tagset_mapping = [ ['AJ', 'ADJ', 'Adjective: 
Attribute, modifier, or description of a noun', 'ใหม่, พิเศษ , ก่อน, มาก, สูง'], ['AV', 'ADV', 'Adverb: Word that modifies or qualifies an adjective, verb, or another adverb', 'ก่อน, ก็, เล็กน้อย, เลย, สุด'], ['AX', 'AUX', 'Auxiliary: Tense, aspect, mood, and voice', 'เป็น, ใช่, คือ, คล้าย'], diff --git a/wordless/wl_tagsets/wl_tagset_tha_orchid.py b/wordless/wl_tagsets/wl_tagset_tha_orchid.py index 2cbd03f28..3d5ca28cb 100644 --- a/wordless/wl_tagsets/wl_tagset_tha_orchid.py +++ b/wordless/wl_tagsets/wl_tagset_tha_orchid.py @@ -19,7 +19,7 @@ # References: # https://github.com/PyThaiNLP/pythainlp/blob/dev/docs/api/tag.rst#pythainlptag # https://github.com/PyThaiNLP/pythainlp/blob/dev/pythainlp/tag/orchid.py -MAPPINGS = [ +tagset_mapping = [ ['NPRP', 'PROPN', 'Proper noun', 'วินโดวส์ 95, โคโรน่า, โค้ก'], ['NCNM', 'NOUN/NUM', 'Cardinal number', 'หนึ่ง, สอง, สาม, 1, 2, 10'], diff --git a/wordless/wl_tagsets/wl_tagset_ukr_universal.py b/wordless/wl_tagsets/wl_tagset_ukr_universal.py new file mode 100644 index 000000000..bd9868ba7 --- /dev/null +++ b/wordless/wl_tagsets/wl_tagset_ukr_universal.py @@ -0,0 +1,40 @@ +# ---------------------------------------------------------------------- +# Wordless: Tagsets - Universal POS tags - Ukrainian +# Copyright (C) 2018-2024 Ye Lei (叶磊) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# ---------------------------------------------------------------------- + +# Universal POS Tags: https://universaldependencies.org/uk/pos/ +tagset_mapping = [ + ['ADJ', 'ADJ', 'Adjective', 'великий, старий, зелений\nPossessive adjectives: батьків, материн\nперший, другий, третій\nPassive perfective participial adjective: зроблений\nPassive imperfective participial adjective: роблений\nPresent participial adjective (it is considered ungrammatical but still used occasionally, which is why it is encoded): роблячий\nPast participial adjective (it is considered ungrammatical but still used occasionally, which is why it is encoded): зробивший'], + ['ADP', 'ADP', 'Adposition', 'в, до, протягом'], + ['ADV', 'ADV', 'Adverb', 'дуже, добре, точно, завтра, вгору, вниз\nOrdinal numeral adverbs: вперше, вдруге, втретє\nMultiplicative numeral adverbs: двічі, тричі\nInterrogative adverbs: де, куди, коли, як, чому\nDemonstrative adverbs: тут, там, зараз, тоді, так\nIndefinite adverbs: десь, кудись, іноді, якось\nTotal adverbs: всюди, завжди\nNegative adverbs: ніде, ніколи'], + ['AUX', 'AUX', 'Auxiliary', 'бути'], + ['CONJ', 'CONJ', 'Coordinating/subordinating conjunction', 'See CCONJ and SCONJ'], + ['CCONJ', 'CCONJ', 'Coordinating conjunction', 'і, й, та, або, але'], + ['SCONJ', 'SCONJ', 'Subordinating conjunction', 'що, щоб, аби, якщо, як, ніж'], + ['DET', 'DET', 'Determiner', 'Possessive determiners: мій, твій, його, її, наш, ваш, їх\nReflexive possessive determiner: свій\nDemonstrative determiners: той, Цю (машину я бачила вчора.)\nInterrogative determiners: Котра (машина тобі подобається?)\nRelative determiners: (Мені цікаво,) котра (машина тобі подобається.)\nRelative possessive determiner: чий\nIndefinite determiners: деякий, якийсь\nTotal determiners: кожен, всякий\nNegative determiners: (Ми не маємо) жодної (машини.), ніякий'], + ['INTJ', 'INTJ', 'Interjection', 'ах, бум, ну, ба, браво'], + ['NOUN', 'NOUN', 'Noun', 'дівчинка, кіт, дерево, повітря, краса, плавання'], + 
['PROPN', 'PROPN', 'Proper noun', 'Франкфурт (на) Майні, ООН'], + ['NUM', 'NUM', 'Numeral', '0, 1, 2, 3, 4, 5, 2014, 1000000, 3.14159265359\nI, II, III, IV, V, MMXIV\nодин, два, три, чотири, п’ять, сімдесят\nDenominators of fractions constitute a separate class of cardinal numerals: половина, третина, четвертина (чверть). They are not considered numerals in the Ukrainian grammar. They are tagged NOUN.\nSpecial forms, so-called generic numerals: четверо, п’ятеро\nодні, двоє, троє'], + ['PART', 'PART', 'Particle', 'Sentence modality: но, хай, нехай\nтільки, аж'], + ['PRON', 'PRON', 'Pronoun', 'Personal pronouns: я, ти, він, вона, воно, ми, ви, вони\nReflexive pronouns: себе, се, собі, собою\nDemonstrative pronouns: Це (я бачила вчора.)\nInterrogative pronouns: хто, Що (ти думаєш?)\nRelative pronouns: хто, (Мене цікавить,) що (ти думаєш.)\nIndefinite pronouns: дехто, дещо\nTotal pronouns: кожен, всі\nNegative pronouns: ніхто, ніщо'], + ['VERB', 'VERB', 'Verb', 'нести\nнесу, несеш, несе, несемо, несете, несуть\nImperative in different persons and numbers: неси, несімо, несіть\nPast tense forms in different genders and numbers: ніс, несла, несло, несли\nPassive impersonal form: несено\nPresent and past adverbial participles: несучи, нісши'], + + ['PUNCT', 'PUNCT', 'Punctuation', 'Period: .\nComma: ,\nParentheses: ()'], + ['SYM', 'SYM', 'Symbol', '$, %, §, ©\n+, −, ×, ÷, =, <, >\n:), ♥‿♥, 😝\njohn.doe@universal.org, http://universaldependencies.org/, 1-800-COMPANY'], + ['X', 'X', 'Other', '(А він тільки) xfgh pdl jklw'] +] diff --git a/wordless/wl_tagsets/wl_tagset_universal.py b/wordless/wl_tagsets/wl_tagset_universal.py deleted file mode 100644 index 7e16930c0..000000000 --- a/wordless/wl_tagsets/wl_tagset_universal.py +++ /dev/null @@ -1,40 +0,0 @@ -# ---------------------------------------------------------------------- -# Wordless: Tagsets - Universal POS tags -# Copyright (C) 2018-2024 Ye Lei (叶磊) -# -# This program is free software: you can redistribute it and/or 
modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. -# ---------------------------------------------------------------------- - -# Universal POS Tags: http://universaldependencies.org/u/pos/all.html -MAPPINGS = [ - ['ADJ', 'ADJ', 'Adjectives', 'big, old, green, African, incomprehensible\nfirst, second, third'], - ['ADP', 'ADP', 'Adposition', 'in, to, during'], - ['ADV', 'ADV', 'Adverb', 'very, well, exactly, tomorrow, up, down\nwhere, when, how, why\nhere, there, now, then\nsomewhere, sometime, anywhere, anytime\neverywhere, always\nnowhere, never'], - ['AUX', 'AUX', 'Auxiliary', 'has (done), is (doing), will (do)\nwas (done), got (done)\nshould (do), must (do)\nis (He is a teacher.)'], - ['CONJ', 'CONJ', 'Conjunction', 'and, or, but\nthat (I believe that he will come), if, while'], - ['CCONJ', 'CCONJ', 'Coordinating conjunction', 'and, or, but'], - ['SCONJ', 'SCONJ', 'Subordinating conjunction', 'that (I believe that he will come), if, while'], - ['DET', 'DET', 'Determiner', 'the, a, an\n[Czech] můj, tvůj, jeho, její, náš, váš, jejich; [English] my, your\nthis (I saw this car yesterday)\nwhich (Which car do you like?)\nwhich (I wonder which car you like.)\nany, all, no (We have no cars available.)'], - ['INTJ', 'INTJ', 'Interjection', 'psst, ouch, bravo, hello'], - ['NOUN', 'NOUN', 'Noun', 'girl, cat, tree, air, beauty'], - ['PROPN', 'PROPN', 'Proper noun', 'Mary, John, London, NATO, HBO'], - ['NUM', 'NUM', 'Numeral', '0, 1, 2, 3, 4, 5, 2014, 
1000000, 3.14159265359\none, two, three, seventy-seven\nI, II, III, IV, V, MMXIV'], - ['PART', 'PART', 'Particle', '[English] ‘s\n[English] not; [German] nicht\n[Japanese] か; [Turkish] mu\n[Czech] ať, kéž, nechť\n[Chinese] 了'], - ['PRON', 'PRON', 'Pronoun', 'I, you, he, she it, we, they\nmyself, yourself, himself, herself, itself, ourselves, yourselves, theirselves\nwho, what (What do you think?)\nwho, what (I wonder what you think.)\nsomebody, something, anybody, anything\neverybody, everything\nnobody, nothing\nmine, yours, his, hers, its, ours, theirs'], - ['VERB', 'VERB', 'Verb', 'run, eat\nruns, ate\nrunning, eating'], - - ['PUNCT', 'PUNCT', 'Punctuation', '. , ( )'], - ['SYM', 'SYM', 'Symbol', '$, %, §, ©\n+, −, ×, ÷, =, <, >\n:), ♥‿♥, 😝\njohn.doe@universal.org, http://universaldependencies.org/, 1-800-COMPANY'], - ['X', 'X', 'Other', 'xfgh, pdl, jklw'] -] diff --git a/wordless/wl_tagsets/wl_tagset_vie_underthesea.py b/wordless/wl_tagsets/wl_tagset_vie_underthesea.py index a9665ae45..17ec816cb 100644 --- a/wordless/wl_tagsets/wl_tagset_vie_underthesea.py +++ b/wordless/wl_tagsets/wl_tagset_vie_underthesea.py @@ -17,7 +17,7 @@ # ---------------------------------------------------------------------- # Reference: https://github.com/undertheseanlp/underthesea/wiki/M%C3%B4-t%E1%BA%A3-d%E1%BB%AF-li%E1%BB%87u-b%C3%A0i-to%C3%A1n-POS-Tag -MAPPINGS = [ +tagset_mapping = [ ['A', 'ADJ', 'Tính từ', 'nhiều, hơn, khác, gần, lớn'], ['Ab', 'ADJ', 'Tính từ mượn', 'sexy, Peace, đờmi'], ['B', 'X', 'Từ mượn', 'karaoke, nilông, fax, oxy'], diff --git a/wordless/wl_widgets/wl_item_delegates.py b/wordless/wl_widgets/wl_item_delegates.py index 9d18d22a7..8c06c805d 100644 --- a/wordless/wl_widgets/wl_item_delegates.py +++ b/wordless/wl_widgets/wl_item_delegates.py @@ -42,9 +42,7 @@ def __init__(self, parent, widget = None, row = None, col = None): def createEditor(self, parent, option, index): # pylint: disable=unused-argument if self.widget: widget = self.widget(parent) - - if not 
self.enabled: - widget.setEnabled(False) + widget.setEnabled(self.enabled) return widget @@ -58,11 +56,7 @@ class Wl_Item_Delegate_Combo_Box(Wl_Item_Delegate): def __init__(self, parent, items = None, row = None, col = None, editable = False): super().__init__(parent, row = row, col = col) - if items is None: - self.items = [] - else: - self.items = items - + self.items = items or [] self.editable = editable def paint(self, painter, option, index): @@ -100,9 +94,7 @@ def createEditor(self, parent, option, index): combo_box.addItems(self.items) combo_box.setEditable(self.editable) - - if not self.enabled: - combo_box.setEnabled(False) + combo_box.setEnabled(self.enabled) return combo_box else: @@ -127,9 +119,7 @@ def __init__(self, parent, Combo_Box, row = None, col = None): def createEditor(self, parent, option, index): if self.is_editable(index): combo_box = self.Combo_Box(parent) - - if not self.enabled: - combo_box.setEnabled(False) + combo_box.setEnabled(self.enabled) return combo_box else: