From 97668c0843d2a2abd1132d2d504cf9a1d5d079b8 Mon Sep 17 00:00:00 2001 From: Johannes Heinecke Date: Sun, 5 Nov 2023 13:51:44 +0100 Subject: [PATCH 1/9] added Welsh (cy) and Chechen (ce) --- num2words/__init__.py | 5 +- num2words/lang_CE.py | 509 ++++++++++++++++++++++++++++++++++++++ num2words/lang_CY.py | 556 ++++++++++++++++++++++++++++++++++++++++++ tests/test_ce.py | 344 ++++++++++++++++++++++++++ tests/test_cy.py | 430 ++++++++++++++++++++++++++++++++ 5 files changed, 1843 insertions(+), 1 deletion(-) create mode 100644 num2words/lang_CE.py create mode 100644 num2words/lang_CY.py create mode 100644 tests/test_ce.py create mode 100644 tests/test_cy.py diff --git a/num2words/__init__.py b/num2words/__init__.py index 789af300..5641598e 100644 --- a/num2words/__init__.py +++ b/num2words/__init__.py @@ -17,7 +17,8 @@ from __future__ import unicode_literals -from . import (lang_AM, lang_AR, lang_AZ, lang_BY, lang_CZ, lang_DE, lang_DK, +from . import (lang_AM, lang_AR, lang_AZ, lang_BY, + lang_CE, lang_CY, lang_CZ, lang_DE, lang_DK, lang_EN, lang_EN_IN, lang_EN_NG, lang_EO, lang_ES, lang_ES_CO, lang_ES_GT, lang_ES_NI, lang_ES_VE, lang_FA, lang_FI, lang_FR, lang_FR_BE, lang_FR_CH, lang_FR_DZ, lang_HE, lang_HU, lang_ID, @@ -31,6 +32,8 @@ 'ar': lang_AR.Num2Word_AR(), 'az': lang_AZ.Num2Word_AZ(), 'by': lang_BY.Num2Word_BY(), + 'ce': lang_CE.Num2Word_CE(), + 'cy': lang_CY.Num2Word_CY(), 'cz': lang_CZ.Num2Word_CZ(), 'en': lang_EN.Num2Word_EN(), 'en_IN': lang_EN_IN.Num2Word_EN_IN(), diff --git a/num2words/lang_CE.py b/num2words/lang_CE.py new file mode 100644 index 00000000..0d6c01ef --- /dev/null +++ b/num2words/lang_CE.py @@ -0,0 +1,509 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2023, Johannes Heinecke. All Rights Reserved. 
+ +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +# MA 02110-1301 USA + +from __future__ import unicode_literals + +from .lang_EU import Num2Word_EU +from .currency import parse_currency_parts, prefix_currency + +# Chechen numbers inflect in case if without noun or +# use a special oblique ending when followed by a counted noun +# 4, 14, 40 and composites thereof agree in class (gender) with the +# noun. Chechen has 6 classes which are indicated by the initial +# letter of 4, 14 and 40. By default it is "д" but it can also be "б", "й" or "в". 
# Select the class prefix with the ``clazz`` argument, for example:
#   num2words(4, lang='ce', case="abs", clazz="б")
#
# In the tables below the class-sensitive initial is written "д*"; it is
# replaced by ``clazz`` when a form is produced.
#
# NOTE(review): two different letters are used for the palochka in these
# tables ("Ӏ" and "І"); they are kept exactly as written because the tests
# shipped with this module compare against these strings.


CARDINALS = {
    # Russian abbreviations of the case names (documentation only, not
    # read by the code in this module).
    "casenames": {
        "abs": "Им.",
        "gen": "Род.",
        "dat": "Дат.",
        "erg": "Эрг.",
        "instr": "Твор.",
        "mat": "Вещ.",
        "comp": "Сравнит.",
        "all": "Местн.",
    },
    "casesuffix_cons": {  # to be added to numerals with final consonant
        "gen": "аннан",
        "dat": "анна",
        "erg": "амма",
        "instr": "анца",
        "mat": "аннах",
        "comp": "аннал",
        "all": "анга",
        "obl": "ан",
        "ORD": "алгӀа",
    },
    "casesuffix_voc": {  # to be added to numerals with final vowel
        "gen": "ннан",
        "dat": "нна",
        "erg": "мма",
        "instr": "нца",
        "mat": "ннах",
        "comp": "ннал",
        "all": "нга",
        "obl": "н",
        "ORD": "лгӀа",
    },
    0: {
        "attr": "ноль",
        "abs": "ноль",
        "gen": "нолан",
        "dat": "нолана",
        "erg": "ноло",
        "instr": "ноланца",
        "mat": "ноланах",
        "comp": "ноланал",
        "all": "ноланга",
    },
    1: {
        "attr": "цхьа",  # in front of nouns in ABS
        "obl": "цхьана",  # with nouns in other cases than ABS
        "abs": "цхьаъ",
        "gen": "цхьаннан",
        "dat": "цхьанна",
        "erg": "цхьамма",
        "instr": "цхьаьнца",
        "mat": "цхьаннах",
        "comp": "цхьаннал",
        # NOTE(review): possibly a typo for "цхьаьнга"; pinned by the tests.
        "all": "цхаьнга",
        "ORD": "цхьалгӀа",
    },
    2: {
        "attr": "ши",  # in front of 100, 1000
        "obl": "шина",
        "abs": "шиъ",
        "gen": "шиннан",
        "dat": "шинна",
        "erg": "шимма",
        "instr": "шинца",
        "mat": "шиннах",
        "comp": "шиннал",
        "all": "шинга",
        "ORD": "шолгӀа",
    },
    3: {
        "attr": "кхо",
        "obl": "кхона",
        "abs": "кхоъ",
        "gen": "кхааннан",
        "dat": "кхаанна",
        "erg": "кхаамма",
        "instr": "кхаанца",
        "mat": "кхааннах",
        "comp": "кхааннал",
        "all": "кхаанга",
        "ORD": "кхоалгӀа",
    },
    4: {
        "attr": "д*и",
        "obl": "д*еа",
        "abs": "д*иъ",
        "gen": "д*еаннан",
        "dat": "д*еанна",
        "erg": "д*еамма",
        "instr": "д*еанца",
        "mat": "д*еаннах",
        "comp": "д*еаннал",
        "all": "д*еанга",
        "ORD": "д*оьалгӀа",
    },
    5: {
        "attr": "пхи",
        "obl": "пхеа",
        "abs": "пхиъ",
        "gen": "пхеаннан",
        "dat": "пхеанна",
        "erg": "пхеамма",
        # NOTE(review): looks like a typo for "пхеанца"; pinned by the tests.
        "instr": "нхеанца",
        "mat": "пхеаннах",
        "comp": "пхеаннал",
        "all": "пхеанга",
        "ORD": "пхоьалгӀа",
    },
    6: {
        "abs": "ялх",
        "attr": "ялх",
        "ORD": "йолхалгӀа",
    },
    7: {
        "abs": "ворхӀ",
        "attr": "ворхӀ",
        "ORD": "ворхӀалгӀа",
    },
    8: {
        "abs": "бархӀ",
        "attr": "бархӀ",
        # NOTE(review): does not end in the regular "-алгӀа"; pinned by tests.
        "ORD": "борхӀалӀа",
    },
    9: {
        "abs": "исс",
        "attr": "исс",
        "ORD": "уьссалгӀа",
    },
    10: {
        "attr": "итт",
        "abs": "итт",
        "gen": "иттаннан",
        "dat": "иттанна",
        "erg": "иттамма",
        "instr": "иттанца",
        "mat": "иттаннах",
        "comp": "иттаннал",
        "all": "иттанга",
        "ORD": "уьтталгӀа",
    },
    11: {
        "abs": "цхьайтта",
        "attr": "цхьайтта",
        "ORD": "цхьайтталгӀа",
    },
    12: {
        "abs": "шийтта",
        "attr": "шийтта",
        # NOTE(review): does not end in the regular "-лгӀа" — confirm.
        "ORD": "шийтталга",
    },
    13: {
        "abs": "кхойтта",
        "attr": "кхойтта",
        "ORD": "кхойтталгӀа",
    },
    14: {
        "abs": "д*ейтта",
        "attr": "д*ейтта",
        "ORD": "д*ейтталгӀа",
    },
    15: {
        "abs": "пхийтта",
        "attr": "пхийтта",
        "ORD": "пхийтталгӀа",
    },
    16: {
        "abs": "ялхитта",
        "attr": "ялхитта",
        "ORD": "ялхитталгӀа",
    },
    17: {
        "abs": "вуьрхӀитта",
        "attr": "вуьрхӀитта",
        "ORD": "вуьрхӀитталгӀа",
    },
    18: {
        "abs": "берхӀитта",
        "attr": "берхӀитта",
        # NOTE(review): differs from the stem "берхӀитта" — confirm.
        "ORD": "берхитталӀа",
    },
    19: {
        "abs": "ткъайесна",
        "attr": "ткъайесна",
        "ORD": "ткъаесналгӀа",
    },
    20: {
        "abs": "ткъа",
        "gen": "ткъаннан",
        "dat": "ткъанна",
        "erg": "ткъамма",
        "instr": "ткъанца",
        "mat": "ткъаннах",
        "comp": "ткъаннал",
        "all": "ткъанга",
        "attr": "ткъе",
        "ORD": "ткъолгӀа",
    },
    40: {
        "abs": "шовзткъа",
        "attr": "шовзткъе",
        "ORD": "шовзткъалгІа",
    },
    60: {
        "abs": "кхузткъа",
        "attr": "кхузткъе",
        "ORD": "кхузткъалгІа",
    },
    80: {
        "abs": "дезткъа",
        "attr": "дезткъе",
        "ORD": "дезткъалгІа",
    },
    100: {
        "attr": "бӀе",
        "abs": "бӀе",
        "obl": "бӀен",
        "gen": "бӀеннан",
        "dat": "бӀенна",
        "erg": "бӀемма",
        "instr": "бӀенца",
        "mat": "бӀеннах",
        "comp": "бӀеннал",
        "all": "бӀенга",
        "ORD": "бІолгІа",
    },
    1000: {
        "attr": "эзар",
        "abs": "эзар",
        "obl": "эзаран",
        "gen": "эзарнан",
        "dat": "эзарна",
        "erg": "эзарно",
        "instr": "эзарнаца",
        "mat": "эзарнах",
        "comp": "эзарнал",
        "all": "эзаранга",
        "ORD": "эзарлагІа",
    },
    1000000: {
        "attr": "миллион",
        "abs": "миллион",
        "ORD": "миллионалгІа",
    },
}

# Names of the powers of ten from 10**6 upwards, keyed by the exponent.
ILLIONS = {
    6: {
        "attr": "миллион",
        "abs": "миллион",
        "ORD": "миллионалгІа",
    },
    9: {
        "attr": "миллиард",
        "abs": "миллиард",
        "ORD": "миллиардалгІа",
    },
    12: {
        "attr": "биллион",
        "abs": "биллион",
        "ORD": "биллионалгІа",
    },
    15: {
        "attr": "биллиард",
        "abs": "биллиард",
        "ORD": "биллиардалгІа",
    },
    18: {
        "attr": "триллион",
        "abs": "триллион",
        "ORD": "триллионалгІа",
    },
    21: {
        "attr": "триллиард",
        "abs": "триллиард",
        "ORD": "триллиардалгІа",
    },
    24: {
        "attr": "квадриллион",
        "abs": "квадриллион",
        "ORD": "квадриллионалгІа",
    },
    27: {
        "attr": "квадриллиард",
        "abs": "квадриллиард",
        "ORD": "квадриллиардалгІа",
    },
    30: {
        "attr": "квинтиллион",
        "abs": "квинтиллион",
        "ORD": "квинтиллионалгІа",
    },
    33: {
        "attr": "квинтиллиард",
        "abs": "квинтиллиард",
        "ORD": "квинтиллиардалгІа",
    },
}


MINUS = "минус"
DECIMALPOINT = "запятая"  # check !


class Num2Word_CE(Num2Word_EU):
    """Chechen (``ce``) number-to-words converter.

    All forms come from the module-level ``CARDINALS`` and ``ILLIONS``
    tables.  ``case`` selects a key of those tables ("abs", "gen", "dat",
    "erg", "instr", "mat", "comp", "all", "obl", "attr" or "ORD") and
    ``clazz`` is the class prefix substituted for the "д*" placeholder.
    """

    CURRENCY_FORMS = {
        # currency code: (sg, pl), (sg, pl)
        'EUR': (('Евро', 'Евро'), ("Сент", "Сенташ")),
        'RUB': (("Сом", "Сомаш"), ("Кепек", "Кепекаш")),
        'USD': (("Доллар", "Доллараш"), ("Сент", "Сенташ")),
        'GBP': (("Фунт", "Фунташ"), ("Пенни", "Пенни")),
    }

    def setup(self):
        Num2Word_EU.setup(self)

    def __init__(self):
        # The base-class initialiser is intentionally not called, so
        # attributes it would normally create (e.g. ``negword``) are not
        # available on instances of this class.
        pass

    @staticmethod
    def _fraction_digits(number):
        """Return the digits after the decimal point of *number* as text.

        Goes through ``Decimal`` so that floats whose ``str()`` uses
        exponent notation (``1e-07``) still yield plain digits.  Returns an
        empty string when there is no fractional part to spell out.
        """
        from decimal import Decimal

        plain = format(Decimal(repr(float(number))), "f")
        return plain.partition(".")[2]

    def to_ordinal(self, number, clazz="д"):
        """Return the ordinal of *number* as a word (the "ORD" forms)."""
        return self.to_cardinal(number, clazz=clazz, case="ORD")

    def to_cardinal(self, number, clazz="д", case="abs"):
        """Return *number* spelled out in Chechen.

        Args:
            number: int or float to convert.
            clazz: class prefix replacing the "д*" placeholder.
            case: key of the form to produce for the last element.

        Raises:
            NotImplementedError: if ``number`` is 10**34 or larger.
        """
        if number < 0:
            # Previously negative input fell through to a table lookup and
            # raised KeyError.
            return MINUS + " " + self.to_cardinal(
                -number, clazz=clazz, case=case)

        if isinstance(number, float):
            # As before, floats are always rendered with the default class
            # and case.
            entires = self.to_cardinal(int(number))
            digits = self._fraction_digits(number)
            if not digits:
                return entires
            postfix = " ".join(self.to_cardinal(int(c)) for c in digits)
            return entires + " " + DECIMALPOINT + " " + postfix

        if number < 20:
            return self.makecase(number, case, clazz)

        if number < 100:
            # Vigesimal system: <multiple of 20> + <1..19>.
            units = number % 20
            if units == 0:
                return self.makecase(number, case, clazz)
            twenties = self.makecase(number - units, "attr", clazz)
            rest = self.to_cardinal(units, clazz=clazz, case=case)
            return twenties + " " + rest

        if number < 1000:
            hundreds, tens = divmod(number, 100)
            hundert = ""
            if hundreds > 1:
                hundert = (
                    CARDINALS[hundreds]["attr"].replace("д*", clazz) + " ")
            if tens:
                rest = self.to_cardinal(tens, clazz=clazz, case=case)
                return hundert + CARDINALS[100]["abs"] + " " + rest
            return hundert + self.makecase(100, case, clazz)

        if number < 1000000:
            thousands, remainder = divmod(number, 1000)
            # Only the last element of the numeral carries the case.
            tcase = "attr" if remainder else case
            if thousands > 1:
                tausend = (
                    self.to_cardinal(thousands, clazz=clazz, case="attr")
                    + " " + CARDINALS[1000][tcase])
            else:
                tausend = self.makecase(1000, tcase, clazz)
            if remainder:
                return tausend + " " + self.to_cardinal(
                    remainder, clazz=clazz, case=case)
            return tausend

        if number < 10**34:
            rest = number % 10**6
            # Non-empty three-digit groups of million, milliard, ...,
            # highest first.
            groups = [
                (pot, number // 10**pot % 1000)
                for pot in sorted(ILLIONS, reverse=True)
            ]
            groups = [(pot, step) for pot, step in groups if step > 0]
            out = []
            for idx, (pot, step) in enumerate(groups):
                words = self.to_cardinal(step, clazz=clazz, case="attr")
                illion = ILLIONS[pot]["attr"]
                if not rest and idx == len(groups) - 1:
                    # This word ends the numeral: use the requested form
                    # when the table has one (currently "abs" and "ORD"),
                    # otherwise keep the attributive form as before.
                    illion = ILLIONS[pot].get(case, illion)
                out.append(words + " " + illion)
            if rest:
                out.append(self.to_cardinal(rest, clazz=clazz, case=case))
            return " ".join(out)

        raise NotImplementedError("The given number is too large.")

    def _money_verbose(self, number, currency, case):
        """Spell the main-unit amount: attributive in ABS, else oblique."""
        mcase = "attr" if case == "abs" else "obl"
        return self.to_cardinal(number, case=mcase)

    def _cents_verbose(self, number, currency, case):
        """Spell the sub-unit amount (always in the attributive form).

        NOTE(review): the original computed an oblique case here exactly
        like ``_money_verbose`` but never used it; the observable behaviour
        (always "attr") is kept — confirm which one is intended.
        """
        return self.to_cardinal(number, case="attr")

    def to_currency(self, val, currency='RUB', cents=True, separator=',',
                    adjective=False, case="abs"):
        """
        Args:
            val: Numeric value
            currency (str): Currency code
            cents (bool): Verbose cents
            separator (str): Cent separator
            adjective (bool): Prefix currency name with adjective
                (accepted for interface compatibility, not used)
            case (str): Case of the amount ("abs" or any other case key)
        Returns:
            str: Formatted string
        Raises:
            NotImplementedError: for an unknown currency code

        """
        left, right, is_negative = parse_currency_parts(val)

        try:
            cr1, cr2 = self.CURRENCY_FORMS[currency]
        except KeyError:
            raise NotImplementedError(
                'Currency code "%s" not implemented for "%s"' %
                (currency, self.__class__.__name__))

        # The currency names are always used in the singular.
        devise = cr1[0]
        centime = cr2[0]

        # ``negword`` is not set by this class's ``__init__``; fall back to
        # the module's own minus word instead of failing on negative input.
        negword = getattr(self, "negword", MINUS)
        minus_str = "%s " % negword.strip() if is_negative else ""
        money_str = self._money_verbose(left, currency, case)
        cents_str = self._cents_verbose(right, currency, case) \
            if cents else self._cents_terse(right, currency)

        return u'%s%s %s%s %s %s' % (
            minus_str,
            money_str,
            devise,
            separator,
            cents_str,
            centime
        )

    def makecase(self, number, case, clazz):
        """Return the ``case`` form of a numeral listed in ``CARDINALS``.

        An explicit table entry wins; otherwise the case suffix is attached
        to the absolutive form (vowel or consonant variant depending on
        whether that form ends in "а").
        """
        forms = CARDINALS[number]
        if case in forms:
            return forms[case].replace("д*", clazz)
        stem = forms["abs"].replace("д*", clazz)
        if stem.endswith("а"):
            suffixes = CARDINALS["casesuffix_voc"]
        else:
            suffixes = CARDINALS["casesuffix_cons"]
        return stem + suffixes[case]
#     "23 hours" is
#          tri awr ar hugain
#          3 hour on twenty
# In addition, some numerals trigger a mutation on the following word,
# which is either another numeral or the counted object
# (https://en.wikipedia.org/wiki/Consonant_mutation#Welsh)
# e.g. "23 dogs" (aspirated mutation, c -> ch)
#          tri chi ar hugain
#          3 dog on twenty
# but  "22 dogs" (soft mutation, c -> g)
#          dau gi ar hugain
#          2 dog on twenty
# and  "24 dogs" (no mutation)
#          pedwar ci ar hugain
#          4 dog on twenty
# (BTW, the counted word is always in the singular when following a numeral)
# Numerals are mutated as well
# e.g. "300"
#          tri chant
#          3 hundred
#      "200"
#          dau gant
#          2 hundred
#      "500"
#          pump cant
#          5 hundred
# The numerals for 2, 3 and 4 depend on the gender (MASC, FEM)
#      2 cats
#          dwy gath
#      2 dogs
#          dau gi
#      2000
#          dwy fil
#      3000
#          tair mil
#
# To add the counted object in the correct position use
#   num2words(17, lang="cy", counted="ci", gender="masc")
#   num2words(17, lang="cy", counted="cath", gender="fem")
# If the number is > 99, use the plural form of the counted object
#   num2words(117, lang="cy", counted="cathod", gender="fem")


# Globals
# -------

# Placeholder marking the position where the counted object is inserted.
OBJ = "__OBJECT__"

# Every table entry is a list of (word, mutation) pairs.  ``mutation`` is
# the mutation the word triggers on the FOLLOWING word: "SM" (soft),
# "AM" (aspirate) or None.  (OBJ, None) marks the slot of the counted noun.

CARDINAL_WORDS = {
    0: [("dim", None), (OBJ, None)],
    1: [("un", None), (OBJ, None)],
    2: [("dau", "SM"), (OBJ, None)],
    3: [("tri", "AM"), (OBJ, None)],
    4: [("pedwar", None), (OBJ, None)],
    5: [("pump", None), (OBJ, None)],
    6: [("chwech", "AM"), (OBJ, None)],
    7: [("saith", None), (OBJ, None)],
    8: [("wyth", None), (OBJ, None)],
    9: [("naw", None), (OBJ, None)],
    10: [("deg", None), (OBJ, None)],
    11: [("un", None), (OBJ, None), ("ar ddeg", None)],
    12: [("deuddeg", None), (OBJ, None)],
    13: [("tri", "AM"), (OBJ, None), ("ar ddeg", None)],
    14: [("pedwar", None), (OBJ, None), ("ar ddeg", None)],
    15: [("pymtheg", None), (OBJ, None)],
    16: [("un", None), (OBJ, None), ("ar bymtheg", None)],
    17: [("dau", "SM"), (OBJ, None), ("ar bymtheg", None)],
    18: [("deunaw", None), (OBJ, None)],
    # The OBJ slot was missing here, so the counted noun was dropped or
    # misplaced for 19 and every number built on it (39, 59, ...).
    19: [("pedwar", None), (OBJ, None), ("ar bymtheg", None)],
}

CARDINAL_WORDS_FEM = {
    0: [("dim", None), (OBJ, None)],
    1: [("un", None), (OBJ, None)],
    2: [("dwy", "SM"), (OBJ, None)],
    3: [("tair", None), (OBJ, None)],
    4: [("pedair", None), (OBJ, None)],
    5: [("pump", None), (OBJ, None)],
    6: [("chwech", "AM"), (OBJ, None)],
    7: [("saith", None), (OBJ, None)],
    8: [("wyth", None), (OBJ, None)],
    9: [("naw", None), (OBJ, None)],
    10: [("deg", None), (OBJ, None)],
    11: [("un", None), (OBJ, None), ("ar ddeg", None)],
    12: [("deuddeg", None), (OBJ, None)],
    13: [("tair", None), (OBJ, None), ("ar ddeg", None)],
    14: [("pedair", None), (OBJ, None), ("ar ddeg", None)],
    15: [("pymtheg", None), (OBJ, None)],
    16: [("un", None), (OBJ, None), ("ar bymtheg", None)],
    17: [("dwy", "SM"), (OBJ, None), ("ar bymtheg", None)],
    18: [("deunaw", None), (OBJ, None)],
    # OBJ slot added, see CARDINAL_WORDS[19].
    19: [("pedair", None), (OBJ, None), ("ar bymtheg", None)],
}

# Names of the powers of ten, keyed by the exponent; (word, mutation) pairs.
MILLION_WORDS = {
    3: ("mil", None),
    6: ("miliwn", None),
    9: ("biliwn", None),
    12: ("triliwn", None),
    15: ("cwadriliwn", None),
    18: ("cwintiliwn", None),
    # NOTE(review): "secsttiliwn" looks like a typo (double "t") — confirm
    # before changing, existing tests may compare against it.
    21: ("secsttiliwn", None),
    24: ("septiliwn", None),
    27: ("octiliwn", None),
    30: ("noniliwn", None),
    33: ("dengiliwn", None),
}

ORDINAL_WORDS = {
    0: [("dimfed", None), (OBJ, None)],
    1: [(OBJ, None), ("cyntaf", None)],  # "first" follows the noun
    2: [("ail", "SM"), (OBJ, None)],
    3: [("trydydd", None), (OBJ, None)],
    4: [("pedwerydd", None), (OBJ, None)],
    5: [("pumed", None), (OBJ, None)],
    6: [("chweched", None), (OBJ, None)],
    7: [("saithfed", None), (OBJ, None)],
    8: [("wythfed", None), (OBJ, None)],
    9: [("nawfed", None), (OBJ, None)],
    10: [("degfed", None), (OBJ, None)],
    11: [("unfed", "SM"), (OBJ, None), ("ar ddeg", None)],
    12: [("deuddegfed", None), (OBJ, None)],
    13: [("trydydd", None), (OBJ, None), ("ar ddeg", None)],
    14: [("pedwerydd", None), (OBJ, None), ("ar ddeg", None)],
    15: [("pymthegfed", None), (OBJ, None)],
    16: [("unfed", None), (OBJ, None), ("ar bymtheg", None)],
    17: [("ail", "SM"), (OBJ, None), ("ar bymtheg", None)],
    18: [("deunawfed", None), (OBJ, None)],
    19: [("pedwerydd", None), (OBJ, None), ("ar bymtheg", None)],
}

ORDINAL_WORDS_FEM = {
    0: [("dimfed", None), (OBJ, None)],
    1: [(OBJ, None), ("gyntaf", None)],
    2: [("ail", "SM"), (OBJ, None)],
    3: [("trydedd", "SM"), (OBJ, None)],
    4: [("pedwaredd", "SM"), (OBJ, None)],
    5: [("pumed", None), (OBJ, None)],
    6: [("chweched", None), (OBJ, None)],
    7: [("saithfed", None), (OBJ, None)],
    8: [("wythfed", None), (OBJ, None)],
    9: [("nawfed", None), (OBJ, None)],
    10: [("degfed", None), (OBJ, None)],
    11: [("unfed", "SM"), (OBJ, None), ("ar ddeg", None)],
    12: [("deuddegfed", None), (OBJ, None)],
    13: [("trydedd", "SM"), (OBJ, None), ("ar ddeg", None)],
    14: [("pedwaredd", "SM"), (OBJ, None), ("ar ddeg", None)],
    15: [("pymthegfed", None), (OBJ, None)],
    16: [("unfed", None), (OBJ, None), ("ar bymtheg", None)],
    17: [("ail", "SM"), (OBJ, None), ("ar bymtheg", None)],
    18: [("deunawfed", None), (OBJ, None)],
    19: [("pedwaredd", None), (OBJ, None), ("ar bymtheg", None)],
}

# The script can extrapolate the missing numbers from the base forms.
# Multiples of twenty (vigesimal system), keyed by the multiplier.
STR_TENS = {
    1: [("ugain", None), (OBJ, None)],
    2: [("deugain", None), (OBJ, None)],
    3: [("trigain", None), (OBJ, None)],
    4: [("pedwar ugain", None), (OBJ, None)],
}

ORD_STR_TENS = {
    1: [("ugainfed", None), (OBJ, None)],
    2: [("deugainfed", None), (OBJ, None)],
    3: [("trigainfed", None), (OBJ, None)],
    4: [("pedwar ugainfed", None), (OBJ, None)],
}

# Decimal ("informal") tens.  Not used by the code below yet.
STR_TENS_INFORMAL = {
    1: ("undeg", None), 2: ("dauddeg", None), 3: ("trideg", None),
    4: ("pedwardeg", None), 5: ("pumdeg", None), 6: ("chwedeg", None),
    7: ("saithdeg", None), 8: ("wythdeg", None), 9: ("nawdeg", None),
}

# Remainders (1-99) after "cant" that are joined with "ac" instead of "a".
_AC_REMAINDERS = frozenset(
    [1, 8, 11, 16, 20, 21, 31, 36, 41, 48, 61, 68, 71, 81, 88, 91])


GENERIC_DOLLARS = ('dolar', 'dolarau')
GENERIC_CENTS = ('ceiniog', 'ceiniogau')

# Currencies whose main unit is grammatically feminine.
CURRENCIES_FEM = ["GBP"]


class Num2Word_CY(Num2Word_EU):
    """Welsh (``cy``) number-to-words converter (traditional system).

    Numerals are built as lists of ``(word, mutation)`` pairs taken from
    the module-level tables and turned into text by ``makestring``, which
    applies the mutations and inserts the counted noun at the ``OBJ`` slot.
    """

    CURRENCY_FORMS = {
        # currency code: (sg, pl), (sg, pl)
        # in Welsh a noun after a numeral is ALWAYS in the singular
        'EUR': (('euro', 'euros'), GENERIC_CENTS),
        'USD': (GENERIC_DOLLARS, GENERIC_CENTS),
        'GBP': (('punt', 'punnoedd'), ('ceiniog', 'ceiniogau')),
        'CNY': (('yuan', 'yuans'), ('ffen', 'ffens')),
    }

    MINUS_PREFIX_WORD = "meinws "
    FLOAT_INFIX_WORD = " pwynt "

    def setup(self):
        Num2Word_EU.setup(self)

    def __init__(self):
        # The base-class initialiser is intentionally not called, so
        # attributes it would normally create (e.g. ``negword``) are not
        # available on instances of this class.
        pass

    def float_to_words(self, float_number, ordinal=False):
        """Spell a float as "<integer part> pwynt <digit> <digit> ...".

        The fractional digits are obtained through ``Decimal`` so that
        floats whose ``str()`` uses exponent notation are handled; when
        there are no fractional digits only the integer part is returned.
        """
        from decimal import Decimal

        if ordinal:
            prefix = self.to_ordinal(int(float_number))
        else:
            prefix = self.to_cardinal(int(float_number))
        plain = format(Decimal(repr(float(float_number))), "f")
        digits = plain.partition(".")[2]
        if not digits:
            return prefix
        postfix = " ".join(self.to_cardinal(int(c)) for c in digits)
        return prefix + Num2Word_CY.FLOAT_INFIX_WORD + postfix

    def hundred_group(self, number, informal=False, gender="masc",
                      ordinal=False):
        """Return the (word, mutation) list for a number from 0 to 999.

        Args:
            number: value of one three-digit group.
            informal: accepted for compatibility.  The decimal system is
                not implemented; the traditional forms are produced
                (previously the tens and units were silently dropped).
            gender: "masc" or "fem", selects the forms of 2, 3 and 4.
            ordinal: build the ordinal form of the last element.
        """
        hundreds = number // 100
        until100 = number % 100  # 0 - 99
        result = []  # number words plus mutation info for the next word

        fem = gender == "fem"
        cardinals = CARDINAL_WORDS_FEM if fem else CARDINAL_WORDS
        if ordinal:
            # Feminine ordinals used to fall back to feminine cardinals.
            CW = ORDINAL_WORDS_FEM if fem else ORDINAL_WORDS
        else:
            CW = cardinals

        if hundreds > 0:
            if hundreds > 1:
                # "cant" is counted with the masculine numerals.
                result.extend(CARDINAL_WORDS[hundreds])
            result.extend([("cant", None), (OBJ, None)])
            if until100:
                if until100 in _AC_REMAINDERS:
                    result.append(("ac", None))
                else:
                    result.append(("a", "AM"))

        if not until100:
            return result

        if not ordinal and 50 <= until100 <= 59:
            # 50-59 are expressed as "half a hundred (and ...)".
            units = until100 % 10
            half = "hanner" if hundreds > 0 else "hanner cant"
            if units == 0:
                if hundreds > 0:
                    result.append((half, None))
                else:
                    result.extend([(half, None), (OBJ, None)])
            elif units == 1:
                result.extend([(half + " ac un", None), (OBJ, None)])
            else:
                result.append((half + " a", "AM"))
                result.extend(CW[units])
        elif number < 20:
            result.extend(cardinals[int(number)])
        else:
            tens = until100 // 20
            units = until100 % 20
            if ordinal and units == 0:
                degau = ORD_STR_TENS.get(tens)
            else:
                degau = STR_TENS.get(tens)

            if units != 0:
                result.extend(CW[units])
                if degau:
                    # "ar (h)ugain" for 21-39, "a <twenties>" above.
                    result.append(("a", "AM") if tens > 1 else ("ar", "SM"))
                    result.extend(degau)
            elif degau:
                result.extend(degau)
        return result

    def to_ordinal(self, number, informal=False, gender="masc"):
        """Return the ordinal of *number* (0-100) as a string.

        Raises:
            NotImplementedError: if ``number`` is greater than 100.
        """
        if number < 20:
            # The gender used to be ignored here.
            table = ORDINAL_WORDS_FEM if gender == "fem" else ORDINAL_WORDS
            return makestring(table[number])
        if number == 100:
            return "canfed"
        elif number > 100:
            raise NotImplementedError("The given number is too large.")

        return self.to_cardinal(number, informal=False, gender=gender,
                                ordinal=True)

    def to_cardinal(self, number, informal=False, gender="masc",
                    ordinal=False, counted=None, raw=False):
        """Return *number* spelled out in Welsh.

        Args:
            number: int or float to convert.
            informal: see ``hundred_group``.
            gender: gender of the counted noun ("masc" or "fem").
            ordinal: make the lowest non-zero group an ordinal.
            counted: noun to insert (singular below 100; from 100 on it is
                appended as "o <counted>").
            raw: return the list of (word, mutation) pairs instead of a
                string (floats are always returned as strings).

        Raises:
            NotImplementedError: if the number is too large.
        """
        negative = number < 0
        if negative:
            number = -1 * number
        if number == 0:
            # Hand out a copy so callers cannot modify the shared table.
            zero = list(CARDINAL_WORDS[0])
            return zero if raw else makestring(zero)
        elif not number < 999 * 10**33:
            raise NotImplementedError("The given number is too large.")
        elif isinstance(number, float):
            words = self.float_to_words(number)
            # The sign was removed above and used to be lost here.
            return self.MINUS_PREFIX_WORD + words if negative else words

        # Split into groups of three digits starting from the right:
        # units (pot == 3), thousands (pot == 6), millions (pot == 9), ...
        groups = []
        lowest_pot = None  # position of the lowest non-zero group
        for pot in range(3, 37, 3):
            gr = (number % 10**pot) // 10**(pot - 3)
            groups.append((gr, pot))
            if gr and lowest_pot is None:
                lowest_pot = pot

        result = []
        if negative:
            result.append((self.MINUS_PREFIX_WORD.strip(), None))

        for gr, pot in reversed(groups):
            if not gr:
                continue
            if pot == 6:
                g = "fem"  # "mil" (1000) is feminine
            elif pot == 3:
                g = gender  # units depend on the following noun
            else:
                g = "masc"  # millions etc. are masculine
            # A single thousand/million is just "mil"/"miliwn".
            if gr > 1 or pot == 3:
                result += self.hundred_group(
                    gr, informal=informal, gender=g,
                    ordinal=ordinal and pot == lowest_pot)
            if pot > 3:
                result.append(MILLION_WORDS[pot - 3])

        if raw:
            # needed to trigger the correct mutation on currencies
            return result
        if number < 100:
            return makestring(result, counted=counted)
        if counted:
            result.extend([("o", "SM"), (counted, None)])
        return makestring(result)

    def to_currency(self, val, currency='EUR', cents=True, separator=',',
                    adjective=False):
        """
        Args:
            val: Numeric value
            currency (str): Currency code
            cents (bool): Verbose cents
            separator (str): Cent separator
            adjective (bool): Prefix currency name with adjective
        Returns:
            str: Formatted string
        Raises:
            NotImplementedError: for an unknown currency code

        """
        left, right, is_negative = parse_currency_parts(val)
        try:
            cr1, cr2 = self.CURRENCY_FORMS[currency]
        except KeyError:
            raise NotImplementedError(
                'Currency code "%s" not implemented for "%s"' %
                (currency, self.__class__.__name__))

        if adjective and currency in self.CURRENCY_ADJECTIVES:
            cr1 = prefix_currency(self.CURRENCY_ADJECTIVES[currency], cr1)

        # ``negword`` is not set by this class's ``__init__``; fall back to
        # the class's own minus word instead of failing on negative input.
        negword = getattr(self, "negword", self.MINUS_PREFIX_WORD)
        minus_str = "%s " % negword.strip() if is_negative else ""
        money_str = self._money_verbose(left, currency)
        cents_str = self._cents_verbose(right, currency) \
            if cents else self._cents_terse(right, currency)

        if right == 0:
            # no sub-units: main amount only
            return u'%s%s' % (minus_str, money_str)
        elif left == 0:
            # no main units: sub-unit amount only
            return u'%s%s' % (minus_str, cents_str)

        return u'%s%s%s %s' % (
            minus_str,
            money_str,
            separator,
            cents_str,
        )

    def _amount_verbose(self, number, currency, unit_index, gender):
        """Spell *number* together with the currency unit name.

        ``unit_index`` is 0 for the main unit and 1 for the sub-unit of
        ``CURRENCY_FORMS[currency]``.  Above 100 the plural is appended as
        "o <plural>"; otherwise the singular is inserted at the OBJ slot.
        """
        forms = self.CURRENCY_FORMS.get(currency)
        if number > 100:
            m = self.to_cardinal(number, gender=gender, raw=True)
            if forms:
                m.append(("o", "SM"))
                m.append((forms[unit_index][1], None))
            else:
                m.append((currency, None))
            return makestring(m)

        if number > 1:
            m = self.to_cardinal(number, gender=gender, raw=True)
        else:
            m = [(OBJ, None)]
        c = forms[unit_index][0] if forms else currency
        return makestring(m, counted=c)

    def _money_verbose(self, number, currency):
        """Spell the main-unit amount, honouring feminine currencies.

        Non-feminine currencies used to get the raw word list back instead
        of a string with the currency name.
        """
        gender = "fem" if currency in CURRENCIES_FEM else "masc"
        return self._amount_verbose(number, currency, 0, gender)

    def _cents_verbose(self, number, currency):
        """Spell the sub-unit amount; empty string when it is zero."""
        if number == 0:
            return ""
        # Above 100 the plural of the MAIN unit used to be appended here.
        return self._amount_verbose(number, currency, 1, "masc")


def makestring(result, counted=None):
    """Join (word, mutation) pairs into a string, applying the mutations.

    ``counted`` replaces the first OBJ placeholder; all other placeholders
    are skipped without resetting the pending mutation.
    """
    out = []
    lastmut = None
    for w, mut in result:
        if w == OBJ:
            if not counted:
                continue
            w = counted
            counted = None  # only first position
        out.append(mutate(w, lastmut) if lastmut else w)
        lastmut = mut
    return " ".join(out)


def mutate(word, mutation):
    """Apply the soft ("SM") or aspirate ("AM") mutation to *word*."""
    if mutation == "SM":
        return softmutation(word)
    elif mutation == "AM":
        return aspiratedmutation(word)
    return word


def softmutation(word):
    """Return *word* with its initial consonant soft-mutated.

    Empty and one-letter words are handled (they used to raise
    IndexError).

    NOTE(review): initial "g" is left unchanged and initial "h" is dropped,
    exactly as before — confirm both against the mutation rules.
    """
    if not word:
        return word
    first = word[0]
    digraph = word[:2]
    if first == "p" and digraph != "ph":
        return "b" + word[1:]
    elif first == "t" and digraph != "th":
        return "d" + word[1:]
    elif first == "c" and digraph != "ch":
        return "g" + word[1:]
    elif first == "b" or first == "m":
        return "f" + word[1:]
    elif first == "h":
        return word[1:]
    elif first == "d" and digraph != "dd":
        return "d" + word
    elif digraph == "ll":
        return word[1:]
    elif digraph == "rh":
        return "r" + word[2:]
    elif word == "ugain":
        # not a mutation proper: "ar hugain"
        return "hugain"
    return word


def aspiratedmutation(word):
    """Return *word* with its initial p/t/c aspirated (ph/th/ch).

    Empty and one-letter words are handled (they used to raise
    IndexError).
    """
    if not word:
        return word
    first = word[0]
    if first in ("p", "t", "c") and word[1:2] != "h":
        return first + "h" + word[1:]
    return word
+ +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +# MA 02110-1301 USA +from __future__ import unicode_literals + +from unittest import TestCase + +from num2words import num2words + +TEST_CASES_CARDINAL = [ + (1, "obl", "б", "цхьана"), + (2, "comp", "в", "шиннал"), + (3, "mat", "д", "кхааннах"), + (4, "mat", "в", "веаннах"), + (5, "abs", "й", "пхиъ"), + (6, "dat", "д", "ялханна"), + (7, "erg", "в", "ворхӀамма"), + (8, "comp", "й", "бархӀаннал"), + (9, "dat", "й", "иссанна"), + (10, "erg", "б", "иттамма"), + (11, "dat", "б", "цхьайттанна"), + (12, "instr", "й", "шийттанца"), + (13, "erg", "б", "кхойттамма"), + (14, "all", "в", "вейттанга"), + (15, "dat", "б", "пхийттанна"), + (16, "dat", "й", "ялхиттанна"), + (17, "dat", "в", "вуьрхӀиттанна"), + (18, "attr", "й", "берхӀитта"), + (19, "all", "й", "ткъайеснанга"), + (20, "attr", "б", "ткъе"), + (21, "all", "в", "ткъе цхаьнга"), + (22, "obl", "в", "ткъе шина"), + (23, "attr", "б", "ткъе кхо"), + (24, "dat", "й", "ткъе йеанна"), + (25, "attr", "й", "ткъе пхи"), + (26, "abs", "б", "ткъе ялх"), + (27, "abs", "в", "ткъе ворхӀ"), + (28, "all", "б", "ткъе бархӀанга"), + (29, "mat", "д", "ткъе иссаннах"), + (30, "gen", "й", "ткъе иттаннан"), + (31, "dat", "в", "ткъе цхьайттанна"), + (32, "comp", "й", "ткъе шийттаннал"), + (33, "instr", "в", "ткъе кхойттанца"), + (34, 
"instr", "в", "ткъе вейттанца"), + (35, "comp", "в", "ткъе пхийттаннал"), + (36, "dat", "й", "ткъе ялхиттанна"), + (37, "obl", "в", "ткъе вуьрхӀиттан"), + (38, "dat", "й", "ткъе берхӀиттанна"), + (39, "mat", "й", "ткъе ткъайеснаннах"), + (40, "all", "д", "шовзткъанга"), + (41, "obl", "в", "шовзткъе цхьана"), + (42, "dat", "в", "шовзткъе шинна"), + (43, "erg", "й", "шовзткъе кхаамма"), + (44, "erg", "й", "шовзткъе йеамма"), + (45, "comp", "д", "шовзткъе пхеаннал"), + (46, "mat", "б", "шовзткъе ялханнах"), + (47, "erg", "б", "шовзткъе ворхӀамма"), + (48, "erg", "в", "шовзткъе бархӀамма"), + (49, "all", "б", "шовзткъе иссанга"), + (50, "mat", "й", "шовзткъе иттаннах"), + (51, "comp", "в", "шовзткъе цхьайттаннал"), + (52, "erg", "в", "шовзткъе шийттамма"), + (53, "attr", "д", "шовзткъе кхойтта"), + (54, "gen", "б", "шовзткъе бейттаннан"), + (55, "attr", "д", "шовзткъе пхийтта"), + (56, "instr", "й", "шовзткъе ялхиттанца"), + (57, "obl", "б", "шовзткъе вуьрхӀиттан"), + (58, "attr", "б", "шовзткъе берхӀитта"), + (59, "all", "й", "шовзткъе ткъайеснанга"), + (60, "all", "й", "кхузткъанга"), + (61, "gen", "й", "кхузткъе цхьаннан"), + (62, "all", "б", "кхузткъе шинга"), + (63, "instr", "б", "кхузткъе кхаанца"), + (64, "dat", "й", "кхузткъе йеанна"), + (65, "instr", "й", "кхузткъе нхеанца"), + (66, "all", "б", "кхузткъе ялханга"), + (67, "erg", "д", "кхузткъе ворхӀамма"), + (68, "instr", "д", "кхузткъе бархӀанца"), + (69, "mat", "й", "кхузткъе иссаннах"), + (70, "attr", "б", "кхузткъе итт"), + (71, "gen", "б", "кхузткъе цхьайттаннан"), + (72, "abs", "й", "кхузткъе шийтта"), + (73, "mat", "д", "кхузткъе кхойттаннах"), + (74, "instr", "й", "кхузткъе йейттанца"), + (75, "mat", "в", "кхузткъе пхийттаннах"), + (76, "instr", "б", "кхузткъе ялхиттанца"), + (77, "dat", "в", "кхузткъе вуьрхӀиттанна"), + (78, "erg", "д", "кхузткъе берхӀиттамма"), + (79, "gen", "б", "кхузткъе ткъайеснаннан"), + (80, "dat", "б", "дезткъанна"), + (81, "gen", "б", "дезткъе цхьаннан"), + (82, "dat", "б", 
"дезткъе шинна"), + (83, "obl", "д", "дезткъе кхона"), + (84, "erg", "в", "дезткъе веамма"), + (85, "all", "в", "дезткъе пхеанга"), + (86, "erg", "д", "дезткъе ялхамма"), + (87, "comp", "б", "дезткъе ворхӀаннал"), + (88, "dat", "д", "дезткъе бархӀанна"), + (89, "erg", "б", "дезткъе иссамма"), + (90, "obl", "й", "дезткъе иттан"), + (91, "obl", "б", "дезткъе цхьайттан"), + (92, "abs", "б", "дезткъе шийтта"), + (93, "gen", "в", "дезткъе кхойттаннан"), + (94, "comp", "б", "дезткъе бейттаннал"), + (95, "all", "б", "дезткъе пхийттанга"), + (96, "instr", "д", "дезткъе ялхиттанца"), + (97, "erg", "д", "дезткъе вуьрхӀиттамма"), + (98, "instr", "й", "дезткъе берхӀиттанца"), + (99, "instr", "б", "дезткъе ткъайеснанца"), + + (0, "gen", "б", "нолан"), + (100, "mat", "б", "бӀеннах"), + (200, "attr", "д", "ши бӀе"), + (300, "obl", "в", "кхо бӀен"), + (400, "abs", "в", "ви бӀе"), + (500, "all", "й", "пхи бӀенга"), + (600, "abs", "й", "ялх бӀе"), + (700, "mat", "й", "ворхӀ бӀеннах"), + (800, "gen", "б", "бархӀ бӀеннан"), + (900, "mat", "в", "исс бӀеннах"), + (1000, "gen", "д", "эзарнан"), + (1100, "instr", "д", "эзар бӀенца"), + (1200, "instr", "д", "эзар ши бӀенца"), + (1300, "comp", "б", "эзар кхо бӀеннал"), + (1400, "instr", "д", "эзар ди бӀенца"), + (1500, "comp", "б", "эзар пхи бӀеннал"), + (1600, "erg", "б", "эзар ялх бӀемма"), + (1700, "attr", "д", "эзар ворхӀ бӀе"), + (1800, "obl", "д", "эзар бархӀ бӀен"), + (1900, "gen", "й", "эзар исс бӀеннан"), + (2000, "comp", "д", "ши эзарнал"), + (2022, "comp", "д", "ши эзар ткъе шиннал"), + (2100, "obl", "в", "ши эзар бӀен"), + (423000, "erg", "в", "ви бӀе ткъе кхо эзарно"), + ] + +TEST_CASES_ORDINAL = [ + (1, "all", "б", "цхьалгӀа"), + (2, "dat", "в", "шолгӀа"), + (3, "obl", "й", "кхоалгӀа"), + (4, "dat", "б", "боьалгӀа"), + (5, "dat", "в", "пхоьалгӀа"), + (6, "abs", "в", "йолхалгӀа"), + (7, "abs", "в", "ворхӀалгӀа"), + (8, "abs", "д", "борхӀалӀа"), + (9, "comp", "д", "уьссалгӀа"), + (10, "erg", "д", "уьтталгӀа"), + (11, "all", "б", 
"цхьайтталгӀа"), + (12, "abs", "й", "шийтталга"), + (13, "gen", "в", "кхойтталгӀа"), + (14, "gen", "в", "вейтталгӀа"), + (15, "mat", "й", "пхийтталгӀа"), + (16, "dat", "й", "ялхитталгӀа"), + (17, "erg", "д", "вуьрхӀитталгӀа"), + (18, "erg", "й", "берхитталӀа"), + (19, "obl", "в", "ткъаесналгӀа"), + (20, "abs", "в", "ткъолгӀа"), + (21, "mat", "б", "ткъе цхьалгӀа"), + (22, "erg", "б", "ткъе шолгӀа"), + (23, "mat", "й", "ткъе кхоалгӀа"), + (24, "obl", "б", "ткъе боьалгӀа"), + (25, "abs", "д", "ткъе пхоьалгӀа"), + (26, "all", "й", "ткъе йолхалгӀа"), + (27, "mat", "в", "ткъе ворхӀалгӀа"), + (28, "instr", "д", "ткъе борхӀалӀа"), + (29, "obl", "б", "ткъе уьссалгӀа"), + (30, "dat", "б", "ткъе уьтталгӀа"), + (31, "obl", "й", "ткъе цхьайтталгӀа"), + (32, "comp", "д", "ткъе шийтталга"), + (33, "attr", "д", "ткъе кхойтталгӀа"), + (34, "gen", "в", "ткъе вейтталгӀа"), + (35, "erg", "д", "ткъе пхийтталгӀа"), + (36, "all", "в", "ткъе ялхитталгӀа"), + (37, "attr", "й", "ткъе вуьрхӀитталгӀа"), + (38, "erg", "б", "ткъе берхитталӀа"), + (39, "gen", "д", "ткъе ткъаесналгӀа"), + (40, "abs", "й", "шовзткъалгІа"), + (41, "erg", "й", "шовзткъе цхьалгӀа"), + (42, "comp", "й", "шовзткъе шолгӀа"), + (43, "obl", "д", "шовзткъе кхоалгӀа"), + (44, "all", "й", "шовзткъе йоьалгӀа"), + (45, "abs", "д", "шовзткъе пхоьалгӀа"), + (46, "comp", "д", "шовзткъе йолхалгӀа"), + (47, "comp", "й", "шовзткъе ворхӀалгӀа"), + (48, "attr", "б", "шовзткъе борхӀалӀа"), + (49, "comp", "й", "шовзткъе уьссалгӀа"), + (50, "abs", "д", "шовзткъе уьтталгӀа"), + (51, "dat", "б", "шовзткъе цхьайтталгӀа"), + (52, "comp", "в", "шовзткъе шийтталга"), + (53, "mat", "б", "шовзткъе кхойтталгӀа"), + (54, "all", "д", "шовзткъе дейтталгӀа"), + (55, "dat", "в", "шовзткъе пхийтталгӀа"), + (56, "erg", "б", "шовзткъе ялхитталгӀа"), + (57, "comp", "й", "шовзткъе вуьрхӀитталгӀа"), + (58, "instr", "в", "шовзткъе берхитталӀа"), + (59, "mat", "б", "шовзткъе ткъаесналгӀа"), + (60, "all", "в", "кхузткъалгІа"), + (61, "obl", "д", "кхузткъе 
цхьалгӀа"), + (62, "instr", "д", "кхузткъе шолгӀа"), + (63, "erg", "й", "кхузткъе кхоалгӀа"), + (64, "dat", "д", "кхузткъе доьалгӀа"), + (65, "gen", "д", "кхузткъе пхоьалгӀа"), + (66, "mat", "в", "кхузткъе йолхалгӀа"), + (67, "gen", "в", "кхузткъе ворхӀалгӀа"), + (68, "attr", "б", "кхузткъе борхӀалӀа"), + (69, "all", "д", "кхузткъе уьссалгӀа"), + (70, "mat", "в", "кхузткъе уьтталгӀа"), + (71, "gen", "й", "кхузткъе цхьайтталгӀа"), + (72, "obl", "й", "кхузткъе шийтталга"), + (73, "attr", "в", "кхузткъе кхойтталгӀа"), + (74, "dat", "б", "кхузткъе бейтталгӀа"), + (75, "instr", "в", "кхузткъе пхийтталгӀа"), + (76, "gen", "в", "кхузткъе ялхитталгӀа"), + (77, "erg", "д", "кхузткъе вуьрхӀитталгӀа"), + (78, "all", "й", "кхузткъе берхитталӀа"), + (79, "instr", "д", "кхузткъе ткъаесналгӀа"), + (80, "dat", "в", "дезткъалгІа"), + (81, "mat", "в", "дезткъе цхьалгӀа"), + (82, "abs", "д", "дезткъе шолгӀа"), + (83, "abs", "д", "дезткъе кхоалгӀа"), + (84, "erg", "в", "дезткъе воьалгӀа"), + (85, "obl", "й", "дезткъе пхоьалгӀа"), + (86, "instr", "д", "дезткъе йолхалгӀа"), + (87, "all", "в", "дезткъе ворхӀалгӀа"), + (88, "dat", "д", "дезткъе борхӀалӀа"), + (89, "obl", "б", "дезткъе уьссалгӀа"), + (90, "instr", "в", "дезткъе уьтталгӀа"), + (91, "abs", "й", "дезткъе цхьайтталгӀа"), + (92, "comp", "в", "дезткъе шийтталга"), + (93, "erg", "д", "дезткъе кхойтталгӀа"), + (94, "obl", "й", "дезткъе йейтталгӀа"), + (95, "comp", "б", "дезткъе пхийтталгӀа"), + (96, "obl", "б", "дезткъе ялхитталгӀа"), + (97, "gen", "й", "дезткъе вуьрхӀитталгӀа"), + (98, "dat", "б", "дезткъе берхитталӀа"), + (99, "abs", "д", "дезткъе ткъаесналгӀа"), + (100, "abs", "в", "бІолгІа"), + (200, "obl", "й", "ши бІолгІа"), + (300, "mat", "в", "кхо бІолгІа"), + (400, "gen", "б", "би бІолгІа"), + (500, "erg", "й", "пхи бІолгІа"), + (600, "gen", "д", "ялх бІолгІа"), + (700, "instr", "й", "ворхӀ бІолгІа"), + (800, "all", "б", "бархӀ бІолгІа"), + (900, "comp", "б", "исс бІолгІа"), + (1000, "dat", "д", "эзарлагІа"), + (107, 
"gen", "в", "бӀе ворхӀалгӀа"), + (214, "attr", "д", "ши бӀе дейтталгӀа"), + (321, "comp", "д", "кхо бӀе ткъе цхьалгӀа"), + (428, "dat", "в", "ви бӀе ткъе борхӀалӀа"), + (535, "erg", "й", "пхи бӀе ткъе пхийтталгӀа"), + (642, "all", "й", "ялх бӀе шовзткъе шолгӀа"), + (749, "mat", "в", "ворхӀ бӀе шовзткъе уьссалгӀа"), + (856, "attr", "й", "бархӀ бӀе шовзткъе ялхитталгӀа"), + (963, "mat", "б", "исс бӀе кхузткъе кхоалгӀа"), + (1070, "comp", "в", "эзар кхузткъе уьтталгӀа"), + (1177, "dat", "в", "эзар бӀе кхузткъе вуьрхӀитталгӀа"), + (1284, "abs", "д", "эзар ши бӀе дезткъе доьалгӀа"), + (1391, "dat", "в", "эзар кхо бӀе дезткъе цхьайтталгӀа"), + (1498, "abs", "в", "эзар ви бӀе дезткъе берхитталӀа"), + (1605, "obl", "б", "эзар ялх бӀе пхоьалгӀа"), + (1712, "erg", "й", "эзар ворхӀ бӀе шийтталга"), + (1819, "all", "б", "эзар бархӀ бӀе ткъаесналгӀа"), + (1926, "abs", "б", "эзар исс бӀе ткъе йолхалгӀа"), + (2033, "all", "д", "ши эзар ткъе кхойтталгӀа"), + (2140, "dat", "б", "ши эзар бӀе шовзткъалгІа"), + (423000, "dat", "д", "ди бӀе ткъе кхо эзарлагІа"), + ] + +TEST_CASES_DECIMALS = [ + (123.4567, "бӀе ткъе кхоъ запятая диъ пхиъ ялх ворхӀ") + ] + +TEST_CASES_MILLIONS = [ + (200020, "ши бӀе эзар ткъа"), + (4000400, "ди миллион ди бӀе"), + (60006000, "кхузткъе миллион ялх эзар"), + (800080000, "бархӀ бӀе миллион дезткъе эзар"), + (10001000000, "итт миллиард цхьа миллион"), + (120012000000, "бӀе ткъе миллиард шийтта миллион"), + (1400140000000, "цхьа биллион ди бӀе миллиард бӀе шовзткъе миллион"), + (16001600000000, "ялхитта биллион цхьа миллиард ялх бӀе миллион"), + (180018000000000, "бӀе дезткъе биллион берхӀитта миллиард"), + (2000200000000000, "ши биллиард ши бӀе миллиард"), + (22002200000000000, "ткъе ши биллиард ши биллион ши бӀе миллиард"), + (240024000000000000, "ши бӀе шовзткъе биллиард ткъе ди биллион"), + (2600260000000000000, "ши триллион ялх бӀе биллиард ши бӀе кхузткъе биллион"), + (28002800000000000000, "ткъе бархӀ триллион ши биллиард бархӀ бӀе биллион"), + 
(300030000000000000000, "кхо бӀе триллион ткъе итт биллиард"), + (3200320000000000000000, "кхо триллиард ши бӀе триллион кхо бӀе ткъе биллиард"), + (34003400000000000000000, "ткъе дейтта триллиард кхо триллион ди бӀе биллиард"), + (360036000000000000000000, "кхо бӀе кхузткъе триллиард ткъе ялхитта триллион"), + (3800380000000000000000000, "кхо квадриллион бархӀ бӀе триллиард кхо бӀе дезткъе триллион"), + (40004000000000000000000000, "шовзткъе квадриллион ди триллиард"), + (420042000000000000000000000, "ди бӀе ткъе квадриллион шовзткъе ши триллиард"), + (4400440000000000000000000000, "ди квадриллиард ди бӀе квадриллион ди бӀе шовзткъе триллиард"), + (46004600000000000000000000000, "шовзткъе ялх квадриллиард ди квадриллион ялх бӀе триллиард"), + (480048000000000000000000000000, "ди бӀе дезткъе квадриллиард шовзткъе бархӀ квадриллион"), + (5000500000000000000000000000000, "пхи квинтиллион пхи бӀе квадриллион"), + (52005200000000000000000000000000, "шовзткъе шийтта квинтиллион пхи квадриллиард ши бӀе квадриллион"), + (540054000000000000000000000000000, "пхи бӀе шовзткъе квинтиллион шовзткъе дейтта квадриллиард"), + (5600560000000000000000000000000000, "пхи квинтиллиард ялх бӀе квинтиллион пхи бӀе кхузткъе квадриллиард"), + ] + +TEST_CURRENCY = [ + (143.55, "abs", "бӀе шовзткъе кхо Сом, шовзткъе пхийтта Кепек"), + ] + +class Num2WordsCETest(TestCase): + + def test_number(self): + for test in TEST_CASES_CARDINAL: + self.assertEqual(num2words(test[0], lang='ce', case=test[1], clazz=test[2]), test[3]) + + def test_millions(self): + for test in TEST_CASES_MILLIONS: + self.assertEqual(num2words(test[0], lang='ce'), test[1]) + + def test_ordinal_number(self): + for test in TEST_CASES_ORDINAL: + self.assertEqual(num2words(test[0], lang='ce', to="ordinal", clazz=test[2]), test[3]) + + def test_currency(self): + for test in TEST_CURRENCY: + self.assertEqual(num2words(test[0], lang='ce', to="currency", currency="RUB", case=test[1]), test[2]) + + def test_decimals(self): + for 
test in TEST_CASES_DECIMALS: + self.assertEqual(num2words(test[0], lang='ce'), test[1]) + + diff --git a/tests/test_cy.py b/tests/test_cy.py new file mode 100644 index 00000000..7242df52 --- /dev/null +++ b/tests/test_cy.py @@ -0,0 +1,430 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2023, Johannes Heinecke. All Rights Reserved. + +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +# MA 02110-1301 USA +from __future__ import unicode_literals + +from unittest import TestCase + +from num2words import num2words + +TEST_CASES_CARDINAL = ( + (0, "dim"), + (1, "un"), + (1, "un"), + (2, "dau"), + (3, "tri"), + (4, "pedwar"), + (5, "pump"), + (6, "chwech"), + (7, "saith"), + (8, "wyth"), + (9, "naw"), + (10, "deg"), + (11, "un ar ddeg"), + (12, "deuddeg"), + (13, "tri ar ddeg"), + (14, "pedwar ar ddeg"), + (15, "pymtheg"), + (16, "un ar bymtheg"), + (17, "dau ar bymtheg"), + (18, "deunaw"), + (19, "pedwar ar bymtheg"), + (20, "ugain"), + (21, "un ar hugain"), + (22, "dau ar hugain"), + (23, "tri ar hugain"), + (24, "pedwar ar hugain"), + (25, "pump ar hugain"), + (26, "chwech ar hugain"), + (27, "saith ar hugain"), + (28, "wyth ar hugain"), + (29, "naw ar hugain"), + (30, "deg ar hugain"), + (31, "un ar ddeg ar hugain"), + (32, "deuddeg ar hugain"), + (33, "tri ar ddeg ar hugain"), + (34, "pedwar ar ddeg ar hugain"), + 
(35, "pymtheg ar hugain"), + (36, "un ar bymtheg ar hugain"), + (37, "dau ar bymtheg ar hugain"), + (38, "deunaw ar hugain"), + (39, "pedwar ar bymtheg ar hugain"), + (40, "deugain"), + (41, "un a deugain"), + (42, "dau a deugain"), + (43, "tri a deugain"), + (44, "pedwar a deugain"), + (45, "pump a deugain"), + (46, "chwech a deugain"), + (47, "saith a deugain"), + (48, "wyth a deugain"), + (49, "naw a deugain"), + (50, "hanner cant"), + (51, "hanner cant ac un"), + (52, "hanner cant a dau"), + (53, "hanner cant a thri"), + (54, "hanner cant a phedwar"), + (55, "hanner cant a phump"), + (56, "hanner cant a chwech"), + (57, "hanner cant a saith"), + (58, "hanner cant a wyth"), + (59, "hanner cant a naw"), + (60, "trigain"), + (61, "un a thrigain"), + (62, "dau a thrigain"), + (63, "tri a thrigain"), + (64, "pedwar a thrigain"), + (65, "pump a thrigain"), + (66, "chwech a thrigain"), + (67, "saith a thrigain"), + (68, "wyth a thrigain"), + (69, "naw a thrigain"), + (70, "deg a thrigain"), + (71, "un ar ddeg a thrigain"), + (72, "deuddeg a thrigain"), + (73, "tri ar ddeg a thrigain"), + (74, "pedwar ar ddeg a thrigain"), + (75, "pymtheg a thrigain"), + (76, "un ar bymtheg a thrigain"), + (77, "dau ar bymtheg a thrigain"), + (78, "deunaw a thrigain"), + (79, "pedwar ar bymtheg a thrigain"), + (80, "pedwar ugain"), + (81, "un a phedwar ugain"), + (82, "dau a phedwar ugain"), + (83, "tri a phedwar ugain"), + (84, "pedwar a phedwar ugain"), + (85, "pump a phedwar ugain"), + (86, "chwech a phedwar ugain"), + (87, "saith a phedwar ugain"), + (88, "wyth a phedwar ugain"), + (89, "naw a phedwar ugain"), + (90, "deg a phedwar ugain"), + (91, "un ar ddeg a phedwar ugain"), + (92, "deuddeg a phedwar ugain"), + (93, "tri ar ddeg a phedwar ugain"), + (94, "pedwar ar ddeg a phedwar ugain"), + (95, "pymtheg a phedwar ugain"), + (96, "un ar bymtheg a phedwar ugain"), + (97, "dau ar bymtheg a phedwar ugain"), + (98, "deunaw a phedwar ugain"), + (99, "pedwar ar bymtheg a phedwar 
ugain"), + (100, "cant"), + (101, "cant ac un"), + (102, "cant a dau"), + (103, "cant a thri"), + (104, "cant a phedwar"), + (105, "cant a phump"), + (106, "cant a chwech"), + (107, "cant a saith"), + (108, "cant ac wyth"), + (109, "cant a naw"), + (110, "cant a deg"), + (111, "cant ac un ar ddeg"), + (112, "cant a deuddeg"), + (113, "cant a thri ar ddeg"), + (114, "cant a phedwar ar ddeg"), + (115, "cant a phymtheg"), + (116, "cant ac un ar bymtheg"), + (117, "cant a dau ar bymtheg"), + (118, "cant a deunaw"), + (119, "cant a phedwar ar bymtheg"), + (120, "cant ac ugain"), + (121, "cant ac un ar hugain"), + (122, "cant a dau ar hugain"), + + (100, "cant"), + (217, "dau gant a dau ar bymtheg"), + (334, "tri chant a phedwar ar ddeg ar hugain"), + (451, "pedwar cant a hanner ac un"), + (568, "pump cant ac wyth a thrigain"), + (685, "chwech chant a phump a phedwar ugain"), + (802, "wyth cant a dau"), + (919, "naw cant a phedwar ar bymtheg"), + + (100, "cant"), + (200, "dau gant"), + (300, "tri chant"), + (400, "pedwar cant"), + (500, "pump cant"), + (600, "chwech chant"), + (700, "saith cant"), + (800, "wyth cant"), + (900, "naw cant"), + (1000, "mil"), + + (1000, "mil"), + (12111, "deuddeg mil cant ac un ar ddeg"), + (23222, "tair ar hugain mil dau gant a dau ar hugain"), + (34333, "pedair ar ddeg ar hugain mil tri chant a thri ar ddeg ar hugain"), + (45444, "pump a deugain mil pedwar cant a phedwar a deugain"), + (56555, "hanner cant a chwech mil pump cant a hanner a phump"), + (67666, "saith a thrigain mil chwech chant a chwech a thrigain"), + (78777, "deunaw a thrigain mil saith cant a dau ar bymtheg a thrigain"), + (89888, "naw a phedwar ugain mil wyth cant ac wyth a phedwar ugain"), + (100999, "cant mil naw cant a phedwar ar bymtheg a phedwar ugain"), + (112110, "cant a deuddeg mil cant a deg"), + (123221, "cant a thair ar hugain mil dau gant ac un ar hugain"), + (134332, "cant a phedair ar ddeg ar hugain mil tri chant a deuddeg ar hugain"), + (145443, "cant a 
phump a deugain mil pedwar cant a thri a deugain"), + (156554, "cant a hanner a chwech mil pump cant a hanner a phedwar"), + + + (123, "cant a thri ar hugain"), + (2345, "dwy fil tri chant a phump a deugain"), + (34567, "pedair ar ddeg ar hugain mil pump cant a saith a thrigain"), + (654321, "chwech chant a hanner a phedair mil tri chant ac un ar hugain"), + (7654321, "saith miliwn chwech chant a hanner a phedair mil tri chant ac un ar hugain"), + (987654321, "naw cant a saith a phedwar ugain miliwn chwech chant a hanner a phedair mil tri chant ac un ar hugain"), + (123456789012, "cant a thri ar hugain biliwn pedwar cant a hanner a chwech miliwn saith cant a naw a phedwar ugain mil deuddeg"), + (2023, "dwy fil tri ar hugain"), + (-40123, "meinws deugain mil cant a thri ar hugain"), + (12340000000000000, "deuddeg cwadriliwn tri chant a deugain triliwn"), + (3000000000000000, "tri chwadriliwn"), + (2500000000000000000000000000000000, "dau ddengiliwn pump cant noniliwn"), + + + ) + + +TEST_CASES_CARDINAL_FEM = ( + (2, "dwy"), + (3, "tair"), + (4, "pedair"), + (5, "pump"), + (6, "chwech"), + (7, "saith"), + (8, "wyth"), + (9, "naw"), + (10, "deg"), + (11, "un ar ddeg"), + (12, "deuddeg"), + (13, "tair ar ddeg"), + (14, "pedair ar ddeg"), + (15, "pymtheg"), + (16, "un ar bymtheg"), + (17, "dwy ar bymtheg"), + (18, "deunaw"), + (19, "pedair ar bymtheg"), + (20, "ugain"), + (21, "un ar hugain"), + (22, "dwy ar hugain"), + (23, "tair ar hugain"), + (24, "pedair ar hugain"), + (25, "pump ar hugain"), + (26, "chwech ar hugain"), + (27, "saith ar hugain"), + (28, "wyth ar hugain"), + (29, "naw ar hugain"), + (30, "deg ar hugain"), + (31, "un ar ddeg ar hugain"), + (32, "deuddeg ar hugain"), + (33, "tair ar ddeg ar hugain"), + (34, "pedair ar ddeg ar hugain"), + (35, "pymtheg ar hugain"), + (36, "un ar bymtheg ar hugain"), + (37, "dwy ar bymtheg ar hugain"), + (38, "deunaw ar hugain"), + (39, "pedair ar bymtheg ar hugain"), + (40, "deugain"), + (41, "un a deugain"), + (42, 
"dwy a deugain"), + (43, "tair a deugain"), + (44, "pedair a deugain"), + (45, "pump a deugain"), + (46, "chwech a deugain"), + (47, "saith a deugain"), + (48, "wyth a deugain"), + (49, "naw a deugain"), + (50, "hanner cant"), + (51, "hanner cant ac un"), + (52, "hanner cant a dwy"), + (53, "hanner cant a thair"), + (54, "hanner cant a phedair"), + (55, "hanner cant a phump"), + (56, "hanner cant a chwech"), + (57, "hanner cant a saith"), + (58, "hanner cant a wyth"), + (59, "hanner cant a naw"), + (60, "trigain"), + (61, "un a thrigain"), + (62, "dwy a thrigain"), +) + +TEST_CASES_ORDINAL = ( + (0, "dimfed"), + (1, "cyntaf"), + (2, "ail"), + (3, "trydydd"), + (4, "pedwerydd"), + (5, "pumed"), + (6, "chweched"), + (7, "saithfed"), + (8, "wythfed"), + (9, "nawfed"), + (10, "degfed"), + (11, "unfed ar ddeg"), + (12, "deuddegfed"), + (13, "trydydd ar ddeg"), + (14, "pedwerydd ar ddeg"), + (15, "pymthegfed"), + (16, "unfed ar bymtheg"), + (17, "ail ar bymtheg"), + (18, "deunawfed"), + (19, "pedwerydd ar bymtheg"), + (20, "ugainfed"), + (21, "cyntaf ar hugain"), + (22, "ail ar hugain"), + (23, "trydydd ar hugain"), + (24, "pedwerydd ar hugain"), + (25, "pumed ar hugain"), + (26, "chweched ar hugain"), + (27, "saithfed ar hugain"), + (28, "wythfed ar hugain"), + (29, "nawfed ar hugain"), + (30, "degfed ar hugain"), + (31, "unfed ar ddeg ar hugain"), + (32, "deuddegfed ar hugain"), + (33, "trydydd ar ddeg ar hugain"), + (34, "pedwerydd ar ddeg ar hugain"), + (35, "pymthegfed ar hugain"), + (36, "unfed ar bymtheg ar hugain"), + (37, "ail ar bymtheg ar hugain"), + (38, "deunawfed ar hugain"), + (39, "pedwerydd ar bymtheg ar hugain"), + (40, "deugainfed"), + (41, "cyntaf a deugain"), + (42, "ail a deugain"), + (43, "trydydd a deugain"), + (44, "pedwerydd a deugain"), + (45, "pumed a deugain"), + (46, "chweched a deugain"), + (47, "saithfed a deugain"), + (48, "wythfed a deugain"), + (49, "nawfed a deugain"), + (50, "degfed a deugain"), + (51, "unfed ar ddeg a deugain"), + 
(52, "deuddegfed a deugain"), + (53, "trydydd ar ddeg a deugain"), + (54, "pedwerydd ar ddeg a deugain"), + (55, "pymthegfed a deugain"), + (56, "unfed ar bymtheg a deugain"), + (57, "ail ar bymtheg a deugain"), + (58, "deunawfed a deugain"), + (59, "pedwerydd ar bymtheg a deugain"), + (60, "trigainfed"), + (61, "cyntaf a thrigain"), + (62, "ail a thrigain"), + (63, "trydydd a thrigain"), + (64, "pedwerydd a thrigain"), + (65, "pumed a thrigain"), + (66, "chweched a thrigain"), + (67, "saithfed a thrigain"), + (68, "wythfed a thrigain"), + (69, "nawfed a thrigain"), + (70, "degfed a thrigain"), + (71, "unfed ar ddeg a thrigain"), + (72, "deuddegfed a thrigain"), + (73, "trydydd ar ddeg a thrigain"), + (74, "pedwerydd ar ddeg a thrigain"), + (75, "pymthegfed a thrigain"), + (76, "unfed ar bymtheg a thrigain"), + (77, "ail ar bymtheg a thrigain"), + (78, "deunawfed a thrigain"), + (79, "pedwerydd ar bymtheg a thrigain"), + (80, "pedwar ugainfed"), + (81, "cyntaf a phedwar ugain"), + (82, "ail a phedwar ugain"), + (83, "trydydd a phedwar ugain"), + (84, "pedwerydd a phedwar ugain"), + (85, "pumed a phedwar ugain"), + (86, "chweched a phedwar ugain"), + (87, "saithfed a phedwar ugain"), + (88, "wythfed a phedwar ugain"), + (89, "nawfed a phedwar ugain"), + (90, "degfed a phedwar ugain"), + (91, "unfed ar ddeg a phedwar ugain"), + (92, "deuddegfed a phedwar ugain"), + (93, "trydydd ar ddeg a phedwar ugain"), + (94, "pedwerydd ar ddeg a phedwar ugain"), + (95, "pymthegfed a phedwar ugain"), + (96, "unfed ar bymtheg a phedwar ugain"), + (97, "ail ar bymtheg a phedwar ugain"), + (98, "deunawfed a phedwar ugain"), + (99, "pedwerydd ar bymtheg a phedwar ugain"), + (100, "canfed"), + ) + +TEST_CASES_DECIMALS = [ + (123.4567, "cant a thri ar hugain pwynt pedwar pump chwech saith") + ] + +TEST_CASES_TO_CURRENCY_GBP = ( + (2.04, 'dwy bunt, pedwar ceiniog'), + (3.50, 'tair punt, hanner cant ceiniog'), + (2002.15, 'dwy fil dwy o bunnoedd, pymtheg ceiniog'), + (100.01, "cant punt, 
ceiniog"), + (50.00, "hanner cant punt"), + (51.00, "hanner cant ac un punt"), + (152.50, "cant a hanner a dwy o bunnoedd, hanner cant ceiniog") + ) + +TEST_CASES_COUNTED = [ + (2, "ci", "masc", "dau gi"), + (11, "ci", "masc", "un ci ar ddeg"), + (13, "ci", "masc", "tri chi ar ddeg"), + (26, "ci", "masc", "chwech chi ar hugain"), + (56, "ci", "masc", "hanner cant a chwech chi"), + (100, "cwn", "masc", "cant o gwn"), + (2000, "cathod", "fem", "dwy fil o gathod"), + (11, "cath", "fem", "un cath ar ddeg"), + (13, "cath", "fem", "tair cath ar ddeg"), + (26, "cath", "fem", "chwech chath ar hugain"), + (42, "cath", "fem", "dwy gath a deugain"), + (56, "cath", "fem", "hanner cant a chwech chath"), + + ] + +class Num2WordsCYTest(TestCase): + + def test_number(self): + for test in TEST_CASES_CARDINAL: + self.assertEqual(num2words(test[0], lang='cy'), test[1]) + + def test_number_fem(self): + for test in TEST_CASES_CARDINAL_FEM: + self.assertEqual(num2words(test[0], lang='cy', gender="fem"), test[1]) + + def test_decimals(self): + for test in TEST_CASES_DECIMALS: + self.assertEqual(num2words(test[0], lang='cy'), test[1]) + + + def test_ordinals(self): + for test in TEST_CASES_ORDINAL: + self.assertEqual(num2words(test[0], lang='cy', to="ordinal"), test[1]) + + def test_pounds(self): + for test in TEST_CASES_TO_CURRENCY_GBP: + self.assertEqual(num2words(test[0], lang='cy', to="currency", currency="GBP"), test[1]) + + + def test_counted(self): + for test in TEST_CASES_COUNTED: + self.assertEqual(num2words(test[0], lang='cy', counted=test[1], gender=test[2]), test[3]) + + + From 087e402f557f8c4f3b952de9b084668fc548b3e9 Mon Sep 17 00:00:00 2001 From: Johannes Heinecke Date: Sun, 5 Nov 2023 13:52:02 +0100 Subject: [PATCH 2/9] mini doc for new languages --- num2words/README.md | 54 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 num2words/README.md diff --git a/num2words/README.md b/num2words/README.md new file mode 100644 index 
00000000..18558096 --- /dev/null +++ b/num2words/README.md @@ -0,0 +1,54 @@ +# Add new language + +For each new language you must create a file `lang_NN.py` where `NN` is the +ISO 639-1 or ISO 639-3 [language code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes). + +The class defined in this file must be a subclass of `Num2Word_EU` and implement at least the following methods + +``` +to_cardinal(self, number) +to_ordinal(self, number) +``` + +To integrate your language into the `num2words` module, add the name of your file +to the import list in [num2words/__init__.py](num2words/__init__.py) (top of the file), +and `'nn': lang_NN.Num2Word_NN()` to the `CONVERTER_CLASSES` dictionary in the same file. +Do not forget to replace `NN` with the appropriate ISO 639 language code. + +The following is a template for a new language class + +``` +from .lang_EU import Num2Word_EU + +class Num2Word_CY(Num2Word_EU): + def setup(self): + Num2Word_EU.setup(self) + + def __init__(self): + pass + + def to_ordinal(self, number): + # implement your code here. number is the integer to be transformed into an ordinal + # as a word (str) + # which is returned + return "NOT IMPLEMENTED" + + def to_cardinal(self, number): + # implement your code here. number is the integer to be transformed into a cardinal + # as a word (str) + # which is returned + return "NOT IMPLEMENTED" +``` + +You can use as many auxiliary methods as you need to make your code efficient and readable. +If you need further options like Gender, Formal/Informal, add those parameters to the methods, +e.g. 
+ +``` + def to_ordinal(self, number, gender="fem", informal=True): + # your code + pass +``` + +More inspiration can be found in existing `num2words/lang_NN.py` files + From 7d9de8a34cb352bb8c88fcc606f99d9ee6885b04 Mon Sep 17 00:00:00 2001 From: Johannes Heinecke Date: Sun, 5 Nov 2023 13:53:34 +0100 Subject: [PATCH 3/9] added Welsh (cy) and Chechen (ce) --- README.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.rst b/README.rst index c5404bb1..46dcf17b 100644 --- a/README.rst +++ b/README.rst @@ -80,6 +80,8 @@ Besides the numerical argument, there are two main optional arguments, ``to:`` a * ``ar`` (Arabic) * ``az`` (Azerbaijani) * ``by`` (Belarusian) +* ``ce`` (Chechen) +* ``cy`` (Welsh) * ``cz`` (Czech) * ``de`` (German) * ``dk`` (Danish) From 7275bcd725fc8dcacce6b33e62905263ab77b00a Mon Sep 17 00:00:00 2001 From: Johannes Heinecke Date: Thu, 9 Nov 2023 13:39:07 +0100 Subject: [PATCH 4/9] pass tox tests (flake8, isort) --- num2words/README.md | 15 ++ num2words/__init__.py | 18 +- num2words/lang_CE.py | 478 +++++++++++++++++++++--------------------- num2words/lang_CY.py | 257 ++++++++++++++--------- tests/test_ce.py | 109 +++++++--- tests/test_cy.py | 83 +++++--- 6 files changed, 544 insertions(+), 416 deletions(-) diff --git a/num2words/README.md b/num2words/README.md index 18558096..9f873881 100644 --- a/num2words/README.md +++ b/num2words/README.md @@ -52,3 +52,18 @@ e.g. More inspiration can be found in existing `num2words/lang_NN.py` files +## Code validation + +In order to get your contribution merged into the main project, your code must pass the validation tests. +For this, install the packages needed for testing + +``` +pip install -r requirements-test.txt +``` + +and run `tox` + +``` +tox +``` + diff --git a/num2words/__init__.py b/num2words/__init__.py index 5641598e..a0dff43b 100644 --- a/num2words/__init__.py +++ b/num2words/__init__.py @@ -17,15 +17,15 @@ from __future__ import unicode_literals -from . 
import (lang_AM, lang_AR, lang_AZ, lang_BY, - lang_CE, lang_CY, lang_CZ, lang_DE, lang_DK, - lang_EN, lang_EN_IN, lang_EN_NG, lang_EO, lang_ES, lang_ES_CO, - lang_ES_GT, lang_ES_NI, lang_ES_VE, lang_FA, lang_FI, lang_FR, - lang_FR_BE, lang_FR_CH, lang_FR_DZ, lang_HE, lang_HU, lang_ID, - lang_IS, lang_IT, lang_JA, lang_KN, lang_KO, lang_KZ, lang_LT, - lang_LV, lang_NL, lang_NO, lang_PL, lang_PT, lang_PT_BR, - lang_RO, lang_RU, lang_SK, lang_SL, lang_SR, lang_SV, lang_TE, - lang_TG, lang_TH, lang_TR, lang_UK, lang_VI) +from . import (lang_AM, lang_AR, lang_AZ, lang_BY, lang_CE, lang_CY, lang_CZ, + lang_DE, lang_DK, lang_EN, lang_EN_IN, lang_EN_NG, lang_EO, + lang_ES, lang_ES_CO, lang_ES_GT, lang_ES_NI, lang_ES_VE, + lang_FA, lang_FI, lang_FR, lang_FR_BE, lang_FR_CH, lang_FR_DZ, + lang_HE, lang_HU, lang_ID, lang_IS, lang_IT, lang_JA, lang_KN, + lang_KO, lang_KZ, lang_LT, lang_LV, lang_NL, lang_NO, lang_PL, + lang_PT, lang_PT_BR, lang_RO, lang_RU, lang_SK, lang_SL, + lang_SR, lang_SV, lang_TE, lang_TG, lang_TH, lang_TR, lang_UK, + lang_VI) CONVERTER_CLASSES = { 'am': lang_AM.Num2Word_AM(), diff --git a/num2words/lang_CE.py b/num2words/lang_CE.py index 0d6c01ef..1e6ed99f 100644 --- a/num2words/lang_CE.py +++ b/num2words/lang_CE.py @@ -16,266 +16,265 @@ from __future__ import unicode_literals +from .currency import parse_currency_parts from .lang_EU import Num2Word_EU -from .currency import parse_currency_parts, prefix_currency # Chechen numbers inflect in case if without noun or # use a special oblique ending when followed by a counted noun # 4, 14, 40 and composites thereof agree in class (gender) with the # noun. Chechen has 6 classes which are indicated by the initial -# letter of 4, 14 and 40. By default it is "д" but it can also be "б", "й" or "в". +# letter of 4, 14 and 40. By default it is "д" but +# it can also be "б", "й" or "в". 
# Indicate the needed class prefix as follows # num2words(4, lang='ce', case="abs", clazz="б") CARDINALS = { "casenames": { - "abs": "Им.", - "gen": "Род.", - "dat": "Дат.", - "erg": "Эрг;", + "abs": "Им.", + "gen": "Род.", + "dat": "Дат.", + "erg": "Эрг;", "instr": "Твор.", - "mat": "Вещ.", - "comp": "Сравнит.", - "all": "Местн.", - }, - "casesuffix_cons": { # to be added to numerals with final consonant - "gen": "аннан", - "dat": "анна", - "erg": "амма", + "mat": "Вещ.", + "comp": "Сравнит.", + "all": "Местн.", + }, + "casesuffix_cons": { # to be added to numerals with final consonant + "gen": "аннан", + "dat": "анна", + "erg": "амма", "instr": "анца", - "mat": "аннах", - "comp": "аннал", - "all": "анга", - "obl": "ан", - "ORD": "алгӀа", - }, - "casesuffix_voc": {# to be added to numerals with final vowel - "gen": "ннан", - "dat": "нна", - "erg": "мма", + "mat": "аннах", + "comp": "аннал", + "all": "анга", + "obl": "ан", + "ORD": "алгӀа", + }, + "casesuffix_voc": { # to be added to numerals with final vowel + "gen": "ннан", + "dat": "нна", + "erg": "мма", "instr": "нца", - "mat": "ннах", - "comp": "ннал", - "all": "нга", - "obl": "н", - "ORD": "лгӀа", - }, - 0: { "attr": "ноль", - "abs": "ноль", - "gen": "нолан", - "dat": "нолана", - "erg": "ноло", + "mat": "ннах", + "comp": "ннал", + "all": "нга", + "obl": "н", + "ORD": "лгӀа", + }, + 0: { + "attr": "ноль", + "abs": "ноль", + "gen": "нолан", + "dat": "нолана", + "erg": "ноло", "instr": "ноланца", - "mat": "ноланах", - "comp": "ноланал", - "all": "ноланга", + "mat": "ноланах", + "comp": "ноланал", + "all": "ноланга", }, 1: { - "attr": "цхьа", # in front of nouns in ABS - "obl": "цхьана", # with nouns in other cases than ABS - "abs": "цхьаъ", - "gen": "цхьаннан", - "dat": "цхьанна", - "erg": "цхьамма", + "attr": "цхьа", # in front of nouns in ABS + "obl": "цхьана", # with nouns in other cases than ABS + "abs": "цхьаъ", + "gen": "цхьаннан", + "dat": "цхьанна", + "erg": "цхьамма", "instr": "цхьаьнца", - "mat": 
"цхьаннах", - "comp": "цхьаннал", - "all": "цхаьнга", - "ORD": "цхьалгӀа", - }, + "mat": "цхьаннах", + "comp": "цхьаннал", + "all": "цхаьнга", + "ORD": "цхьалгӀа", + }, 2: { - "attr": "ши", #in front of 100, 1000 - "obl": "шина", - "abs": "шиъ", - "gen": "шиннан", - "dat": "шинна", - "erg": "шимма", + "attr": "ши", # in front of 100, 1000 + "obl": "шина", + "abs": "шиъ", + "gen": "шиннан", + "dat": "шинна", + "erg": "шимма", "instr": "шинца", - "mat": "шиннах", - "comp": "шиннал", - "all": "шинга", - "ORD": "шолгӀа", - }, + "mat": "шиннах", + "comp": "шиннал", + "all": "шинга", + "ORD": "шолгӀа", + }, 3: { - "attr": "кхо", - "obl": "кхона", - "abs": "кхоъ", - "gen": "кхааннан", - "dat": "кхаанна", - "erg": "кхаамма", + "attr": "кхо", + "obl": "кхона", + "abs": "кхоъ", + "gen": "кхааннан", + "dat": "кхаанна", + "erg": "кхаамма", "instr": "кхаанца", - "mat": "кхааннах", - "comp": "кхааннал", - "all": "кхаанга", - "ORD": "кхоалгӀа", - - }, + "mat": "кхааннах", + "comp": "кхааннал", + "all": "кхаанга", + "ORD": "кхоалгӀа", + }, 4: { - "attr": "д*и", - "obl": "д*еа", - "abs": "д*иъ", - "gen": "д*еаннан", - "dat": "д*еанна", - "erg": "д*еамма", + "attr": "д*и", + "obl": "д*еа", + "abs": "д*иъ", + "gen": "д*еаннан", + "dat": "д*еанна", + "erg": "д*еамма", "instr": "д*еанца", - "mat": "д*еаннах", - "comp": "д*еаннал", - "all": "д*еанга", - "ORD": "д*оьалгӀа", - }, - + "mat": "д*еаннах", + "comp": "д*еаннал", + "all": "д*еанга", + "ORD": "д*оьалгӀа", + }, 5: { - "attr": "пхи", - "obl": "пхеа", - "abs": "пхиъ", - "gen": "пхеаннан", - "dat": "пхеанна", - "erg": "пхеамма", + "attr": "пхи", + "obl": "пхеа", + "abs": "пхиъ", + "gen": "пхеаннан", + "dat": "пхеанна", + "erg": "пхеамма", "instr": "нхеанца", - "mat": "пхеаннах", - "comp": "пхеаннал", - "all": "пхеанга", - "ORD": "пхоьалгӀа", - }, - + "mat": "пхеаннах", + "comp": "пхеаннал", + "all": "пхеанга", + "ORD": "пхоьалгӀа", + }, 6: { - "abs": "ялх", + "abs": "ялх", "attr": "ялх", - "ORD" : "йолхалгӀа", + "ORD": "йолхалгӀа", 
}, 7: { - "abs": "ворхӀ", + "abs": "ворхӀ", "attr": "ворхӀ", - "ORD": "ворхӀалгӀа", + "ORD": "ворхӀалгӀа", }, 8: { - "abs": "бархӀ", + "abs": "бархӀ", "attr": "бархӀ", - "ORD": "борхӀалӀа", + "ORD": "борхӀалӀа", }, 9: { - "abs": "исс", + "abs": "исс", "attr": "исс", - "ORD": "уьссалгӀа", + "ORD": "уьссалгӀа", }, 10: { - "attr": "итт", - "abs": "итт", - "gen": "иттаннан", - "dat": "иттанна", - "erg": "иттамма", + "attr": "итт", + "abs": "итт", + "gen": "иттаннан", + "dat": "иттанна", + "erg": "иттамма", "instr": "иттанца", - "mat": "иттаннах", - "comp": "иттаннал", - "all": "иттанга", - "ORD": "уьтталгӀа", - }, + "mat": "иттаннах", + "comp": "иттаннал", + "all": "иттанга", + "ORD": "уьтталгӀа", + }, 11: { - "abs": "цхьайтта", + "abs": "цхьайтта", "attr": "цхьайтта", - "ORD": "цхьайтталгӀа", - }, + "ORD": "цхьайтталгӀа", + }, 12: { - "abs": "шийтта", + "abs": "шийтта", "attr": "шийтта", - "ORD": "шийтталга", - }, + "ORD": "шийтталга", + }, 13: { - "abs": "кхойтта", + "abs": "кхойтта", "attr": "кхойтта", - "ORD": "кхойтталгӀа", - }, + "ORD": "кхойтталгӀа", + }, 14: { - "abs": "д*ейтта", + "abs": "д*ейтта", "attr": "д*ейтта", - "ORD": "д*ейтталгӀа", - }, + "ORD": "д*ейтталгӀа", + }, 15: { - "abs": "пхийтта", + "abs": "пхийтта", "attr": "пхийтта", - "ORD": "пхийтталгӀа", - }, + "ORD": "пхийтталгӀа", + }, 16: { - "abs": "ялхитта", + "abs": "ялхитта", "attr": "ялхитта", - "ORD": "ялхитталгӀа", - }, + "ORD": "ялхитталгӀа", + }, 17: { - "abs": "вуьрхӀитта", + "abs": "вуьрхӀитта", "attr": "вуьрхӀитта", - "ORD": "вуьрхӀитталгӀа", - }, + "ORD": "вуьрхӀитталгӀа", + }, 18: { - "abs": "берхӀитта", + "abs": "берхӀитта", "attr": "берхӀитта", - "ORD": "берхитталӀа", - }, + "ORD": "берхитталӀа", + }, 19: { - "abs": "ткъайесна", + "abs": "ткъайесна", "attr": "ткъайесна", - "ORD": "ткъаесналгӀа", - }, + "ORD": "ткъаесналгӀа", + }, 20: { - "abs": "ткъа", - "gen": "ткъаннан", - "dat": "ткъанна", - "erg": "ткъамма", + "abs": "ткъа", + "gen": "ткъаннан", + "dat": "ткъанна", + "erg": 
"ткъамма", "instr": "ткъанца", - "mat": "ткъаннах", - "comp": "ткъаннал", - "all": "ткъанга", - "attr": "ткъе", - "ORD": "ткъолгӀа", - }, + "mat": "ткъаннах", + "comp": "ткъаннал", + "all": "ткъанга", + "attr": "ткъе", + "ORD": "ткъолгӀа", + }, 40: { - "abs": "шовзткъа", + "abs": "шовзткъа", "attr": "шовзткъе", - "ORD": "шовзткъалгІа", - }, + "ORD": "шовзткъалгІа", + }, 60: { - "abs": "кхузткъа", + "abs": "кхузткъа", "attr": "кхузткъе", - "ORD": "кхузткъалгІа", - }, + "ORD": "кхузткъалгІа", + }, 80: { - "abs": "дезткъа", + "abs": "дезткъа", "attr": "дезткъе", - "ORD": "дезткъалгІа", - }, + "ORD": "дезткъалгІа", + }, 100: { - "attr": "бӀе", - "abs": "бӀе", - "obl": "бӀен", - "gen": "бӀеннан", - "dat": "бӀенна", - "erg": "бӀемма", + "attr": "бӀе", + "abs": "бӀе", + "obl": "бӀен", + "gen": "бӀеннан", + "dat": "бӀенна", + "erg": "бӀемма", "instr": "бӀенца", - "mat": "бӀеннах", - "comp": "бӀеннал", - "all": "бӀенга", - "ORD": "бІолгІа", - }, + "mat": "бӀеннах", + "comp": "бӀеннал", + "all": "бӀенга", + "ORD": "бІолгІа", + }, 1000: { - "attr": "эзар", - "abs": "эзар", - "obl": "эзаран", - "gen": "эзарнан", - "dat": "эзарна", - "erg": "эзарно", + "attr": "эзар", + "abs": "эзар", + "obl": "эзаран", + "gen": "эзарнан", + "dat": "эзарна", + "erg": "эзарно", "instr": "эзарнаца", - "mat": "эзарнах", - "comp": "эзарнал", - "all": "эзаранга", - "ORD": "эзарлагІа", - }, + "mat": "эзарнах", + "comp": "эзарнал", + "all": "эзаранга", + "ORD": "эзарлагІа", + }, 1000000: { "attr": "миллион", "abs": "миллион", "ORD": "миллионалгІа", - } - } - + }, +} + ILLIONS = { 6: { "attr": "миллион", @@ -331,17 +330,17 @@ MINUS = "минус" -DECIMALPOINT = "запятая" # check ! +DECIMALPOINT = "запятая" # check ! 
+ class Num2Word_CE(Num2Word_EU): CURRENCY_FORMS = { # currency code: (sg, pl), (sg, pl) - - 'EUR': (('Евро', 'Евро'), ("Сент", "Сенташ")), - 'RUB': (("Сом", "Сомаш"), ("Кепек", "Кепекаш")), - 'USD': (("Доллар", "Доллараш"), ("Сент", "Сенташ")), - 'GBP': (("Фунт", "Фунташ"), ("Пенни", "Пенни")) - } + "EUR": (("Евро", "Евро"), ("Сент", "Сенташ")), + "RUB": (("Сом", "Сомаш"), ("Кепек", "Кепекаш")), + "USD": (("Доллар", "Доллараш"), ("Сент", "Сенташ")), + "GBP": (("Фунт", "Фунташ"), ("Пенни", "Пенни")), + } def setup(self): Num2Word_EU.setup(self) @@ -350,35 +349,27 @@ def __init__(self): pass def to_ordinal(self, number, clazz="д"): - # implement here your code. number is the integer to be transformed into an ordinal - # as a word (str) + # implement here your code. number is the integer to + # be transformed into an ordinal as a word (str) # which is returned return self.to_cardinal(number, clazz=clazz, case="ORD") def to_cardinal(self, number, clazz="д", case="abs"): if isinstance(number, float): entires = self.to_cardinal(int(number)) - float_part = str(number).split('.')[1] + float_part = str(number).split(".")[1] postfix = " ".join( # Drops the trailing zero and comma [self.to_cardinal(int(c)) for c in float_part] - ) + ) return entires + " " + DECIMALPOINT + " " + postfix elif number < 20: - #if case in CARDINALS[number]: - # return CARDINALS[number][case] - #else: - # add casesuffix to ABS stem return self.makecase(number, case, clazz) - #if CARDINALS[number]["abs"][-1] in "а": - # return CARDINALS[number]["abs"] + CARDINALS["casesuffix_voc"][case] - #else: - # return CARDINALS[number]["abs"] + CARDINALS["casesuffix_cons"][case] elif number < 100: twens = number // 20 units = number % 20 - base = twens*20 + base = twens * 20 if units == 0: return self.makecase(number, case, clazz) else: @@ -389,7 +380,9 @@ def to_cardinal(self, number, clazz="д", case="abs"): hundreds = number // 100 tens = number % 100 if hundreds > 1: - hundert = 
CARDINALS[hundreds]["attr"].replace("д*", clazz) + " " + hundert = ( + CARDINALS[hundreds]["attr"].replace("д*", clazz) + " " + ) else: hundert = "" if tens != 0: @@ -405,7 +398,11 @@ def to_cardinal(self, number, clazz="д", case="abs"): else: tcase = case if thousands > 1: - tausend = self.to_cardinal(thousands, clazz=clazz, case="attr") + " " + CARDINALS[1000][tcase] + tausend = ( + self.to_cardinal(thousands, clazz=clazz, case="attr") + + " " + + CARDINALS[1000][tcase] + ) else: tausend = self.makecase(1000, tcase, clazz) @@ -417,7 +414,7 @@ def to_cardinal(self, number, clazz="д", case="abs"): elif number < 10**34: out = [] - for pot in reversed([6,9,12,15,18,21,24,27,30,33]): + for pot in reversed([6, 9, 12, 15, 18, 21, 24, 27, 30, 33]): # 3 digits of billion, trillion etc step = number // 10**pot % 1000 if step > 0: @@ -431,19 +428,26 @@ def to_cardinal(self, number, clazz="д", case="abs"): return "NOT IMPLEMENTED" def _money_verbose(self, number, currency, case): - mcase ="attr" + mcase = "attr" if case != "abs": mcase = "obl" return self.to_cardinal(number, case=mcase) def _cents_verbose(self, number, currency, case): - mcase ="attr" + mcase = "attr" if case != "abs": mcase = "obl" - return self.to_cardinal(number, case="attr") + return self.to_cardinal(number, case=mcase) - def to_currency(self, val, currency='RUB', cents=True, separator=',', - adjective=False, case="abs"): + def to_currency( + self, + val, + currency="RUB", + cents=True, + separator=",", + adjective=False, + case="abs", + ): """ Args: val: Numeric value @@ -461,49 +465,41 @@ def to_currency(self, val, currency='RUB', cents=True, separator=',', cr1, cr2 = self.CURRENCY_FORMS[currency] devise = cr1[0] centime = cr2[0] - #if case != "abs": - # if devise[-1] in "аеиоуяю": - # devise += CARDINALS["casesuffix_voc"][case] - # else: - # devise += CARDINALS["casesuffix_cons"][case] - # if centime[-1] in "аеиоуяю": - # centime += CARDINALS["casesuffix_voc"][case] - # else: - # centime += 
CARDINALS["casesuffix_cons"][case] except KeyError: raise NotImplementedError( - 'Currency code "%s" not implemented for "%s"' % - (currency, self.__class__.__name__)) - - #if adjective and currency in self.CURRENCY_ADJECTIVES: - # cr1 = prefix_currency(self.CURRENCY_ADJECTIVES[currency], cr1) + 'Currency code "%s" not implemented for "%s"' + % (currency, self.__class__.__name__) + ) minus_str = "%s " % self.negword.strip() if is_negative else "" money_str = self._money_verbose(left, currency, case) - cents_str = self._cents_verbose(right, currency, case) \ - if cents else self._cents_terse(right, currency) - - - + cents_str = ( + self._cents_verbose(right, currency, case) + if cents + else self._cents_terse(right, currency) + ) - return u'%s%s %s%s %s %s' % ( + return "%s%s %s%s %s %s" % ( minus_str, money_str, - devise, # always singular + devise, # always singular separator, cents_str, - centime + centime, ) - def makecase(self, number, case, clazz): - #print("ZZZZ", number, CARDINALS[number]) + # print("ZZZZ", number, CARDINALS[number]) if case in CARDINALS[number]: return CARDINALS[number][case].replace("д*", clazz) else: if CARDINALS[number]["abs"][-1] in "а": - return CARDINALS[number]["abs"].replace("д*", clazz) + CARDINALS["casesuffix_voc"][case] + return ( + CARDINALS[number]["abs"].replace("д*", clazz) + + CARDINALS["casesuffix_voc"][case] + ) else: - return CARDINALS[number]["abs"].replace("д*", clazz) + CARDINALS["casesuffix_cons"][case] - - + return ( + CARDINALS[number]["abs"].replace("д*", clazz) + + CARDINALS["casesuffix_cons"][case] + ) diff --git a/num2words/lang_CY.py b/num2words/lang_CY.py index 6d6f3b1e..987b321d 100644 --- a/num2words/lang_CY.py +++ b/num2words/lang_CY.py @@ -16,8 +16,8 @@ from __future__ import unicode_literals -from .lang_EU import Num2Word_EU from .currency import parse_currency_parts, prefix_currency +from .lang_EU import Num2Word_EU # Welsh numerals differs to many other languages since the counted # object does not 
follow the numeral but is inserted between @@ -89,12 +89,12 @@ 12: [("deuddeg", None), (OBJ, None)], 13: [("tri", "AM"), (OBJ, None), ("ar ddeg", None)], 14: [("pedwar", None), (OBJ, None), ("ar ddeg", None)], - 15: [("pymtheg", None), (OBJ, None)], + 15: [("pymtheg", None), (OBJ, None)], 16: [("un", None), (OBJ, None), ("ar bymtheg", None)], 17: [("dau", "SM"), (OBJ, None), ("ar bymtheg", None)], 18: [("deunaw", None), (OBJ, None)], 19: [("pedwar", None), ("ar bymtheg", None)], - } +} CARDINAL_WORDS_FEM = { # masc, fem, triggers mutation @@ -118,22 +118,21 @@ 17: [("dwy", "SM"), (OBJ, None), ("ar bymtheg", None)], 18: [("deunaw", None), (OBJ, None)], 19: [("pedair", None), ("ar bymtheg", None)], - } - - +} -MILLION_WORDS = { 3: ("mil",None), - 6: ("miliwn",None), - 9: ("biliwn",None), - 12: ("triliwn", None), - 15: ("cwadriliwn", None), - 18: ("cwintiliwn", None), - 21: ("secsttiliwn", None), - 24: ("septiliwn", None), - 27: ("octiliwn", None), - 30: ("noniliwn", None), - 33: ("dengiliwn", None), - } +MILLION_WORDS = { + 3: ("mil", None), + 6: ("miliwn", None), + 9: ("biliwn", None), + 12: ("triliwn", None), + 15: ("cwadriliwn", None), + 18: ("cwintiliwn", None), + 21: ("secsttiliwn", None), + 24: ("septiliwn", None), + 27: ("octiliwn", None), + 30: ("noniliwn", None), + 33: ("dengiliwn", None), +} ORDINAL_WORDS = { 0: [("dimfed", None), (OBJ, None)], @@ -151,7 +150,7 @@ 12: [("deuddegfed", None), (OBJ, None)], 13: [("trydydd", None), (OBJ, None), ("ar ddeg", None)], 14: [("pedwerydd", None), (OBJ, None), ("ar ddeg", None)], - 15: [("pymthegfed", None), (OBJ, None)], + 15: [("pymthegfed", None), (OBJ, None)], 16: [("unfed", None), (OBJ, None), ("ar bymtheg", None)], 17: [("ail", "SM"), (OBJ, None), ("ar bymtheg", None)], 18: [("deunawfed", None), (OBJ, None)], @@ -173,7 +172,7 @@ 12: [("deuddegfed", None), (OBJ, None)], 13: [("trydedd", "SM"), (OBJ, None), ("ar ddeg", None)], 14: [("pedwaredd", "SM"), (OBJ, None), ("ar ddeg", None)], - 15: [("pymthegfed", None), 
(OBJ, None)], + 15: [("pymthegfed", None), (OBJ, None)], 16: [("unfed", None), (OBJ, None), ("ar bymtheg", None)], 17: [("ail", "SM"), (OBJ, None), ("ar bymtheg", None)], 18: [("deunawfed", None), (OBJ, None)], @@ -181,39 +180,49 @@ } # The script can extrapolate the missing numbers from the base forms. -STR_TENS = {1: [("ugain", None), (OBJ, None)], - 2: [("deugain", None), (OBJ, None)], - 3: [("trigain", None), (OBJ, None)], - 4: [("pedwar ugain", None), (OBJ, None)], - } - -ORD_STR_TENS = {1: [("ugainfed", None), (OBJ, None)], - 2: [("deugainfed", None), (OBJ, None)], - 3: [("trigainfed", None), (OBJ, None)], - 4: [("pedwar ugainfed", None), (OBJ, None)] - } -STR_TENS_INFORMAL = {1: ("undeg", None), 2: ("dauddeg", None), 3: ("trideg", None), - 4: ("pedwardeg", None), 5: ("pumdeg", None), 6: ("chwedeg", None), - 7: ("saithdeg", None), 8: ("wythdeg", None), 9: ("nawdeg", None) +STR_TENS = { + 1: [("ugain", None), (OBJ, None)], + 2: [("deugain", None), (OBJ, None)], + 3: [("trigain", None), (OBJ, None)], + 4: [("pedwar ugain", None), (OBJ, None)], } +ORD_STR_TENS = { + 1: [("ugainfed", None), (OBJ, None)], + 2: [("deugainfed", None), (OBJ, None)], + 3: [("trigainfed", None), (OBJ, None)], + 4: [("pedwar ugainfed", None), (OBJ, None)], +} +STR_TENS_INFORMAL = { + 1: ("undeg", None), + 2: ("dauddeg", None), + 3: ("trideg", None), + 4: ("pedwardeg", None), + 5: ("pumdeg", None), + 6: ("chwedeg", None), + 7: ("saithdeg", None), + 8: ("wythdeg", None), + 9: ("nawdeg", None), +} -GENERIC_DOLLARS = ('dolar', 'dolarau') -GENERIC_CENTS = ('ceiniog', 'ceiniogau') + +GENERIC_DOLLARS = ("dolar", "dolarau") +GENERIC_CENTS = ("ceiniog", "ceiniogau") CURRENCIES_FEM = ["GBP"] + class Num2Word_CY(Num2Word_EU): CURRENCY_FORMS = { # currency code: (sg, pl), (sg, pl) # in Welsh a noun after a numeral is ALWAYS in the singular - 'EUR': (('euro', 'euros'), GENERIC_CENTS), - 'USD': (GENERIC_DOLLARS, GENERIC_CENTS), - 'GBP': (('punt', 'punnoedd'), ('ceiniog', 'ceiniogau')), - 'CNY': 
(('yuan', 'yuans'), ('ffen', 'ffens')), + "EUR": (("euro", "euros"), GENERIC_CENTS), + "USD": (GENERIC_DOLLARS, GENERIC_CENTS), + "GBP": (("punt", "punnoedd"), ("ceiniog", "ceiniogau")), + "CNY": (("yuan", "yuans"), ("ffen", "ffens")), } - + MINUS_PREFIX_WORD = "meinws " FLOAT_INFIX_WORD = " pwynt " @@ -228,18 +237,22 @@ def float_to_words(self, float_number, ordinal=False): prefix = self.to_ordinal(int(float_number)) else: prefix = self.to_cardinal(int(float_number)) - float_part = str(float_number).split('.')[1] + float_part = str(float_number).split(".")[1] postfix = " ".join( # Drops the trailing zero and comma [self.to_cardinal(int(c)) for c in float_part] ) return prefix + Num2Word_CY.FLOAT_INFIX_WORD + postfix - - def hundred_group(self, number, informal=False, gender="masc", ordinal=False): + def hundred_group( + self, number, informal=False, gender="masc", ordinal=False + ): hundreds = number // 100 - until100 = number % 100 # 0 - 99 - result = [] # list group of number words and mutation info (for the following word) + until100 = number % 100 # 0 - 99 + # list group of number words and mutation info (for the following word) + result = ( + [] + ) if gender == "fem": CW = CARDINAL_WORDS_FEM else: @@ -253,7 +266,24 @@ def hundred_group(self, number, informal=False, gender="masc", ordinal=False): result.extend((CARDINAL_WORDS[hundreds])) result.extend([("cant", None), (OBJ, None)]) if until100: - if until100 in [1,8,11,16,20,21,31,36,41,48,61,68,71,81,88,91]: + if until100 in [ + 1, + 8, + 11, + 16, + 20, + 21, + 31, + 36, + 41, + 48, + 61, + 68, + 71, + 81, + 88, + 91, + ]: result.append(("ac", None)) else: result.append(("a", "AM")) @@ -274,14 +304,17 @@ def hundred_group(self, number, informal=False, gender="masc", ordinal=False): if units == 0: result.extend([("hanner cant", None), (OBJ, None)]) elif units == 1: - result.extend([("hanner cant ac un", None), (OBJ, None)]) + result.extend( + [("hanner cant ac un", None), (OBJ, None)] + ) else: 
result.append(("hanner cant a", "AM")) result.extend(CW[units]) else: - if (number < 20 and number > 0) or (number == 0 and hundreds == 0): + if (number < 20 and number > 0) or ( + number == 0 and hundreds == 0 + ): if gender == "fem": - result.extend(CARDINAL_WORDS_FEM[int(number)]) else: result.extend(CARDINAL_WORDS[int(number)]) @@ -293,7 +326,6 @@ def hundred_group(self, number, informal=False, gender="masc", ordinal=False): degau = ORD_STR_TENS.get(tens) else: degau = STR_TENS.get(tens) - #print("BBBB", number, tens, degau) #, softmutation(degau)) if units != 0: if tens > 1: @@ -310,9 +342,6 @@ def hundred_group(self, number, informal=False, gender="masc", ordinal=False): result.extend(degau) return result - - - def to_ordinal(self, number, informal=False, gender="masc"): if number < 20: return makestring(ORDINAL_WORDS[number]) @@ -321,10 +350,19 @@ def to_ordinal(self, number, informal=False, gender="masc"): elif number > 100: raise NotImplementedError("The given number is too large.") - return self.to_cardinal(number, informal=False, gender=gender, ordinal=True) - + return self.to_cardinal( + number, informal=False, gender=gender, ordinal=True + ) - def to_cardinal(self, number, informal=False, gender="masc", ordinal=False, counted=None, raw=False): + def to_cardinal( + self, + number, + informal=False, + gender="masc", + ordinal=False, + counted=None, + raw=False, + ): negative = False if number < 0: negative = True @@ -341,35 +379,46 @@ def to_cardinal(self, number, informal=False, gender="masc", ordinal=False, coun return self.float_to_words(number) # split in groups of 10**3 - groups = [] # groups of three digits starting from right (units (1 - 999), thousands, millions, .. - lowestgroup = None # find the lowest group of 3 digits > 0 for the ordinals - for pot in [3,6,9,12,15,18,21,24,27,30,33,36]: - gr = (number % 10**pot) // 10**(pot-3) + # groups of three digits starting from right (units (1 - 999), + # thousands, millions, ...) 
+ groups = ( + [] + ) + lowestgroup = ( + None # find the lowest group of 3 digits > 0 for the ordinals + ) + for pot in [3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36]: + gr = (number % 10**pot) // 10 ** (pot - 3) groups.append((gr, pot)) if gr and not lowestgroup: lowestgroup = gr - #print("groups", groups) + # print("groups", groups) result = [] if negative: result.append(("meinws", None)) - - for gr,pot in reversed(groups): + + for gr, pot in reversed(groups): if gr: - #print("AAAA", gr, pot, gender) + # print("AAAA", gr, pot, gender) if pot == 6: - g = "fem" # mil (1000) is feminine + g = "fem" # mil (1000) is feminine elif pot == 3: - g = gender # units depend on the following noun + g = gender # units depend on the following noun else: - g = "masc" # millions etc are masculine + g = "masc" # millions etc are masculine # "mil" is feminine if gr > 1 or pot == 3: - words = self.hundred_group(gr, informal=informal, gender=g, ordinal = ordinal and (lowestgroup==gr)) + words = self.hundred_group( + gr, + informal=informal, + gender=g, + ordinal=ordinal and (lowestgroup == gr), + ) result += words # print(">>>> ", words) if pot > 3: - result.append(MILLION_WORDS[pot-3]) + result.append(MILLION_WORDS[pot - 3]) if raw: # need to be able trigger correct mutation on currencies return result @@ -381,8 +430,9 @@ def to_cardinal(self, number, informal=False, gender="masc", ordinal=False, coun result.extend([("o", "SM"), (counted, None)]) return makestring(result) - def to_currency(self, val, currency='EUR', cents=True, separator=',', - adjective=False): + def to_currency( + self, val, currency="EUR", cents=True, separator=",", adjective=False + ): """ Args: val: Numeric value @@ -400,43 +450,48 @@ def to_currency(self, val, currency='EUR', cents=True, separator=',', except KeyError: raise NotImplementedError( - 'Currency code "%s" not implemented for "%s"' % - (currency, self.__class__.__name__)) + 'Currency code "%s" not implemented for "%s"' + % (currency, 
self.__class__.__name__) + ) if adjective and currency in self.CURRENCY_ADJECTIVES: cr1 = prefix_currency(self.CURRENCY_ADJECTIVES[currency], cr1) minus_str = "%s " % self.negword.strip() if is_negative else "" money_str = self._money_verbose(left, currency) - cents_str = self._cents_verbose(right, currency) \ - if cents else self._cents_terse(right, currency) + cents_str = ( + self._cents_verbose(right, currency) + if cents + else self._cents_terse(right, currency) + ) if right == 0: # no pence - return u'%s%s' % ( + return "%s%s" % ( minus_str, money_str, - #self.pluralize(right, cr2) - ) + # self.pluralize(right, cr2) + ) elif left == 0: # no pence - return u'%s%s' % ( + return "%s%s" % ( minus_str, cents_str, - #self.pluralize(right, cr2) - ) + # self.pluralize(right, cr2) + ) - return u'%s%s%s %s' % ( + return "%s%s%s %s" % ( minus_str, money_str, - #self.pluralize(left, cr1), + # self.pluralize(left, cr1), separator, cents_str, - #self.pluralize(right, cr2) + # self.pluralize(right, cr2) ) def _money_verbose(self, number, currency): - # used in super().to_currency(), we need to add gender here for feminine currenceis + # used in super().to_currency(), we need to add gender + # here for feminine currencies if currency in CURRENCIES_FEM: if number > 100: m = self.to_cardinal(number, gender="fem", raw=True) @@ -450,26 +505,25 @@ def _money_verbose(self, number, currency): return makestring(m) else: if number > 1: - m = self.to_cardinal(number, gender="fem", raw=True) + m = self.to_cardinal(number, gender="fem", raw=True) else: m = [(OBJ, None)] if currency in self.CURRENCY_FORMS: c = self.CURRENCY_FORMS[currency][0][0] else: c = currency - #print("eeeeeeeee", m) - #m.append((c, None)) - #print("fffffffff", m) + # print("eeeeeeeee", m) + # m.append((c, None)) + # print("fffffffff", m) return makestring(m, counted=c) else: return self.to_cardinal(number, raw=True) - def _cents_verbose(self, number, currency): if number == 0: return "" elif number > 100: - m = 
self.to_cardinal(number, raw=True) + m = self.to_cardinal(number, raw=True) if currency in self.CURRENCY_FORMS: c = self.CURRENCY_FORMS[currency][0][1] m.append(("o", "SM")) @@ -480,16 +534,13 @@ def _cents_verbose(self, number, currency): return makestring(m) else: if number > 1: - m = self.to_cardinal(number, raw=True) + m = self.to_cardinal(number, raw=True) else: m = [(OBJ, None)] if currency in self.CURRENCY_FORMS: c = self.CURRENCY_FORMS[currency][1][0] else: c = currency - #print("eeeeeeeee", m) - #m.append((c, None)) - #print("fffffffff", m) return makestring(m, counted=c) @@ -503,24 +554,26 @@ def makestring(result, counted=None): continue else: w = counted - counted = None # only first position + counted = None # only first position if lastmut: out.append(mutate(w, lastmut)) else: out.append(w) lastmut = mut return " ".join(out) - + + def mutate(word, mutation): - #print("uuu", number, word) + # print("uuu", number, word) if mutation == "SM": return softmutation(word) elif mutation == "AM": return aspiratedmutation(word) return word + def softmutation(word): - #print("SM<<<<%s>" % word) + # print("SM<<<<%s>" % word) if word[0] == "p" and word[1] != "h": return "b" + word[1:] elif word[0] == "t" and word[1] != "h": @@ -542,6 +595,7 @@ def softmutation(word): else: return word + def aspiratedmutation(word): if word[0] == "p" and word[1] != "h": return "ph" + word[1:] @@ -551,6 +605,3 @@ def aspiratedmutation(word): return "ch" + word[1:] else: return word - -#def omitt_if_zero(number_to_string): -# return "" if number_to_string == ZERO else number_to_string diff --git a/tests/test_ce.py b/tests/test_ce.py index f4f71ae1..4286c5d5 100644 --- a/tests/test_ce.py +++ b/tests/test_ce.py @@ -120,7 +120,6 @@ (97, "erg", "д", "дезткъе вуьрхӀиттамма"), (98, "instr", "й", "дезткъе берхӀиттанца"), (99, "instr", "б", "дезткъе ткъайеснанца"), - (0, "gen", "б", "нолан"), (100, "mat", "б", "бӀеннах"), (200, "attr", "д", "ши бӀе"), @@ -145,7 +144,7 @@ (2022, "comp", "д", 
"ши эзар ткъе шиннал"), (2100, "obl", "в", "ши эзар бӀен"), (423000, "erg", "в", "ви бӀе ткъе кхо эзарно"), - ] +] TEST_CASES_ORDINAL = [ (1, "all", "б", "цхьалгӀа"), @@ -278,11 +277,9 @@ (2033, "all", "д", "ши эзар ткъе кхойтталгӀа"), (2140, "dat", "б", "ши эзар бӀе шовзткъалгІа"), (423000, "dat", "д", "ди бӀе ткъе кхо эзарлагІа"), - ] +] -TEST_CASES_DECIMALS = [ - (123.4567, "бӀе ткъе кхоъ запятая диъ пхиъ ялх ворхӀ") - ] +TEST_CASES_DECIMALS = [(123.4567, "бӀе ткъе кхоъ запятая диъ пхиъ ялх ворхӀ")] TEST_CASES_MILLIONS = [ (200020, "ши бӀе эзар ткъа"), @@ -297,48 +294,100 @@ (2000200000000000, "ши биллиард ши бӀе миллиард"), (22002200000000000, "ткъе ши биллиард ши биллион ши бӀе миллиард"), (240024000000000000, "ши бӀе шовзткъе биллиард ткъе ди биллион"), - (2600260000000000000, "ши триллион ялх бӀе биллиард ши бӀе кхузткъе биллион"), - (28002800000000000000, "ткъе бархӀ триллион ши биллиард бархӀ бӀе биллион"), + ( + 2600260000000000000, + "ши триллион ялх бӀе биллиард ши бӀе кхузткъе биллион", + ), + ( + 28002800000000000000, + "ткъе бархӀ триллион ши биллиард бархӀ бӀе биллион", + ), (300030000000000000000, "кхо бӀе триллион ткъе итт биллиард"), - (3200320000000000000000, "кхо триллиард ши бӀе триллион кхо бӀе ткъе биллиард"), - (34003400000000000000000, "ткъе дейтта триллиард кхо триллион ди бӀе биллиард"), - (360036000000000000000000, "кхо бӀе кхузткъе триллиард ткъе ялхитта триллион"), - (3800380000000000000000000, "кхо квадриллион бархӀ бӀе триллиард кхо бӀе дезткъе триллион"), + ( + 3200320000000000000000, + "кхо триллиард ши бӀе триллион кхо бӀе ткъе биллиард", + ), + ( + 34003400000000000000000, + "ткъе дейтта триллиард кхо триллион ди бӀе биллиард", + ), + ( + 360036000000000000000000, + "кхо бӀе кхузткъе триллиард ткъе ялхитта триллион", + ), + ( + 3800380000000000000000000, + "кхо квадриллион бархӀ бӀе триллиард кхо бӀе дезткъе триллион", + ), (40004000000000000000000000, "шовзткъе квадриллион ди триллиард"), - (420042000000000000000000000, "ди бӀе 
ткъе квадриллион шовзткъе ши триллиард"), - (4400440000000000000000000000, "ди квадриллиард ди бӀе квадриллион ди бӀе шовзткъе триллиард"), - (46004600000000000000000000000, "шовзткъе ялх квадриллиард ди квадриллион ялх бӀе триллиард"), - (480048000000000000000000000000, "ди бӀе дезткъе квадриллиард шовзткъе бархӀ квадриллион"), + ( + 420042000000000000000000000, + "ди бӀе ткъе квадриллион шовзткъе ши триллиард", + ), + ( + 4400440000000000000000000000, + "ди квадриллиард ди бӀе квадриллион ди бӀе шовзткъе триллиард", + ), + ( + 46004600000000000000000000000, + "шовзткъе ялх квадриллиард ди квадриллион ялх бӀе триллиард", + ), + ( + 480048000000000000000000000000, + "ди бӀе дезткъе квадриллиард шовзткъе бархӀ квадриллион", + ), (5000500000000000000000000000000, "пхи квинтиллион пхи бӀе квадриллион"), - (52005200000000000000000000000000, "шовзткъе шийтта квинтиллион пхи квадриллиард ши бӀе квадриллион"), - (540054000000000000000000000000000, "пхи бӀе шовзткъе квинтиллион шовзткъе дейтта квадриллиард"), - (5600560000000000000000000000000000, "пхи квинтиллиард ялх бӀе квинтиллион пхи бӀе кхузткъе квадриллиард"), - ] + ( + 52005200000000000000000000000000, + "шовзткъе шийтта квинтиллион пхи квадриллиард ши бӀе квадриллион", + ), + ( + 540054000000000000000000000000000, + "пхи бӀе шовзткъе квинтиллион шовзткъе дейтта квадриллиард", + ), + ( + 5600560000000000000000000000000000, + "пхи квинтиллиард ялх бӀе квинтиллион пхи бӀе кхузткъе квадриллиард", + ), +] TEST_CURRENCY = [ - (143.55, "abs", "бӀе шовзткъе кхо Сом, шовзткъе пхийтта Кепек"), - ] + (143.55, "abs", "бӀе шовзткъе кхо Сом, шовзткъе пхийтта Кепек"), +] -class Num2WordsCETest(TestCase): +class Num2WordsCETest(TestCase): def test_number(self): for test in TEST_CASES_CARDINAL: - self.assertEqual(num2words(test[0], lang='ce', case=test[1], clazz=test[2]), test[3]) + self.assertEqual( + num2words(test[0], lang="ce", case=test[1], clazz=test[2]), + test[3], + ) def test_millions(self): for test in 
TEST_CASES_MILLIONS: - self.assertEqual(num2words(test[0], lang='ce'), test[1]) + self.assertEqual(num2words(test[0], lang="ce"), test[1]) def test_ordinal_number(self): for test in TEST_CASES_ORDINAL: - self.assertEqual(num2words(test[0], lang='ce', to="ordinal", clazz=test[2]), test[3]) + self.assertEqual( + num2words(test[0], lang="ce", to="ordinal", clazz=test[2]), + test[3], + ) def test_currency(self): for test in TEST_CURRENCY: - self.assertEqual(num2words(test[0], lang='ce', to="currency", currency="RUB", case=test[1]), test[2]) + self.assertEqual( + num2words( + test[0], + lang="ce", + to="currency", + currency="RUB", + case=test[1], + ), + test[2], + ) def test_decimals(self): for test in TEST_CASES_DECIMALS: - self.assertEqual(num2words(test[0], lang='ce'), test[1]) - - + self.assertEqual(num2words(test[0], lang="ce"), test[1]) diff --git a/tests/test_cy.py b/tests/test_cy.py index 7242df52..4ca9ed0e 100644 --- a/tests/test_cy.py +++ b/tests/test_cy.py @@ -145,7 +145,6 @@ (120, "cant ac ugain"), (121, "cant ac un ar hugain"), (122, "cant a dau ar hugain"), - (100, "cant"), (217, "dau gant a dau ar bymtheg"), (334, "tri chant a phedwar ar ddeg ar hugain"), @@ -154,7 +153,6 @@ (685, "chwech chant a phump a phedwar ugain"), (802, "wyth cant a dau"), (919, "naw cant a phedwar ar bymtheg"), - (100, "cant"), (200, "dau gant"), (300, "tri chant"), @@ -165,11 +163,13 @@ (800, "wyth cant"), (900, "naw cant"), (1000, "mil"), - (1000, "mil"), (12111, "deuddeg mil cant ac un ar ddeg"), (23222, "tair ar hugain mil dau gant a dau ar hugain"), - (34333, "pedair ar ddeg ar hugain mil tri chant a thri ar ddeg ar hugain"), + ( + 34333, + "pedair ar ddeg ar hugain mil tri chant a thri ar ddeg ar hugain", + ), (45444, "pump a deugain mil pedwar cant a phedwar a deugain"), (56555, "hanner cant a chwech mil pump cant a hanner a phump"), (67666, "saith a thrigain mil chwech chant a chwech a thrigain"), @@ -178,26 +178,37 @@ (100999, "cant mil naw cant a phedwar ar bymtheg a 
phedwar ugain"), (112110, "cant a deuddeg mil cant a deg"), (123221, "cant a thair ar hugain mil dau gant ac un ar hugain"), - (134332, "cant a phedair ar ddeg ar hugain mil tri chant a deuddeg ar hugain"), + ( + 134332, + "cant a phedair ar ddeg ar hugain mil tri chant a deuddeg ar hugain", + ), (145443, "cant a phump a deugain mil pedwar cant a thri a deugain"), (156554, "cant a hanner a chwech mil pump cant a hanner a phedwar"), - - (123, "cant a thri ar hugain"), (2345, "dwy fil tri chant a phump a deugain"), (34567, "pedair ar ddeg ar hugain mil pump cant a saith a thrigain"), (654321, "chwech chant a hanner a phedair mil tri chant ac un ar hugain"), - (7654321, "saith miliwn chwech chant a hanner a phedair mil tri chant ac un ar hugain"), - (987654321, "naw cant a saith a phedwar ugain miliwn chwech chant a hanner a phedair mil tri chant ac un ar hugain"), - (123456789012, "cant a thri ar hugain biliwn pedwar cant a hanner a chwech miliwn saith cant a naw a phedwar ugain mil deuddeg"), + ( + 7654321, + "saith miliwn chwech chant a hanner a " + "phedair mil tri chant ac un ar hugain", + ), + ( + 987654321, + "naw cant a saith a phedwar ugain miliwn chwech chant a " + "hanner a phedair mil tri chant ac un ar hugain", + ), + ( + 123456789012, + "cant a thri ar hugain biliwn pedwar cant a hanner a chwech miliwn " + "saith cant a naw a phedwar ugain mil deuddeg", + ), (2023, "dwy fil tri ar hugain"), (-40123, "meinws deugain mil cant a thri ar hugain"), (12340000000000000, "deuddeg cwadriliwn tri chant a deugain triliwn"), (3000000000000000, "tri chwadriliwn"), (2500000000000000000000000000000000, "dau ddengiliwn pump cant noniliwn"), - - - ) +) TEST_CASES_CARDINAL_FEM = ( @@ -366,21 +377,21 @@ (98, "deunawfed a phedwar ugain"), (99, "pedwerydd ar bymtheg a phedwar ugain"), (100, "canfed"), - ) +) TEST_CASES_DECIMALS = [ (123.4567, "cant a thri ar hugain pwynt pedwar pump chwech saith") - ] +] TEST_CASES_TO_CURRENCY_GBP = ( - (2.04, 'dwy bunt, pedwar ceiniog'), - 
(3.50, 'tair punt, hanner cant ceiniog'), - (2002.15, 'dwy fil dwy o bunnoedd, pymtheg ceiniog'), + (2.04, "dwy bunt, pedwar ceiniog"), + (3.50, "tair punt, hanner cant ceiniog"), + (2002.15, "dwy fil dwy o bunnoedd, pymtheg ceiniog"), (100.01, "cant punt, ceiniog"), (50.00, "hanner cant punt"), (51.00, "hanner cant ac un punt"), - (152.50, "cant a hanner a dwy o bunnoedd, hanner cant ceiniog") - ) + (152.50, "cant a hanner a dwy o bunnoedd, hanner cant ceiniog"), +) TEST_CASES_COUNTED = [ (2, "ci", "masc", "dau gi"), @@ -395,36 +406,42 @@ (26, "cath", "fem", "chwech chath ar hugain"), (42, "cath", "fem", "dwy gath a deugain"), (56, "cath", "fem", "hanner cant a chwech chath"), +] - ] class Num2WordsCYTest(TestCase): - def test_number(self): for test in TEST_CASES_CARDINAL: - self.assertEqual(num2words(test[0], lang='cy'), test[1]) + self.assertEqual(num2words(test[0], lang="cy"), test[1]) def test_number_fem(self): for test in TEST_CASES_CARDINAL_FEM: - self.assertEqual(num2words(test[0], lang='cy', gender="fem"), test[1]) + self.assertEqual( + num2words(test[0], lang="cy", gender="fem"), test[1] + ) def test_decimals(self): for test in TEST_CASES_DECIMALS: - self.assertEqual(num2words(test[0], lang='cy'), test[1]) - + self.assertEqual(num2words(test[0], lang="cy"), test[1]) def test_ordinals(self): for test in TEST_CASES_ORDINAL: - self.assertEqual(num2words(test[0], lang='cy', to="ordinal"), test[1]) + self.assertEqual( + num2words(test[0], lang="cy", to="ordinal"), test[1] + ) def test_pounds(self): for test in TEST_CASES_TO_CURRENCY_GBP: - self.assertEqual(num2words(test[0], lang='cy', to="currency", currency="GBP"), test[1]) - + self.assertEqual( + num2words(test[0], lang="cy", to="currency", currency="GBP"), + test[1], + ) def test_counted(self): for test in TEST_CASES_COUNTED: - self.assertEqual(num2words(test[0], lang='cy', counted=test[1], gender=test[2]), test[3]) - - - + self.assertEqual( + num2words( + test[0], lang="cy", counted=test[1], 
gender=test[2] + ), + test[3], + ) From f79af95e224adcc9b9e9182428cde38994b3bc47 Mon Sep 17 00:00:00 2001 From: Johannes Heinecke Date: Thu, 9 Nov 2023 14:29:22 +0100 Subject: [PATCH 5/9] added support for "to_ordinal_num" and "year" --- num2words/lang_CE.py | 7 +++++++ tests/test_ce.py | 15 +++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/num2words/lang_CE.py b/num2words/lang_CE.py index 1e6ed99f..efe1614c 100644 --- a/num2words/lang_CE.py +++ b/num2words/lang_CE.py @@ -488,6 +488,13 @@ def to_currency( centime, ) + def to_ordinal_num(self, number): + self.verify_ordinal(number) + return str(number) + "." + + def to_year(self, year, case="abs"): + return self.to_cardinal(year, case=case) + def makecase(self, number, case, clazz): # print("ZZZZ", number, CARDINALS[number]) if case in CARDINALS[number]: diff --git a/tests/test_ce.py b/tests/test_ce.py index 4286c5d5..a91c5c63 100644 --- a/tests/test_ce.py +++ b/tests/test_ce.py @@ -279,6 +279,12 @@ (423000, "dat", "д", "ди бӀе ткъе кхо эзарлагІа"), ] +TEST_CASES_YEAR = [ + (1719, "abs", "эзар ворхӀ бӀе ткъайесна"), + (1812, "abs", "эзар бархӀ бӀе шийтта"), + (1926, "abs", "эзар исс бӀе ткъе ялх"), +] + TEST_CASES_DECIMALS = [(123.4567, "бӀе ткъе кхоъ запятая диъ пхиъ ялх ворхӀ")] TEST_CASES_MILLIONS = [ @@ -374,6 +380,15 @@ def test_ordinal_number(self): num2words(test[0], lang="ce", to="ordinal", clazz=test[2]), test[3], ) + self.assertEqual(num2words(3, to="ordinal_num", lang='ce'), "3.") + self.assertEqual(num2words(5, to="ordinal_num", lang='ce'), "5.") + self.assertEqual(num2words(82, to="ordinal_num", lang='ce'), "82.") + + def test_year(self): + for test in TEST_CASES_YEAR: + self.assertEqual( + num2words(test[0], lang="ce", to="year", case=test[1]), test[2] + ) def test_currency(self): for test in TEST_CURRENCY: From d5bca219e929433b564d8384f4248a4b0cc837be Mon Sep 17 00:00:00 2001 From: Johannes Heinecke Date: Thu, 9 Nov 2023 16:17:10 +0100 Subject: [PATCH 6/9] improved floats, increased test 
coverage --- num2words/lang_CE.py | 20 +++++++++++++++----- tests/test_ce.py | 20 +++++++++++++------- tests/test_cy.py | 10 ++++++++++ 3 files changed, 38 insertions(+), 12 deletions(-) diff --git a/num2words/lang_CE.py b/num2words/lang_CE.py index efe1614c..abf8e5f0 100644 --- a/num2words/lang_CE.py +++ b/num2words/lang_CE.py @@ -330,7 +330,8 @@ MINUS = "минус" -DECIMALPOINT = "запятая" # check ! +# DECIMALPOINT = "запятая" # check ! +DECIMALPOINT = "а" class Num2Word_CE(Num2Word_EU): @@ -344,9 +345,18 @@ class Num2Word_CE(Num2Word_EU): def setup(self): Num2Word_EU.setup(self) - - def __init__(self): - pass + self.negword = "минус" + self.pointword = "запятая" # check ! + # self.errmsg_nonnum = ( + # u"Seulement des nombres peuvent être convertis en mots." + # ) + # self.errmsg_toobig = ( + # u"Nombre trop grand pour être converti en mots (abs(%s) > %s)." + # ) + # self.exclude_title = ["et", "virgule", "moins"] + self.mid_numwords = [] + self.low_numwords = [] + self.ords = {} def to_ordinal(self, number, clazz="д"): # implement here your code. number is the integer to @@ -490,7 +500,7 @@ def to_currency( def to_ordinal_num(self, number): self.verify_ordinal(number) - return str(number) + "." 
+ return str(number) + "-й" def to_year(self, year, case="abs"): return self.to_cardinal(year, case=case) diff --git a/tests/test_ce.py b/tests/test_ce.py index a91c5c63..7deddfe6 100644 --- a/tests/test_ce.py +++ b/tests/test_ce.py @@ -285,7 +285,7 @@ (1926, "abs", "эзар исс бӀе ткъе ялх"), ] -TEST_CASES_DECIMALS = [(123.4567, "бӀе ткъе кхоъ запятая диъ пхиъ ялх ворхӀ")] +TEST_CASES_DECIMALS = [(123.4567, "бӀе ткъе кхоъ а диъ пхиъ ялх ворхӀ")] TEST_CASES_MILLIONS = [ (200020, "ши бӀе эзар ткъа"), @@ -355,10 +355,12 @@ 5600560000000000000000000000000000, "пхи квинтиллиард ялх бӀе квинтиллион пхи бӀе кхузткъе квадриллиард", ), + (10**56, "NOT IMPLEMENTED") ] TEST_CURRENCY = [ - (143.55, "abs", "бӀе шовзткъе кхо Сом, шовзткъе пхийтта Кепек"), + (143.55, "abs", "RUB", "бӀе шовзткъе кхо Сом, шовзткъе пхийтта Кепек"), + (243.15, "dat", "RUB", "ши бӀе шовзткъе кхона Сом, пхийттан Кепек"), ] @@ -380,9 +382,9 @@ def test_ordinal_number(self): num2words(test[0], lang="ce", to="ordinal", clazz=test[2]), test[3], ) - self.assertEqual(num2words(3, to="ordinal_num", lang='ce'), "3.") - self.assertEqual(num2words(5, to="ordinal_num", lang='ce'), "5.") - self.assertEqual(num2words(82, to="ordinal_num", lang='ce'), "82.") + self.assertEqual(num2words(3, to="ordinal_num", lang='ce'), "3-й") + self.assertEqual(num2words(5, to="ordinal_num", lang='ce'), "5-й") + self.assertEqual(num2words(82, to="ordinal_num", lang='ce'), "82-й") def test_year(self): for test in TEST_CASES_YEAR: @@ -397,12 +399,16 @@ def test_currency(self): test[0], lang="ce", to="currency", - currency="RUB", + currency=test[2], case=test[1], ), - test[2], + test[3], ) + def test_currency_missing(self): + with self.assertRaises(NotImplementedError): + num2words(2.45, to="currency", lang='cy', currency="DEM") + def test_decimals(self): for test in TEST_CASES_DECIMALS: self.assertEqual(num2words(test[0], lang="ce"), test[1]) diff --git a/tests/test_cy.py b/tests/test_cy.py index 4ca9ed0e..ff47d031 100644 --- 
a/tests/test_cy.py +++ b/tests/test_cy.py @@ -379,6 +379,14 @@ (100, "canfed"), ) +TEST_CASES_YEAR = [ + (1922, "mil naw dau dau"), + (1989, "mil naw wyth naw"), + (1812, "mil wyth un dau"), + (2012, "dwy fil deuddeg"), + (2023, "dwy fil tri ar hugain") + ] + TEST_CASES_DECIMALS = [ (123.4567, "cant a thri ar hugain pwynt pedwar pump chwech saith") ] @@ -445,3 +453,5 @@ def test_counted(self): ), test[3], ) + +# TODO 'ordinal_num', 'year' From 28e3f0ef240db13fa9a315efcd4dce119f0b5c5d Mon Sep 17 00:00:00 2001 From: Johannes Heinecke Date: Thu, 9 Nov 2023 17:25:23 +0100 Subject: [PATCH 7/9] coverage trying to improve ... --- num2words/lang_CY.py | 26 ++++++++++++++------------ tests/test_cy.py | 6 ++++++ 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/num2words/lang_CY.py b/num2words/lang_CY.py index 987b321d..f38c7a5e 100644 --- a/num2words/lang_CY.py +++ b/num2words/lang_CY.py @@ -16,7 +16,7 @@ from __future__ import unicode_literals -from .currency import parse_currency_parts, prefix_currency +from .currency import parse_currency_parts from .lang_EU import Num2Word_EU # Welsh numerals differs to many other languages since the counted @@ -232,11 +232,11 @@ def setup(self): def __init__(self): pass - def float_to_words(self, float_number, ordinal=False): - if ordinal: - prefix = self.to_ordinal(int(float_number)) - else: - prefix = self.to_cardinal(int(float_number)) + def float_to_words(self, float_number): + # if ordinal: + # prefix = self.to_ordinal(int(float_number)) + # else: + prefix = self.to_cardinal(int(float_number)) float_part = str(float_number).split(".")[1] postfix = " ".join( # Drops the trailing zero and comma @@ -288,9 +288,9 @@ def hundred_group( else: result.append(("a", "AM")) if until100: - if informal: - pass - elif not ordinal and until100 >= 50 and until100 <= 59: + # if informal: + # pass + if not ordinal and until100 >= 50 and until100 <= 59: units = number % 10 if hundreds > 0: if units == 0: @@ -454,8 +454,8 @@ def 
to_currency( % (currency, self.__class__.__name__) ) - if adjective and currency in self.CURRENCY_ADJECTIVES: - cr1 = prefix_currency(self.CURRENCY_ADJECTIVES[currency], cr1) + # if adjective and currency in self.CURRENCY_ADJECTIVES: + # cr1 = prefix_currency(self.CURRENCY_ADJECTIVES[currency], cr1) minus_str = "%s " % self.negword.strip() if is_negative else "" money_str = self._money_verbose(left, currency) @@ -473,7 +473,7 @@ def to_currency( # self.pluralize(right, cr2) ) elif left == 0: - # no pence + # no pounds return "%s%s" % ( minus_str, cents_str, @@ -506,6 +506,8 @@ def _money_verbose(self, number, currency): else: if number > 1: m = self.to_cardinal(number, gender="fem", raw=True) + elif number == 0: + m = self.to_cardinal(number, gender="fem", raw=True) else: m = [(OBJ, None)] if currency in self.CURRENCY_FORMS: diff --git a/tests/test_cy.py b/tests/test_cy.py index ff47d031..d8196709 100644 --- a/tests/test_cy.py +++ b/tests/test_cy.py @@ -154,6 +154,7 @@ (802, "wyth cant a dau"), (919, "naw cant a phedwar ar bymtheg"), (100, "cant"), + (150, "cant a hanner"), (200, "dau gant"), (300, "tri chant"), (400, "pedwar cant"), @@ -392,6 +393,7 @@ ] TEST_CASES_TO_CURRENCY_GBP = ( + (0.00, "dim punt"), (2.04, "dwy bunt, pedwar ceiniog"), (3.50, "tair punt, hanner cant ceiniog"), (2002.15, "dwy fil dwy o bunnoedd, pymtheg ceiniog"), @@ -428,6 +430,10 @@ def test_number_fem(self): num2words(test[0], lang="cy", gender="fem"), test[1] ) + def test_number_not_implemented(self): + with self.assertRaises(NotImplementedError): + num2words(10**66, lang='cy') + def test_decimals(self): for test in TEST_CASES_DECIMALS: self.assertEqual(num2words(test[0], lang="cy"), test[1]) From b0547a13a2b14fd13b42b1cb4b9396991a9535c2 Mon Sep 17 00:00:00 2001 From: Johannes Heinecke Date: Thu, 9 Nov 2023 20:41:11 +0100 Subject: [PATCH 8/9] improved coverage tests do pass in 333->343, 341->343, 572->exit but coverage marks as not tested --- num2words/README.md | 3 +-
num2words/lang_CY.py | 92 ++++++++++++++++++++++---------------------- tests/test_cy.py | 12 ++++++ 3 files changed, 59 insertions(+), 48 deletions(-) diff --git a/num2words/README.md b/num2words/README.md index 9f873881..b7e2e0fa 100644 --- a/num2words/README.md +++ b/num2words/README.md @@ -61,9 +61,10 @@ For this install the packages needed to test pip install -r requirements-test.txt ``` -and run `tox` +run `tox` and `coverage` to check that the code is well formatted and all parts of the code are tested ``` tox +python3 -m coverage report -m ``` diff --git a/num2words/lang_CY.py b/num2words/lang_CY.py index f38c7a5e..e5045129 100644 --- a/num2words/lang_CY.py +++ b/num2words/lang_CY.py @@ -226,8 +226,8 @@ class Num2Word_CY(Num2Word_EU): MINUS_PREFIX_WORD = "meinws " FLOAT_INFIX_WORD = " pwynt " - def setup(self): - Num2Word_EU.setup(self) +# def setup(self): +# Num2Word_EU.setup(self) def __init__(self): pass @@ -492,57 +492,57 @@ def to_currency( def _money_verbose(self, number, currency): # used in super().to_currency(), we need to add gender # here for feminine currencies - if currency in CURRENCIES_FEM: - if number > 100: - m = self.to_cardinal(number, gender="fem", raw=True) - if currency in self.CURRENCY_FORMS: - c = self.CURRENCY_FORMS[currency][0][1] - m.append(("o", "SM")) - m.append((c, None)) - else: - c = currency - m.append((c, None)) - return makestring(m) - else: - if number > 1: - m = self.to_cardinal(number, gender="fem", raw=True) - elif number == 0: - m = self.to_cardinal(number, gender="fem", raw=True) - else: - m = [(OBJ, None)] - if currency in self.CURRENCY_FORMS: - c = self.CURRENCY_FORMS[currency][0][0] - else: - c = currency - # print("eeeeeeeee", m) - # m.append((c, None)) - # print("fffffffff", m) - return makestring(m, counted=c) + # if currency in CURRENCIES_FEM: # always true in this context + if number > 100: + m = self.to_cardinal(number, gender="fem", raw=True) + # if currency in self.CURRENCY_FORMS: + c =
self.CURRENCY_FORMS[currency][0][1] + m.append(("o", "SM")) + m.append((c, None)) + # else: + # c = currency + # m.append((c, None)) + return makestring(m) else: - return self.to_cardinal(number, raw=True) + # if number > 1: + m = self.to_cardinal(number, gender="fem", raw=True) + # elif number == 0: + # m = self.to_cardinal(number, gender="fem", raw=True) + # else: + # m = [(OBJ, None)] + # if currency in self.CURRENCY_FORMS: + c = self.CURRENCY_FORMS[currency][0][0] + # else: + # c = currency + # print("eeeeeeeee", m) + # m.append((c, None)) + # print("fffffffff", m) + return makestring(m, counted=c) + # else: + # return self.to_cardinal(number, raw=True) def _cents_verbose(self, number, currency): if number == 0: return "" - elif number > 100: - m = self.to_cardinal(number, raw=True) - if currency in self.CURRENCY_FORMS: - c = self.CURRENCY_FORMS[currency][0][1] - m.append(("o", "SM")) - m.append((c, None)) - else: - c = currency - m.append((c, None)) - return makestring(m) + # elif number > 100: + # m = self.to_cardinal(number, raw=True) + # # if currency in self.CURRENCY_FORMS: + # c = self.CURRENCY_FORMS[currency][0][1] + # m.append(("o", "SM")) + # m.append((c, None)) + # # else: + # # c = currency + # # m.append((c, None)) + # return makestring(m) else: if number > 1: m = self.to_cardinal(number, raw=True) else: m = [(OBJ, None)] - if currency in self.CURRENCY_FORMS: - c = self.CURRENCY_FORMS[currency][1][0] - else: - c = currency + # if currency in self.CURRENCY_FORMS: + c = self.CURRENCY_FORMS[currency][1][0] + # else: + # c = currency return makestring(m, counted=c) @@ -566,12 +566,12 @@ def makestring(result, counted=None): def mutate(word, mutation): - # print("uuu", number, word) + # print("uuu", word, mutation) if mutation == "SM": return softmutation(word) elif mutation == "AM": return aspiratedmutation(word) - return word + # return word # does not occur def softmutation(word): @@ -584,8 +584,6 @@ def softmutation(word): return "g" + word[1:] elif 
word[0] == "b" or word[0] == "m": return "f" + word[1:] - elif word[0] == "h": - return word[1:] elif word[0] == "d" and word[1] != "d": return "d" + word elif word.startswith("ll"): diff --git a/tests/test_cy.py b/tests/test_cy.py index d8196709..3f7ee038 100644 --- a/tests/test_cy.py +++ b/tests/test_cy.py @@ -394,6 +394,7 @@ TEST_CASES_TO_CURRENCY_GBP = ( (0.00, "dim punt"), + (0.23, "tri cheiniog ar hugain"), (2.04, "dwy bunt, pedwar ceiniog"), (3.50, "tair punt, hanner cant ceiniog"), (2002.15, "dwy fil dwy o bunnoedd, pymtheg ceiniog"), @@ -405,6 +406,9 @@ TEST_CASES_COUNTED = [ (2, "ci", "masc", "dau gi"), + (2, "ty", "masc", "dau dy"), + (2, "llwy", "fem", "dwy lwy"), + (2, "rhaglen", "masc", "dau raglen"), (11, "ci", "masc", "un ci ar ddeg"), (13, "ci", "masc", "tri chi ar ddeg"), (26, "ci", "masc", "chwech chi ar hugain"), @@ -444,6 +448,10 @@ def test_ordinals(self): num2words(test[0], lang="cy", to="ordinal"), test[1] ) + def test_ordinal_not_implemented(self): + with self.assertRaises(NotImplementedError): + num2words(101, lang='cy', to="ordinal") + def test_pounds(self): for test in TEST_CASES_TO_CURRENCY_GBP: self.assertEqual( @@ -451,6 +459,10 @@ def test_pounds(self): test[1], ) + def test_other_cur(self): + with self.assertRaises(NotImplementedError): + num2words(10.23, lang="cy", to="currency", currency="DEM"), + def test_counted(self): for test in TEST_CASES_COUNTED: self.assertEqual( From 69dd3e8a012f596c0ed96ea365a413fab0ff96aa Mon Sep 17 00:00:00 2001 From: Johannes Heinecke Date: Thu, 28 Mar 2024 13:21:44 +0100 Subject: [PATCH 9/9] reformatted imports since isort complained --- num2words/__init__.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/num2words/__init__.py b/num2words/__init__.py index 1f586b20..95dbcd7a 100644 --- a/num2words/__init__.py +++ b/num2words/__init__.py @@ -17,14 +17,15 @@ from __future__ import unicode_literals -from .
import (lang_AM, lang_AR, lang_AZ, lang_BY, lang_CE, lang_CY, lang_CZ, lang_DE, lang_DK, - lang_EN, lang_EN_IN, lang_EN_NG, lang_EO, lang_ES, lang_ES_CO, - lang_ES_CR, lang_ES_GT, lang_ES_NI, lang_ES_VE, lang_FA, - lang_FI, lang_FR, lang_FR_BE, lang_FR_CH, lang_FR_DZ, lang_HE, - lang_HU, lang_ID, lang_IS, lang_IT, lang_JA, lang_KN, lang_KO, - lang_KZ, lang_LT, lang_LV, lang_NL, lang_NO, lang_PL, lang_PT, - lang_PT_BR, lang_RO, lang_RU, lang_SK, lang_SL, lang_SR, - lang_SV, lang_TE, lang_TG, lang_TH, lang_TR, lang_UK, lang_VI) +from . import (lang_AM, lang_AR, lang_AZ, lang_BY, lang_CE, lang_CY, lang_CZ, + lang_DE, lang_DK, lang_EN, lang_EN_IN, lang_EN_NG, lang_EO, + lang_ES, lang_ES_CO, lang_ES_CR, lang_ES_GT, lang_ES_NI, + lang_ES_VE, lang_FA, lang_FI, lang_FR, lang_FR_BE, lang_FR_CH, + lang_FR_DZ, lang_HE, lang_HU, lang_ID, lang_IS, lang_IT, + lang_JA, lang_KN, lang_KO, lang_KZ, lang_LT, lang_LV, lang_NL, + lang_NO, lang_PL, lang_PT, lang_PT_BR, lang_RO, lang_RU, + lang_SK, lang_SL, lang_SR, lang_SV, lang_TE, lang_TG, lang_TH, + lang_TR, lang_UK, lang_VI) CONVERTER_CLASSES = { 'am': lang_AM.Num2Word_AM(),