Skip to content

Commit

Permalink
[* Currency] Expanded recognition of currency value when using [curr…
Browse files Browse the repository at this point in the history
…encyname]$ (e.g. USD$) (#2711) (#2756)

* Expanded recognition of currency value when using [currencyname]$ (e.g. USD$) (#2711)

* Propagated to NL, DE, HI, IT, SV, and TR. PT, ES, and FR will be handled in a separate issue.

Co-authored-by: LionbridgeCS2 <[email protected]>
  • Loading branch information
aitelint and LionbridgeCS2 authored Nov 10, 2021
1 parent 52c16ac commit 4db5674
Show file tree
Hide file tree
Showing 22 changed files with 984 additions and 295 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -530,7 +530,7 @@ public static class NumbersWithUnitDefinitions
{ @"Rwandan franc", @"RWF" },
{ @"Russian ruble", @"RUB" },
{ @"Transnistrian ruble", @"PRB" },
{ @"Belarusian ruble", @"BYN" },
{ @"New Belarusian ruble", @"BYN" },
{ @"Algerian dinar", @"DZD" },
{ @"Bahraini dinar", @"BHD" },
{ @"Iraqi dinar", @"IQD" },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -312,156 +312,156 @@ public static class NumbersWithUnitDefinitions
{ @"Afghan afghani", @"AFN" },
{ @"Euro", @"EUR" },
{ @"Albanian lek", @"ALL" },
{ @"Kwanza angolais", @"AOA" },
{ @"Angolan kwanza", @"AOA" },
{ @"Armenian dram", @"AMD" },
{ @"Florins d'Aruba", @"AWG" },
{ @"Aruban florin", @"AWG" },
{ @"Bangladeshi taka", @"BDT" },
{ @"Ngultrum bhoutanais", @"BTN" },
{ @"Boliviano bolivien", @"BOB" },
{ @"Bosnie-Herzégovine mark convertible", @"BAM" },
{ @"Pula", @"BWP" },
{ @"Réal brésilien", @"BRL" },
{ @"Lev bulgare", @"BGN" },
{ @"Riel cambodgien", @"KHR" },
{ @"Escudo du cap-vert", @"CVE" },
{ @"Colon du costa rica", @"CRC" },
{ @"Kuna croate", @"HRK" },
{ @"Couronne tchèque", @"CZK" },
{ @"Nakfas érythréens", @"ERN" },
{ @"Birr éthiopien", @"ETB" },
{ @"Dalasi gambienne", @"GMD" },
{ @"Lari géorgien", @"GEL" },
{ @"Cedi", @"GHS" },
{ @"Quetzal guatémaltèque", @"GTQ" },
{ @"Gourdes haïtiennes", @"HTG" },
{ @"Lempira hondurien", @"HNL" },
{ @"Forint hongrois", @"HUF" },
{ @"Rial iranien", @"IRR" },
{ @"Bhutanese ngultrum", @"BTN" },
{ @"Bolivian boliviano", @"BOB" },
{ @"Bosnia and Herzegovina convertible mark", @"BAM" },
{ @"Botswana pula", @"BWP" },
{ @"Brazilian real", @"BRL" },
{ @"Bulgarian lev", @"BGN" },
{ @"Cambodian riel", @"KHR" },
{ @"Cape Verdean escudo", @"CVE" },
{ @"Costa Rican colón", @"CRC" },
{ @"Croatian kuna", @"HRK" },
{ @"Czech koruna", @"CZK" },
{ @"Eritrean nakfa", @"ERN" },
{ @"Ethiopian birr", @"ETB" },
{ @"Gambian dalasi", @"GMD" },
{ @"Georgian lari", @"GEL" },
{ @"Ghanaian cedi", @"GHS" },
{ @"Guatemalan quetzal", @"GTQ" },
{ @"Haitian gourde", @"HTG" },
{ @"Honduran lempira", @"HNL" },
{ @"Hungarian forint", @"HUF" },
{ @"Iranian rial", @"IRR" },
{ @"Yemeni rial", @"YER" },
{ @"Israeli new shekel", @"ILS" },
{ @"Yen Japonais", @"JPY" },
{ @"Tenge kazakh", @"KZT" },
{ @"Shilling kényan", @"KES" },
{ @"Corée du nord won", @"KPW" },
{ @"Won sud-coréen", @"KRW" },
{ @"Som Kirghizie", @"KGS" },
{ @"Kip laotien", @"LAK" },
{ @"Loti", @"LSL" },
{ @"Rand sud-africain", @"ZAR" },
{ @"Pataca macanais", @"MOP" },
{ @"Dinar macédonien", @"MKD" },
{ @"Ariary malagache", @"MGA" },
{ @"Kwacha malawien", @"MWK" },
{ @"Ringitt malaisien", @"MYR" },
{ @"Ouguiya mauritanienne", @"MRO" },
{ @"Togrogs mongoles", @"MNT" },
{ @"Metical mozambique", @"MZN" },
{ @"Kyat birmanie", @"MMK" },
{ @"Cordoba nicaraguayen", @"NIO" },
{ @"Japanese yen", @"JPY" },
{ @"Kazakhstani tenge", @"KZT" },
{ @"Kenyan shilling", @"KES" },
{ @"North Korean won", @"KPW" },
{ @"South Korean won", @"KRW" },
{ @"Kyrgyzstani som", @"KGS" },
{ @"Lao kip", @"LAK" },
{ @"Lesotho loti", @"LSL" },
{ @"South African rand", @"ZAR" },
{ @"Macanese pataca", @"MOP" },
{ @"Macedonian denar", @"MKD" },
{ @"Malagasy ariary", @"MGA" },
{ @"Malawian kwacha", @"MWK" },
{ @"Malaysian ringgit", @"MYR" },
{ @"Mauritanian ouguiya", @"MRO" },
{ @"Mongolian tögrög", @"MNT" },
{ @"Mozambican metical", @"MZN" },
{ @"Burmese kyat", @"MMK" },
{ @"Nicaraguan córdoba", @"NIO" },
{ @"Nigerian naira", @"NGN" },
{ @"Livre turque", @"TRY" },
{ @"Rials omanais", @"OMR" },
{ @"Balboa panaméennes", @"PAB" },
{ @"Kina", @"PGK" },
{ @"Guaraní paraguayen", @"PYG" },
{ @"Turkish lira", @"TRY" },
{ @"Omani rial", @"OMR" },
{ @"Panamanian balboa", @"PAB" },
{ @"Papua New Guinean kina", @"PGK" },
{ @"Paraguayan guaraní", @"PYG" },
{ @"Peruvian sol", @"PEN" },
{ @"Złoty polonais", @"PLN" },
{ @"Riyal qatari", @"QAR" },
{ @"Riyal saudi", @"SAR" },
{ @"Tala", @"WST" },
{ @"Polish złoty", @"PLN" },
{ @"Qatari riyal", @"QAR" },
{ @"Saudi riyal", @"SAR" },
{ @"Samoan tālā", @"WST" },
{ @"São Tomé and Príncipe dobra", @"STN" },
{ @"Leone", @"SLL" },
{ @"Lilangeni", @"SZL" },
{ @"Somoni tadjikistan", @"TJS" },
{ @"Baht thaïlandais", @"THB" },
{ @"Hryvnia ukrainien", @"UAH" },
{ @"Sierra Leonean leone", @"SLL" },
{ @"Swazi lilangeni", @"SZL" },
{ @"Tajikistani somoni", @"TJS" },
{ @"Thai baht", @"THB" },
{ @"Ukrainian hryvnia", @"UAH" },
{ @"Vanuatu vatu", @"VUV" },
{ @"Bolívar vénézuélien", @"VEF" },
{ @"Kwacha de Zambie", @"ZMW" },
{ @"Dirham marocain", @"MAD" },
{ @"Dirham des Émirats arabes unis", @"AED" },
{ @"Manat azerbaïdjanais", @"AZN" },
{ @"Manat turkmène", @"TMT" },
{ @"Shilling somalien", @"SOS" },
{ @"Shilling tanzanien", @"TZS" },
{ @"Shilling ougandais", @"UGX" },
{ @"Leu roumain", @"RON" },
{ @"Leu moldave", @"MDL" },
{ @"Roupie népalaise", @"NPR" },
{ @"Roupie pakistanaise", @"PKR" },
{ @"Roupie indienne", @"INR" },
{ @"Roupie seychelloise", @"SCR" },
{ @"Roupie mauricienne", @"MUR" },
{ @"Rufiyaa maldives", @"MVR" },
{ @"Venezuelan bolívar", @"VEF" },
{ @"Zambian kwacha", @"ZMW" },
{ @"Moroccan dirham", @"MAD" },
{ @"United Arab Emirates dirham", @"AED" },
{ @"Azerbaijani manat", @"AZN" },
{ @"Turkmenistan manat", @"TMT" },
{ @"Somali shilling", @"SOS" },
{ @"Tanzanian shilling", @"TZS" },
{ @"Ugandan shilling", @"UGX" },
{ @"Romanian leu", @"RON" },
{ @"Moldovan leu", @"MDL" },
{ @"Nepalese rupee", @"NPR" },
{ @"Pakistani rupee", @"PKR" },
{ @"Indian rupee", @"INR" },
{ @"Seychellois rupee", @"SCR" },
{ @"Mauritian rupee", @"MUR" },
{ @"Maldivian rufiyaa", @"MVR" },
{ @"Sri Lankan rupee", @"LKR" },
{ @"Rupiah Indonésie", @"IDR" },
{ @"Couronne danoise", @"DKK" },
{ @"Couronne norvégienne", @"NOK" },
{ @"Indonesian rupiah", @"IDR" },
{ @"Danish krone", @"DKK" },
{ @"Norwegian krone", @"NOK" },
{ @"Icelandic króna", @"ISK" },
{ @"Couronne suédoise", @"SEK" },
{ @"Franc CFA de l'Afrique de l'Ouest", @"XOF" },
{ @"Franc CFA d'Afrique centrale", @"XAF" },
{ @"Franc comorien", @"KMF" },
{ @"Franc congolais", @"CDF" },
{ @"Franc burundais", @"BIF" },
{ @"Franc djiboutienne", @"DJF" },
{ @"Franc CFP", @"XPF" },
{ @"Franc guinéen", @"GNF" },
{ @"Franc Suisse", @"CHF" },
{ @"Franc rwandais", @"RWF" },
{ @"Rouble russe", @"RUB" },
{ @"Rouble transnistriens", @"PRB" },
{ @"Nouveau rouble biélorusse", @"BYN" },
{ @"Dinar algérien", @"DZD" },
{ @"Dinar de bahreïn", @"BHD" },
{ @"Dinar iraquien", @"IQD" },
{ @"Dinar jordanien", @"JOD" },
{ @"Dinar koweïtien", @"KWD" },
{ @"Dinar libyen", @"LYD" },
{ @"Dinar serbe", @"RSD" },
{ @"Dinar tunisien", @"TND" },
{ @"Peso argentin", @"ARS" },
{ @"Peso chilien", @"CLP" },
{ @"Peso colombien", @"COP" },
{ @"Peso cubains convertibles", @"CUC" },
{ @"Peso cubains", @"CUP" },
{ @"Peso dominicain", @"DOP" },
{ @"Swedish krona", @"SEK" },
{ @"West African CFA franc", @"XOF" },
{ @"Central African CFA franc", @"XAF" },
{ @"Comorian franc", @"KMF" },
{ @"Congolese franc", @"CDF" },
{ @"Burundian franc", @"BIF" },
{ @"Djiboutian franc", @"DJF" },
{ @"CFP franc", @"XPF" },
{ @"Guinean franc", @"GNF" },
{ @"Swiss franc", @"CHF" },
{ @"Rwandan franc", @"RWF" },
{ @"Russian ruble", @"RUB" },
{ @"Transnistrian ruble", @"PRB" },
{ @"New Belarusian ruble", @"BYN" },
{ @"Algerian dinar", @"DZD" },
{ @"Bahraini dinar", @"BHD" },
{ @"Iraqi dinar", @"IQD" },
{ @"Jordanian dinar", @"JOD" },
{ @"Kuwaiti dinar", @"KWD" },
{ @"Libyan dinar", @"LYD" },
{ @"Serbian dinar", @"RSD" },
{ @"Tunisian dinar", @"TND" },
{ @"Argentine peso", @"ARS" },
{ @"Chilean peso", @"CLP" },
{ @"Colombian peso", @"COP" },
{ @"Cuban convertible peso", @"CUC" },
{ @"Cuban peso", @"CUP" },
{ @"Dominican peso", @"DOP" },
{ @"Mexican peso", @"MXN" },
{ @"Peso uruguayen", @"UYU" },
{ @"Livre britannique", @"GBP" },
{ @"Livre sainte-hélène", @"SHP" },
{ @"Livre égyptienne", @"EGP" },
{ @"Livre des îles falkland", @"FKP" },
{ @"Livre gibraltar", @"GIP" },
{ @"Livre manx", @"IMP" },
{ @"Livre jersey", @"JEP" },
{ @"Livre libanaise", @"LBP" },
{ @"Uruguayan peso", @"UYU" },
{ @"British pound", @"GBP" },
{ @"Saint Helena pound", @"SHP" },
{ @"Egyptian pound", @"EGP" },
{ @"Falkland Islands pound", @"FKP" },
{ @"Gibraltar pound", @"GIP" },
{ @"Manx pound", @"IMP" },
{ @"Jersey pound", @"JEP" },
{ @"Lebanese pound", @"LBP" },
{ @"South Sudanese pound", @"SSP" },
{ @"Livre soudanaise", @"SDG" },
{ @"Livre syrienne", @"SYP" },
{ @"Dollar États-Unis", @"USD" },
{ @"Dollar Australien", @"AUD" },
{ @"Dollar des bahamas", @"BSD" },
{ @"Sudanese pound", @"SDG" },
{ @"Syrian pound", @"SYP" },
{ @"United States dollar", @"USD" },
{ @"Australian dollar", @"AUD" },
{ @"Bahamian dollar", @"BSD" },
{ @"Barbadian dollar", @"BBD" },
{ @"Dollar de belize", @"BZD" },
{ @"Dollar des bermudes", @"BMD" },
{ @"Dollar de brunei", @"BND" },
{ @"Dollar de Singapour", @"SGD" },
{ @"Dollar Canadien", @"CAD" },
{ @"Dollar des îles Caïmans", @"KYD" },
{ @"Belize dollar", @"BZD" },
{ @"Bermudian dollar", @"BMD" },
{ @"Brunei dollar", @"BND" },
{ @"Singapore dollar", @"SGD" },
{ @"Canadian dollar", @"CAD" },
{ @"Cayman Islands dollar", @"KYD" },
{ @"New Zealand dollar", @"NZD" },
{ @"Fijian dollar", @"FJD" },
{ @"Dollar guyanien", @"GYD" },
{ @"Dollar de Hong Kong", @"HKD" },
{ @"Dollar jamaïcain", @"JMD" },
{ @"Dollar libérien", @"LRD" },
{ @"Dollar namibien", @"NAD" },
{ @"Dollar des îles Salomon", @"SBD" },
{ @"Dollar du suriname", @"SRD" },
{ @"Nouveau dollar de Taïwan", @"TWD" },
{ @"Dollar trinidadien", @"TTD" },
{ @"Guyanese dollar", @"GYD" },
{ @"Hong Kong dollar", @"HKD" },
{ @"Jamaican dollar", @"JMD" },
{ @"Liberian dollar", @"LRD" },
{ @"Namibian dollar", @"NAD" },
{ @"Solomon Islands dollar", @"SBD" },
{ @"Surinamese dollar", @"SRD" },
{ @"New Taiwan dollar", @"TWD" },
{ @"Trinidad and Tobago dollar", @"TTD" },
{ @"Tuvaluan dollar", @"TVD" },
{ @"Yuan Chinois", @"CNY" },
{ @"Chinese yuan", @"CNY" },
{ @"Rial", @"__RI" },
{ @"Shiling", @"__S" },
{ @"Som", @"__SO" },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ public static class NumbersWithUnitDefinitions
{ @"Rwandan franc", @"RWF" },
{ @"Russian ruble", @"RUB" },
{ @"Transnistrian ruble", @"PRB" },
{ @"Belarusian ruble", @"BYN" },
{ @"New Belarusian ruble", @"BYN" },
{ @"Algerian dinar", @"DZD" },
{ @"Bahraini dinar", @"BHD" },
{ @"Iraqi dinar", @"IQD" },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ public static class NumbersWithUnitDefinitions
};
public static readonly Dictionary<string, string> CurrencySuffixList = new Dictionary<string, string>
{
{ @"Afganistan afganisi", @"afganistan afganisi|afgani|؋|afn|af|afs|afganistan afganisine|afganistan afganisinde|afganistan afganisinden|afganistan afganisini|afganistan afganisinin|afganistan afganisidir|afganistan afganisiydi|afganistan afganisiymiş" },
{ @"Afghan afghani", @"afganistan afganisi|afgani|؋|afn|af|afs|afganistan afganisine|afganistan afganisinde|afganistan afganisinden|afganistan afganisini|afganistan afganisinin|afganistan afganisidir|afganistan afganisiydi|afganistan afganisiymiş" },
{ @"Pul", @"pul|pula|pulda|puldan|pulu|pulun|puldur|puldu|pulmuş|pulluk" },
{ @"Euro", @"euro|€|eur|avro|avroya|avroda|avrodan|avroyu|avronun|avrodur|avroydu|avroymuş|avroluk" },
{ @"Cent", @"sent|s|sentte|sentten|senti|sentin|senttir|sentti|sentmiş|sentlik" },
Expand Down Expand Up @@ -417,7 +417,7 @@ public static class NumbersWithUnitDefinitions
{ @"Rwandan franc", @"RWF" },
{ @"Russian ruble", @"RUB" },
{ @"Transnistrian ruble", @"PRB" },
{ @"Belarusian ruble", @"BYN" },
{ @"New Belarusian ruble", @"BYN" },
{ @"Algerian dinar", @"DZD" },
{ @"Bahraini dinar", @"BHD" },
{ @"Iraqi dinar", @"IQD" },
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Globalization;
using System.Linq;

using Microsoft.Recognizers.Definitions.Dutch;

Expand All @@ -13,8 +16,25 @@ public class CurrencyExtractorConfiguration : DutchNumberWithUnitExtractorConfig
// Immutable copy of NumbersWithUnitDefinitions.CurrencySuffixList.
public static readonly ImmutableDictionary<string, string> CurrencySuffixList =
NumbersWithUnitDefinitions.CurrencySuffixList.ToImmutableDictionary();

// Immutable copy of NumbersWithUnitDefinitions.CurrencyPrefixList, taken as-is.
public static readonly ImmutableDictionary<string, string> CurrencyPrefixList =
NumbersWithUnitDefinitions.CurrencyPrefixList.ToImmutableDictionary();
// Currency name -> lower-cased ISO code, built from CurrencyNameToIsoCodeMap.
// Entries whose code starts with an underscore (fake or unofficial ISO codes) are left out.
public static readonly Dictionary<string, string> IsoCodeDict =
    NumbersWithUnitDefinitions.CurrencyNameToIsoCodeMap
        .Where(entry => !entry.Value.StartsWith("_", StringComparison.Ordinal))
        .ToDictionary(
            entry => entry.Key,
            entry => entry.Value.ToLower(CultureInfo.InvariantCulture));

// Currency name -> lower-cased ISO code followed by the '$' symbol (e.g. 'aud$' for 'AUD$'),
// built from CurrencyNameToIsoCodeMap. Entries whose code starts with an underscore
// (fake or unofficial ISO codes) are left out.
public static readonly Dictionary<string, string> IsoCodeWithSymbolDict =
    NumbersWithUnitDefinitions.CurrencyNameToIsoCodeMap
        .Where(entry => !entry.Value.StartsWith("_", StringComparison.Ordinal))
        .ToDictionary(
            entry => entry.Key,
            entry => entry.Value.ToLower(CultureInfo.InvariantCulture) + "$");

// Per currency name, joins the value from IsoCodeDict and the value from
// IsoCodeWithSymbolDict into a single '|'-separated string (e.g. 'aud|aud$').
public static readonly Dictionary<string, string> IsoCodeCombinedDict =
    IsoCodeDict
        .Concat(IsoCodeWithSymbolDict)
        .GroupBy(entry => entry.Key)
        .ToDictionary(
            byName => byName.Key,
            // Each source is a dictionary, so a name occurs at most once per source:
            // a group holds one or two entries, in source order.
            byName => string.Join("|", byName.Select(entry => entry.Value)));

// Merges NumbersWithUnitDefinitions.CurrencyPrefixList with IsoCodeCombinedDict.
// A currency name present in both gets "<prefix list value>|<ISO code value>";
// a name present in only one of them keeps its single value.
public static readonly Dictionary<string, string> CurrencyPrefixDict =
    NumbersWithUnitDefinitions.CurrencyPrefixList
        .Concat(IsoCodeCombinedDict)
        .GroupBy(entry => entry.Key)
        .ToDictionary(
            byName => byName.Key,
            // A name occurs at most once per source, so a group holds one or two
            // entries, prefix-list value first.
            byName => string.Join("|", byName.Select(entry => entry.Value)));

// Immutable copy of CurrencyPrefixDict (the prefix list merged with the ISO-code entries).
public static readonly ImmutableDictionary<string, string> CurrencyPrefixList = CurrencyPrefixDict.ToImmutableDictionary();

// Immutable copy of NumbersWithUnitDefinitions.FractionalUnitNameToCodeMap.
public static readonly ImmutableDictionary<string, string> FractionalUnitNameToCodeMap =
NumbersWithUnitDefinitions.FractionalUnitNameToCodeMap.ToImmutableDictionary();
Expand Down
Loading

0 comments on commit 4db5674

Please sign in to comment.