Skip to content

Commit

Permalink
[* Unit] Micrograms not recognized as dimension type (#2798) (#2811)
Browse files Browse the repository at this point in the history
* Add support for micrograms in most languages (#2798)

* Added support in Chinese

* Fixed typo in Italian

* Added British spellings and the medical abbreviation (mcg) in English, and added British spellings in Hindi for code-mixing cases

* Added new spec cases in English and Chinese

* Regenerated resources across platforms

* Fixed indentation in French Dimension specs


Co-authored-by: Nayer Wanas <[email protected]>
Co-authored-by: Börje F. Karlsson <[email protected]>
  • Loading branch information
nawanas and tellarin authored Dec 29, 2021
1 parent d32f9e9 commit abdec82
Show file tree
Hide file tree
Showing 51 changed files with 272 additions and 59 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -602,15 +602,16 @@ public static class NumbersWithUnitDefinitions
{ @"Dou", @"市斗|斗" },
{ @"Dan", @"市石|石" },
{ @"Kilogram", @"千克|公斤|kg" },
{ @"Jin", @"市斤|斤" },
{ @"Milligram", @"毫克|mg" },
{ @"Barrel", @"桶" },
{ @"Pot", @"罐" },
{ @"Gram", @"克|g" },
{ @"Milligram", @"毫克|mg" },
{ @"Microgram", @"微克|μg" },
{ @"Ton", @"公吨|吨|t" },
{ @"Pound", @"磅" },
{ @"Ounce", @"盎司" },
{ @"Jin", @"市斤|斤" },
{ @"Liang", @"两" },
{ @"Barrel", @"桶" },
{ @"Pot", @"罐" },
{ @"Bit", @"比特|位|b|bit" },
{ @"Kilobit", @"千比特|千位|kb|Kb" },
{ @"Megabit", @"兆比特|兆位|mb|Mb" },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ public static class NumbersWithUnitDefinitions
{ @"Kilogram", @"kg|kilogram|kilo" },
{ @"Gram", @"g|gram" },
{ @"Milligram", @"mg|milligram" },
{ @"Microgram", @"μg|microgram" },
{ @"Barrel", @"vat|vaten" },
{ @"Gallon", @"-gallon|gallon" },
{ @"Metric ton", @"metrische ton" },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -850,9 +850,10 @@ public static class NumbersWithUnitDefinitions
};
public static readonly Dictionary<string, string> WeightSuffixList = new Dictionary<string, string>
{
{ @"Kilogram", @"kg|kilogram|kilograms|kilo|kilos" },
{ @"Gram", @"g|gram|grams|gm" },
{ @"Milligram", @"mg|milligram|milligrams" },
{ @"Kilogram", @"kg|kilogram|kilograms|kilo|kilos|kilogramme|kilogrammes" },
{ @"Gram", @"g|gram|grams|gm|gramme|grammes" },
{ @"Milligram", @"mg|milligram|milligrams|milligramme|milligrammes" },
{ @"Microgram", @"μg|microgram|micrograms|micro gram|micro grams|microgramme|microgrammes|mcg" },
{ @"Gallon", @"-gallon|gallons|gallon|gal" },
{ @"Metric ton", @"metric tons|metric ton" },
{ @"Ton", @"-ton|ton|tons|tonne|tonnes" },
Expand All @@ -869,6 +870,7 @@ public static class NumbersWithUnitDefinitions
public static readonly IList<string> AmbiguousWeightUnitList = new List<string>
{
@"g",
@"gr",
@"oz",
@"stone",
@"dram",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -709,6 +709,7 @@ public static class NumbersWithUnitDefinitions
{ @"Kilogramme", @"kg|kilogramme|kilogrammes|kilo|kilos" },
{ @"Gram", @"g|gramme|grammes" },
{ @"Milligramme", @"mg|milligramme|milligrammes" },
{ @"Microgramme", @"µg|ug|microgramme|microgrammes" },
{ @"Tonne métrique", @"tonne métrique|tonnes métrique|tonnes métriques|tonne metrique|tonnes metrique" },
{ @"Tonne", @"tonne|tonnes|-tonnes|-tonne" },
{ @"Livre", @"livre|livres" }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -799,6 +799,7 @@ public static class NumbersWithUnitDefinitions
{ @"Kilogram", @"kg|kilogramm|kilo" },
{ @"Gram", @"g|gramm" },
{ @"Milligram", @"mg|milligramm" },
{ @"Microgram", @"μg|mikrogramm" },
{ @"Barrel", @"barrel" },
{ @"Gallon", @"gallone|gallonen" },
{ @"Metric ton", @"metrische tonne|metrische tonnen" },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -813,6 +813,7 @@ public static class NumbersWithUnitDefinitions
{ @"Kilogram", @"kg|kilogram|kilograms|kilo|kilos" },
{ @"Gram", @"g|gram|grams|ग्रा.|ग्रा|ग्राम" },
{ @"Milligram", @"mg|milligram|milligrams|मिलीग्राम|मिग्रा|मि.ग्रा|मि. ग्रा.|मीलीग्राम|एमजी|एम.जी." },
// Microgram: unit symbol, English/British spellings (for code-mixed text), and the
// Devanagari spelling written both joined and with a space.
{ @"Microgram", @"μg|microgram|micrograms|micro gram|micro grams|microgramme|microgrammes|माइक्रोग्राम|माइक्रो ग्राम" },
{ @"Gallon", @"-gallon|gallons|gallon|गैलन|-गैलन" },
{ @"Metric ton", @"metric tons|metric ton|मीट्रिक टन" },
{ @"Ton", @"-ton|ton|tons|tonne|tonnes|टन" },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -786,6 +786,7 @@ public static class NumbersWithUnitDefinitions
{ @"Chilogrammo", @"kg|kilogrammo|chilogrammo|chilogrammi|kilogrammi|kilo|kili|chilo|chili" },
{ @"Grammo", @"g|grammo|grammi|gr" },
{ @"Milligrammo", @"mg|milligrammo|milligrammi" },
{ @"Microgrammo", @"μg|microgrammo|microgrammi" },
{ @"Tonnellata", @"tonnellata|tonnellate" },
{ @"Libbra", @"libbra|libbre" },
{ @"Oncia", @"oncia|once" }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -605,6 +605,7 @@ public static class NumbersWithUnitDefinitions
{ @"Kilogram", @"킬로그램|kg" },
{ @"Jin", @"市斤|斤" },
{ @"Milligram", @"밀리그램|mg" },
{ @"Microgram", @"마이크로그램|μg" },
{ @"Barrel", @"배럴" },
{ @"Pot", @"罐" },
{ @"Gram", @"그램|g" },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ public static class NumbersWithUnitDefinitions
{ @"Barrel", @"-fats|fat" },
{ @"Gram", @"g|gram|-grams" },
{ @"Milligram", @"mg|milligram|-milligrams" },
{ @"Microgram", @"μg|mikrogram|-mikrograms" },
{ @"Gallon", @"-gallon|gallons|gallon" },
{ @"Metric ton", @"metric tons|metric ton" },
{ @"Ton", @"-ton|ton|-tons|tons|ton" },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -841,6 +841,7 @@ public static class NumbersWithUnitDefinitions
{ @"Kilogram", @"kg|kilogram|kilo|kilograma|kilogramda||kilogramdan|kilogramı|kilogramın|kilogramdır|kilogramdı|kilogrammış|kilogramlık|kiloya|kiloda|kilodan|kiloyu|kilonun|kilodur|kiloydu|kiloymuş|kiloluk" },
{ @"Gram", @"g|gram|grama|gramda||gramdan|gramı|gramın|gramdır|gramdı|grammış|gramlık" },
{ @"Milligram", @"mg|miligram|miligrama|miligramda||miligramdan|miligramı|miligramın|miligramdır|miligramdı|miligrammış|miligramlık" },
{ @"Microgram", @"μg|mikrogram|mikrograma|mikrogramda|mikrogramdan|mikrogramı|mikrogramın|mikrogramdır|mikrogramdı|mikrogrammış|mikrogramlık" },
{ @"Barrel", @"varil|varile|varilde|varilden|varili|varilin|varildir|varildi|varilmiş|varillik" },
{ @"Gallon", @"galon|galona|galonda|galondan|galonu|galonun|galondur|galondu|galonmuş|galonluk" },
{ @"Metric ton", @"metrik ton|metrik tona|metrik tonda|metrik tondan|metrik tonu|metrik tonun|metrik tondur|metrik tondu|metrik tonmuş|metrik tonluk" },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ public class ChineseDateTime {

public static final String WeekDayRegex = "(?<weekday>周日|周天|周一|周二|周三|周四|周五|周六|星期一|星期二|星期三|星期四|星期五|星期六|星期日|星期天|礼拜一|礼拜二|礼拜三|礼拜四|礼拜五|礼拜六|礼拜日|礼拜天|禮拜一|禮拜二|禮拜三|禮拜四|禮拜五|禮拜六|禮拜日|禮拜天|週日|週天|週一|週二|週三|週四|週五|週六)";

public static final String WeekDayStartEnd = "^[.]";

public static final String LunarRegex = "(农历|初一|正月|大年(?!龄|纪|级))";

public static final String DateThisRegex = "(这个|这一个|这|这一|本){WeekDayRegex}"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,8 @@ public class EnglishDateTime {
public static final String WeekDayEnd = "(this\\s+)?{WeekDayRegex}\\s*,?\\s*$"
.replace("{WeekDayRegex}", WeekDayRegex);

public static final String WeekDayStart = "^[\\.]";
public static final String WeekDayStart = "^\\s+(on\\s+)?{WeekDayRegex}\\b"
.replace("{WeekDayRegex}", WeekDayRegex);

public static final String RangeUnitRegex = "\\b(?<unit>years?|months?|weeks?|fortnights?)\\b";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ public class FrenchDateTime {
public static final String WeekDayEnd = "{WeekDayRegex}\\s*,?\\s*$"
.replace("{WeekDayRegex}", WeekDayRegex);

public static final String WeekDayStart = "^[\\.]";
public static final String WeekDayStart = "^\\b$";

public static final String RangeUnitRegex = "\\b(?<unit>(l')?ann[eé]e(s)?|mois|semaines?)\\b";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ public class SpanishDateTime {
public static final String WeekDayEnd = "{WeekDayRegex}\\s*,?\\s*$"
.replace("{WeekDayRegex}", WeekDayRegex);

public static final String WeekDayStart = "^[\\.]";
public static final String WeekDayStart = "^\\b$";

public static final String DateYearRegex = "(?<year>{YearRegex}|(?<!,\\s?){TwoDigitYearRegex}|{TwoDigitYearRegex}(?=(\\.(?!\\d)|[?!;]|$)))"
.replace("{YearRegex}", YearRegex)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -590,15 +590,16 @@ public class ChineseNumericWithUnit {
.put("Dou", "市斗|斗")
.put("Dan", "市石|石")
.put("Kilogram", "千克|公斤|kg")
.put("Jin", "市斤|斤")
.put("Milligram", "毫克|mg")
.put("Barrel", "桶")
.put("Pot", "罐")
.put("Gram", "克|g")
.put("Milligram", "毫克|mg")
.put("Microgram", "微克|μg")
.put("Ton", "公吨|吨|t")
.put("Pound", "磅")
.put("Ounce", "盎司")
.put("Jin", "市斤|斤")
.put("Liang", "两")
.put("Barrel", "桶")
.put("Pot", "罐")
.put("Bit", "比特|位|b|bit")
.put("Kilobit", "千比特|千位|kb|Kb")
.put("Megabit", "兆比特|兆位|mb|Mb")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -727,9 +727,10 @@ public class EnglishNumericWithUnit {
public static final List<String> AmbiguousVolumeUnitList = Arrays.asList("l", "ounce", "oz", "cup", "cups", "peck", "pecks", "cord", "cords", "gill", "gills", "barrel", "barrels", "tbl", "quart", "quarts", "pinch", "t.", "T.", "Tb.", "ts.");

public static final ImmutableMap<String, String> WeightSuffixList = ImmutableMap.<String, String>builder()
.put("Kilogram", "kg|kilogram|kilograms|kilo|kilos")
.put("Gram", "g|gram|grams|gm")
.put("Milligram", "mg|milligram|milligrams")
.put("Kilogram", "kg|kilogram|kilograms|kilo|kilos|kilogramme|kilogrammes")
.put("Gram", "g|gram|grams|gm|gramme|grammes")
.put("Milligram", "mg|milligram|milligrams|milligramme|milligrammes")
.put("Microgram", "μg|microgram|micrograms|micro gram|micro grams|microgramme|microgrammes|mcg")
.put("Gallon", "-gallon|gallons|gallon|gal")
.put("Metric ton", "metric tons|metric ton")
.put("Ton", "-ton|ton|tons|tonne|tonnes")
Expand All @@ -744,7 +745,7 @@ public class EnglishNumericWithUnit {
.put("Dram", "dram|drachm|drachma|roman drachma|greek drachma")
.build();

public static final List<String> AmbiguousWeightUnitList = Arrays.asList("g", "oz", "stone", "dram", "lbs", "gal", "grain", "grains");
public static final List<String> AmbiguousWeightUnitList = Arrays.asList("g", "gr", "oz", "stone", "dram", "lbs", "gal", "grain", "grains");

public static final ImmutableMap<String, String> AmbiguityFiltersDict = ImmutableMap.<String, String>builder()
.put("\\bm\\b", "((('|’)\\s*m)|(m\\s*('|’)))")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -659,6 +659,7 @@ public class FrenchNumericWithUnit {
.put("Kilogramme", "kg|kilogramme|kilogrammes|kilo|kilos")
.put("Gram", "g|gramme|grammes")
.put("Milligramme", "mg|milligramme|milligrammes")
.put("Microgramme", "µg|ug|microgramme|microgrammes")
.put("Tonne métrique", "tonne métrique|tonnes métrique|tonnes métriques|tonne metrique|tonnes metrique")
.put("Tonne", "tonne|tonnes|-tonnes|-tonne")
.put("Livre", "livre|livres")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -706,6 +706,7 @@ public class GermanNumericWithUnit {
.put("Kilogram", "kg|kilogramm|kilo")
.put("Gram", "g|gramm")
.put("Milligram", "mg|milligramm")
.put("Microgram", "μg|mikrogramm")
.put("Barrel", "barrel")
.put("Gallon", "gallone|gallonen")
.put("Metric ton", "metrische tonne|metrische tonnen")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ public class JapaneseNumericWithUnit {

public static final String ConnectorToken = "";

public static final Boolean CheckFirstSuffix = true;

public static final ImmutableMap<String, String> CurrencySuffixList = ImmutableMap.<String, String>builder()
.put("Afghan afghani", "アフガニ")
.put("Pul", "プル")
Expand Down Expand Up @@ -541,6 +543,26 @@ public class JapaneseNumericWithUnit {
public static final List<String> CurrencyAmbiguousValues = Arrays.asList("円", "銭", "分", "レク", "プル", "ブル", "\\");

public static final ImmutableMap<String, String> AmbiguityFiltersDict = ImmutableMap.<String, String>builder()
.put("null", "null")
.put("五角", "五角大楼")
.put("普尔", "标准普尔")
.build();

public static final ImmutableMap<String, String> TemperatureSuffixList = ImmutableMap.<String, String>builder()
.put("F", "華氏|華氏温度|華氏温度の|°f")
.put("K", "开尔文温度|开氏度|凯氏度|K|k")
.put("R", "兰氏温度|°r")
.put("C", "摂氏|摂氏温度|°c|℃")
.put("Degree", "度")
.build();

public static final ImmutableMap<String, String> TemperaturePrefixList = ImmutableMap.<String, String>builder()
.put("F", "華氏|華氏温度|華氏温度の|华氏")
.put("K", "开氏温度|开氏")
.put("R", "兰氏温度|兰氏")
.put("C", "摂氏温度|摂氏")
.build();

public static final List<String> TemperatureAmbiguousValues = Arrays.asList("度", "k");

public static final String HalfUnitRegex = "半";
}
Original file line number Diff line number Diff line change
Expand Up @@ -272,19 +272,17 @@ public class ChineseNumeric {
.replace("{PointRegexStr}", PointRegexStr)
.replace("{ZeroToNineIntegerRegex}", ZeroToNineIntegerRegex);

public static final String NumbersWithAllowListRegex = "(?<![百佰]\\s*分\\s*之\\s*({AllIntRegex}[点點]*|{AllFloatRegex})*){NegativeNumberTermsRegex}?({NotSingleRegex}|{SingleRegex})(?!({AllIntRegex}*([点點]{ZeroToNineIntegerRegex}+)*|{AllFloatRegex})*\\s*[个個]\\s*[百佰]\\s*分\\s*[点點])"
public static final String NumbersWithAllowListRegex = "{NegativeNumberTermsRegex}?({NotSingleRegex}|{SingleRegex})"
.replace("{AllIntRegex}", AllIntRegex)
.replace("{AllFloatRegex}", AllFloatRegex)
.replace("{NegativeNumberTermsRegex}", NegativeNumberTermsRegex)
.replace("{NotSingleRegex}", NotSingleRegex)
.replace("{SingleRegex}", SingleRegex)
.replace("{ZeroToNineIntegerRegex}", ZeroToNineIntegerRegex);

public static final String NumbersAggressiveRegex = "(?<![百佰]\\s*分\\s*之\\s*({AllIntRegex}[点點]*|{AllFloatRegex})*){NegativeNumberTermsRegex}?{AllIntRegex}(?!({AllIntRegex}*([点點]{ZeroToNineIntegerRegex}+)*|{AllFloatRegex})*\\s*[个個]\\s*[百佰]\\s*分\\s*[点點])"
public static final String NumbersAggressiveRegex = "{NegativeNumberTermsRegex}?{AllIntRegex}"
.replace("{AllIntRegex}", AllIntRegex)
.replace("{AllFloatRegex}", AllFloatRegex)
.replace("{NegativeNumberTermsRegex}", NegativeNumberTermsRegex)
.replace("{ZeroToNineIntegerRegex}", ZeroToNineIntegerRegex);
.replace("{NegativeNumberTermsRegex}", NegativeNumberTermsRegex);

public static final String PointRegex = "{PointRegexStr}"
.replace("{PointRegexStr}", PointRegexStr);
Expand All @@ -310,7 +308,7 @@ public class ChineseNumeric {
.replace("{NegativeNumberTermsRegex}", NegativeNumberTermsRegex)
.replace("{ZeroToNineFullHalfRegex}", ZeroToNineFullHalfRegex);

public static final String DoubleAllFloatRegex = "(?<![百佰]\\s*分\\s*之\\s*(({AllIntRegex}[点點]*)|{AllFloatRegex})*){AllFloatRegex}(?!{ZeroToNineIntegerRegex}*\\s*[个個]\\s*[百佰]\\s*分\\s*[点點])"
public static final String DoubleAllFloatRegex = "{AllFloatRegex}"
.replace("{AllIntRegex}", AllIntRegex)
.replace("{AllFloatRegex}", AllFloatRegex)
.replace("{ZeroToNineIntegerRegex}", ZeroToNineIntegerRegex);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ export namespace ChineseDateTime {
export const DynastyYearRegex = `(?<dynasty>${RegionTitleRegex})(?<biasYear>(${DynastyStartYear}|\\d{1,3}|[十拾]?(${ZeroToNineIntegerRegexCJK}[十百拾佰]?){0,3}))`;
export const DateYearInCJKRegex = `(?<yearCJK>(${ZeroToNineIntegerRegexCJK}${ZeroToNineIntegerRegexCJK}${ZeroToNineIntegerRegexCJK}${ZeroToNineIntegerRegexCJK}|${ZeroToNineIntegerRegexCJK}${ZeroToNineIntegerRegexCJK}|${ZeroToNineIntegerRegexCJK}${ZeroToNineIntegerRegexCJK}${ZeroToNineIntegerRegexCJK}|${DynastyYearRegex}))`;
export const WeekDayRegex = `(?<weekday>周日|周天|周一|周二|周三|周四|周五|周六|星期一|星期二|星期三|星期四|星期五|星期六|星期日|星期天|礼拜一|礼拜二|礼拜三|礼拜四|礼拜五|礼拜六|礼拜日|礼拜天|禮拜一|禮拜二|禮拜三|禮拜四|禮拜五|禮拜六|禮拜日|禮拜天|週日|週天|週一|週二|週三|週四|週五|週六)`;
export const WeekDayStartEnd = `^[.]`;
export const LunarRegex = `(农历|初一|正月|大年(?!龄|纪|级))`;
export const DateThisRegex = `(这个|这一个|这|这一|本)${WeekDayRegex}`;
export const DateLastRegex = `(上一个|上个|上一|上|最后一个|最后)(的)?${WeekDayRegex}`;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ export namespace EnglishDateTime {
export const OfMonth = `^(\\s*(day\\s+)?of)?\\s*${MonthRegex}`;
export const MonthEnd = `${MonthRegex}\\s*(the)?\\s*$`;
export const WeekDayEnd = `(this\\s+)?${WeekDayRegex}\\s*,?\\s*$`;
export const WeekDayStart = `^[\\.]`;
export const WeekDayStart = `^\\s+(on\\s+)?${WeekDayRegex}\\b`;
export const RangeUnitRegex = `\\b(?<unit>years?|months?|weeks?|fortnights?)\\b`;
export const HourNumRegex = `\\b(?<hournum>zero|one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\\b`;
export const MinuteNumRegex = `(((?<tens>twenty|thirty|fou?rty|fifty)(\\s*-?\\s*))?(?<minnum>one|two|three|four|five|six|seven|eight|nine)|(?<minnum>ten|eleven|twelve|thirteen|fifteen|eighteen|(four|six|seven|nine)(teen)|twenty|thirty|forty|fifty))`;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ export namespace FrenchDateTime {
export const OfMonth = `^(\\s*de)?\\s*${MonthRegex}\\b`;
export const MonthEnd = `${MonthRegex}\\s*(le)?\\s*$`;
export const WeekDayEnd = `${WeekDayRegex}\\s*,?\\s*$`;
export const WeekDayStart = `^[\\.]`;
export const WeekDayStart = `^\\b$`;
export const RangeUnitRegex = `\\b(?<unit>(l')?ann[eé]e(s)?|mois|semaines?)\\b`;
export const HourNumRegex = `\\b(?<hournum>zero|une?(?=\\s+heure)|deux|trois|quatre|cinq|six|sept|huit|neuf|onze|douze|treize|quatorze|quinze|dix-six|seize|dix(-|\\s+)sept|dix(-|\\s+)huit|dix(-|\\s+)neuf|vingt|vingt(-|\\s+)et(-|\\s+)un|vingt(-|\\s+)deux|vingt(-|\\s+)trois|dix)\\b`;
export const MinuteNumRegex = `(?<minnum>((vingt|trente|quarante|cinquante)(\\s*(et|-)?\\s*))?(un|deux|trois|quatre|cinq|six|sept|huit|neuf)|onze|douze|treize|quatorze|quinze|seize|dix-sept|dix-huit|dix-neuf|vingt|trente|quarante|cinquante|dix)`;
Expand Down
Loading

0 comments on commit abdec82

Please sign in to comment.