Skip to content

Commit

Permalink
[NL Number|Unit] Number and number with unit support in Python (#3047)
Browse files Browse the repository at this point in the history
  • Loading branch information
Conor-Keaney authored Dec 16, 2022
1 parent f26b3d2 commit e379469
Show file tree
Hide file tree
Showing 30 changed files with 2,116 additions and 341 deletions.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 5 additions & 3 deletions Patterns/Dutch/Dutch-Numbers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ NumbersWithDozenSuffix: !simpleRegex
AllIntRegexWithLocks: !nestedRegex
def: ((?<=\b){AllIntRegex}(?=\b))
references: [ AllIntRegex ]
GrossRegex: !simpleRegex
def: (een\s+)?gros
AllIntRegexWithDozenSuffixLocks: !nestedRegex
def: (?<=\b)(((een\s+)?half\s+dozijn)|({AllIntRegex}\s+dozijn(en)?)|{GrossRegex})(?=\b)
references: [ AllIntRegex, GrossRegex ]
Expand Down Expand Up @@ -128,7 +130,7 @@ DoubleExponentialNotationRegex: !simpleRegex
DoubleCaretExponentialNotationRegex: !simpleRegex
def: (((?<!\d+\s*)-\s*)|((?<=\b)(?<!\d+,)))(\d+(,\d+)?)\^([+-]*[1-9]\d*)(?=\b)
DoubleDecimalPointRegex: !paramsRegex
def: (((?<!\d+\s*)-\s*)|((?<=\b)(?<!\d+,)))\d+,\d+(?!(,\d+))(?={placeholder})
def: (?<=\b)((\d{1,3})(\.\d{3})*(\,\d+)?)(?={placeholder})
params: [ placeholder ]
DoubleWithoutIntegralRegex: !paramsRegex
def: (?<=\s|^)(?<!(\d+)),\d+(?!(,\d+))(?={placeholder})
Expand All @@ -139,6 +141,8 @@ DoubleWithRoundNumber: !nestedRegex
DoubleAllFloatRegex: !nestedRegex
def: ((?<=\b){AllFloatRegex}(?=\b))
references: [ AllFloatRegex ]
ConnectorRegex: !simpleRegex
def: (?<spacer>en)
#Percentage Regex
NumberWithSuffixPercentage: !nestedRegex
def: (?<!%)({BaseNumbers.NumberReplaceToken})(\s*)(%(?!{BaseNumbers.NumberReplaceToken})|(procent|percentage|percent)\b)
Expand Down Expand Up @@ -238,8 +242,6 @@ WrittenIntegerSeparatorTexts: [en, ën]
WrittenFractionSeparatorTexts: [uit, van de, op de, en]
HalfADozenRegex: !simpleRegex
def: (een\s+)?half\s+dozijn
GrossRegex: !simpleRegex
def: (een\s+)?gros
DigitalNumberRegex: !nestedRegex
def: ((?<=\b)(honderd|duizend|miljoen|miljard|biljoen|dozijn?)(?=\b))|((?<=(\d|\b)){BaseNumbers.MultiplierLookupRegex}(?=\b))
references: [ BaseNumbers.MultiplierLookupRegex ]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@
from .extractors import *
from .parsers import *
from .chinese import *
from .dutch import *
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

from .extractors import *
from .parsers import *
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

from typing import Dict, List, Pattern

from recognizers_text.culture import Culture
from recognizers_text.extractor import Extractor
from recognizers_text.utilities import RegExpUtility, DefinitionLoader
from recognizers_number.culture import CultureInfo
from recognizers_number.number.models import NumberMode
from recognizers_number.number.dutch.extractors import DutchNumberExtractor
from recognizers_number_with_unit.number_with_unit.constants import Constants
from recognizers_number_with_unit.number_with_unit.extractors import NumberWithUnitExtractorConfiguration
from recognizers_number_with_unit.resources.dutch_numeric_with_unit import DutchNumericWithUnit
from recognizers_number_with_unit.resources.base_units import BaseUnits


# pylint: disable=abstract-method
class DutchNumberWithUnitExtractorConfiguration(NumberWithUnitExtractorConfiguration):
    """Shared Dutch settings for the number-with-unit extractors.

    Stores the Dutch number extractor (unit mode), the prefix/suffix building
    patterns and the compiled connector / non-unit regexes. The unit-specific
    configurations in this module derive from this class and add their own
    unit lists.
    """

    def __init__(self, culture_info: CultureInfo = None):
        """Create the configuration.

        :param culture_info: Culture to run with. ``None`` selects
            ``Culture.Dutch``. The parameter defaults to ``None`` so this class
            can be constructed the same way as its subclasses.
        """
        if culture_info is None:
            culture_info = CultureInfo(Culture.Dutch)
        super().__init__(culture_info)
        self._unit_num_extractor = DutchNumberExtractor(NumberMode.Unit)
        self._build_prefix = DutchNumericWithUnit.BuildPrefix
        self._build_suffix = DutchNumericWithUnit.BuildSuffix
        self._compound_unit_connector_regex = RegExpUtility.get_safe_reg_exp(
            DutchNumericWithUnit.CompoundUnitConnectorRegex)
        self._pm_non_unit_regex = RegExpUtility.get_safe_reg_exp(
            BaseUnits.PmNonUnitRegex)

    @property
    def ambiguity_filters_dict(self) -> Dict[Pattern, Pattern]:
        """Ambiguity filters loaded from ``DutchNumericWithUnit.AmbiguityFiltersDict``."""
        return DefinitionLoader.load_ambiguity_filters(
            DutchNumericWithUnit.AmbiguityFiltersDict)

    @property
    def dimension_ambiguity_filters_dict(self) -> Dict[Pattern, Pattern]:
        """Filters loaded from ``DutchNumericWithUnit.DimensionAmbiguityFiltersDict``."""
        return DefinitionLoader.load_ambiguity_filters(
            DutchNumericWithUnit.DimensionAmbiguityFiltersDict)

    @property
    def unit_num_extractor(self) -> Extractor:
        """The ``DutchNumberExtractor`` created in ``NumberMode.Unit``."""
        return self._unit_num_extractor

    @property
    def build_prefix(self) -> str:
        """``DutchNumericWithUnit.BuildPrefix``."""
        return self._build_prefix

    @property
    def build_suffix(self) -> str:
        """``DutchNumericWithUnit.BuildSuffix``."""
        return self._build_suffix

    @property
    def connector_token(self) -> str:
        """Connector token; an empty string for Dutch."""
        return ''

    @property
    def compound_unit_connector_regex(self) -> Pattern:
        """Compiled ``DutchNumericWithUnit.CompoundUnitConnectorRegex``."""
        return self._compound_unit_connector_regex

    @property
    def non_unit_regex(self) -> Pattern:
        """Compiled ``BaseUnits.PmNonUnitRegex``."""
        return self._pm_non_unit_regex

    @property
    def ambiguous_unit_number_multiplier_regex(self) -> Pattern:
        """No multiplier regex at this level; always ``None``."""
        return None

    def expand_half_suffix(self, source, result, numbers):
        """Intentionally a no-op for Dutch; always returns ``None``."""
        return None


# pylint: enable=abstract-method

class DutchAgeExtractorConfiguration(DutchNumberWithUnitExtractorConfiguration):
    """Extractor configuration for Dutch age expressions."""

    def __init__(self, culture_info: CultureInfo = None):
        """Set up the age unit lists.

        :param culture_info: Forwarded unchanged to the parent configuration.
        """
        super().__init__(culture_info)
        # Age units are looked up as suffixes only; there are no prefixes
        # and no ambiguous units registered here.
        self._ambiguous_unit_list = []
        self._prefix_list = {}
        self._suffix_list = DutchNumericWithUnit.AgeSuffixList

    @property
    def extract_type(self) -> str:
        """Entity type tag: ``Constants.SYS_UNIT_AGE``."""
        return Constants.SYS_UNIT_AGE

    @property
    def suffix_list(self) -> Dict[str, str]:
        """``DutchNumericWithUnit.AgeSuffixList``."""
        return self._suffix_list

    @property
    def prefix_list(self) -> Dict[str, str]:
        """Empty mapping."""
        return self._prefix_list

    @property
    def ambiguous_unit_list(self) -> List[str]:
        """Empty list."""
        return self._ambiguous_unit_list


class DutchCurrencyExtractorConfiguration(DutchNumberWithUnitExtractorConfiguration):
    """Extractor configuration for Dutch currency expressions."""

    def __init__(self, culture_info: CultureInfo = None):
        """Set up the currency unit lists.

        :param culture_info: Forwarded unchanged to the parent configuration.
        """
        super().__init__(culture_info)
        resources = DutchNumericWithUnit
        self._suffix_list = resources.CurrencySuffixList
        self._prefix_list = resources.CurrencyPrefixList
        self._ambiguous_unit_list = resources.AmbiguousCurrencyUnitList

    @property
    def extract_type(self) -> str:
        """Entity type tag: ``Constants.SYS_UNIT_CURRENCY``."""
        return Constants.SYS_UNIT_CURRENCY

    @property
    def suffix_list(self) -> Dict[str, str]:
        """``DutchNumericWithUnit.CurrencySuffixList``."""
        return self._suffix_list

    @property
    def prefix_list(self) -> Dict[str, str]:
        """``DutchNumericWithUnit.CurrencyPrefixList``."""
        return self._prefix_list

    @property
    def ambiguous_unit_list(self) -> List[str]:
        """``DutchNumericWithUnit.AmbiguousCurrencyUnitList``."""
        return self._ambiguous_unit_list


class DutchDimensionExtractorConfiguration(DutchNumberWithUnitExtractorConfiguration):
    """Extractor configuration for Dutch dimension expressions.

    Combines the information, area, length, angle, speed, volume and weight
    suffix tables into a single suffix list.
    """

    def __init__(self, culture_info: CultureInfo = None):
        """Set up the dimension unit lists.

        :param culture_info: Forwarded unchanged to the parent configuration.
        """
        super().__init__(culture_info)
        # Merge order matters: on a duplicate key the later table wins.
        self._suffix_list = {
            **DutchNumericWithUnit.InformationSuffixList,
            **DutchNumericWithUnit.AreaSuffixList,
            **DutchNumericWithUnit.LengthSuffixList,
            **DutchNumericWithUnit.AngleSuffixList,
            **DutchNumericWithUnit.SpeedSuffixList,
            **DutchNumericWithUnit.VolumeSuffixList,
            **DutchNumericWithUnit.WeightSuffixList,
        }
        self._prefix_list = dict()
        self._ambiguous_unit_list = (
            DutchNumericWithUnit.AmbiguousDimensionUnitList
            + DutchNumericWithUnit.AmbiguousAngleUnitList
            + DutchNumericWithUnit.AmbiguousLengthUnitList
            + DutchNumericWithUnit.AmbiguousVolumeUnitList
            + DutchNumericWithUnit.AmbiguousWeightUnitList
        )

    @property
    def ambiguity_filters_dict(self) -> Dict[Pattern, Pattern]:
        """Ambiguity filters loaded from ``DutchNumericWithUnit.AmbiguityFiltersDict``.

        The resource table is passed through
        ``DefinitionLoader.load_ambiguity_filters`` so the result matches the
        declared return type and the parent class's handling of the same
        table, rather than exposing the raw resource dictionary.
        """
        # NOTE(review): presumably the loader compiles the raw pattern
        # strings; confirm against DefinitionLoader.
        return DefinitionLoader.load_ambiguity_filters(
            DutchNumericWithUnit.AmbiguityFiltersDict)

    @property
    def extract_type(self) -> str:
        """Entity type tag: ``Constants.SYS_UNIT_DIMENSION``."""
        return Constants.SYS_UNIT_DIMENSION

    @property
    def suffix_list(self) -> Dict[str, str]:
        """Merged suffix table built in ``__init__``."""
        return self._suffix_list

    @property
    def prefix_list(self) -> Dict[str, str]:
        """Empty mapping."""
        return self._prefix_list

    @property
    def ambiguous_unit_list(self) -> List[str]:
        """Concatenated dimension, angle, length, volume and weight ambiguous units."""
        return self._ambiguous_unit_list


class DutchTemperatureExtractorConfiguration(DutchNumberWithUnitExtractorConfiguration):
    """Extractor configuration for Dutch temperature expressions."""

    def __init__(self, culture_info: CultureInfo = None):
        """Set up the temperature unit lists and the multiplier regex.

        :param culture_info: Forwarded unchanged to the parent configuration.
        """
        super().__init__(culture_info)
        self._suffix_list = DutchNumericWithUnit.TemperatureSuffixList
        self._prefix_list = dict()
        self._ambiguous_unit_list = DutchNumericWithUnit.AmbiguousTemperatureUnitList
        self._ambiguous_unit_number_multiplier_regex = RegExpUtility.get_safe_reg_exp(
            BaseUnits.AmbiguousUnitNumberMultiplierRegex)

    @property
    def ambiguity_filters_dict(self) -> Dict[Pattern, Pattern]:
        """Ambiguity filters loaded from ``DutchNumericWithUnit.AmbiguityFiltersDict``.

        The resource table is passed through
        ``DefinitionLoader.load_ambiguity_filters`` so the result matches the
        declared return type and the parent class's handling of the same
        table, rather than exposing the raw resource dictionary.
        """
        # NOTE(review): presumably the loader compiles the raw pattern
        # strings; confirm against DefinitionLoader.
        return DefinitionLoader.load_ambiguity_filters(
            DutchNumericWithUnit.AmbiguityFiltersDict)

    @property
    def extract_type(self) -> str:
        """Entity type tag: ``Constants.SYS_UNIT_TEMPERATURE``."""
        return Constants.SYS_UNIT_TEMPERATURE

    @property
    def suffix_list(self) -> Dict[str, str]:
        """``DutchNumericWithUnit.TemperatureSuffixList``."""
        return self._suffix_list

    @property
    def prefix_list(self) -> Dict[str, str]:
        """Empty mapping."""
        return self._prefix_list

    @property
    def ambiguous_unit_list(self) -> List[str]:
        """``DutchNumericWithUnit.AmbiguousTemperatureUnitList``."""
        return self._ambiguous_unit_list

    @property
    def ambiguous_unit_number_multiplier_regex(self) -> Pattern:
        """Compiled ``BaseUnits.AmbiguousUnitNumberMultiplierRegex``."""
        return self._ambiguous_unit_number_multiplier_regex
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

from recognizers_text import Culture
from recognizers_text.extractor import Extractor
from recognizers_text.parser import Parser
from recognizers_number.culture import CultureInfo
from recognizers_number.number.dutch.extractors import DutchNumberExtractor, NumberMode
from recognizers_number.number.parser_factory import AgnosticNumberParserFactory, ParserType
from recognizers_number.number.dutch.parsers import DutchNumberParserConfiguration
from recognizers_number_with_unit.number_with_unit.parsers import NumberWithUnitParserConfiguration
from recognizers_number_with_unit.resources.dutch_numeric_with_unit import DutchNumericWithUnit


class DutchNumberWithUnitParserConfiguration(NumberWithUnitParserConfiguration):
    """Shared Dutch settings for the number-with-unit parsers.

    Provides the Dutch number extractor and number parser that the
    unit-specific parser configurations in this module inherit.
    """

    def __init__(self, culture_info: CultureInfo = None):
        """Create the configuration.

        :param culture_info: Culture to run with. ``None`` selects
            ``Culture.Dutch``. The parameter defaults to ``None`` so this class
            can be constructed the same way as its subclasses.
        """
        if culture_info is None:
            culture_info = CultureInfo(Culture.Dutch)
        super().__init__(culture_info)
        self._internal_number_extractor = DutchNumberExtractor(
            NumberMode.DEFAULT)
        self._internal_number_parser = AgnosticNumberParserFactory.get_parser(
            ParserType.NUMBER, DutchNumberParserConfiguration(culture_info))

    @property
    def internal_number_parser(self) -> Parser:
        """Number parser obtained from ``AgnosticNumberParserFactory`` for ``ParserType.NUMBER``."""
        return self._internal_number_parser

    @property
    def internal_number_extractor(self) -> Extractor:
        """The ``DutchNumberExtractor`` created in ``NumberMode.DEFAULT``."""
        return self._internal_number_extractor

    @property
    def connector_token(self) -> str:
        """Connector token; an empty string for Dutch."""
        return ''


class DutchAgeParserConfiguration(DutchNumberWithUnitParserConfiguration):
    """Parser configuration for Dutch age expressions."""

    def __init__(self, culture_info: CultureInfo = None):
        """Register the age suffix table in the unit map.

        :param culture_info: Forwarded unchanged to the parent configuration.
        """
        super().__init__(culture_info)
        age_units = DutchNumericWithUnit.AgeSuffixList
        self.add_dict_to_unit_map(age_units)


class DutchCurrencyParserConfiguration(DutchNumberWithUnitParserConfiguration):
    """Parser configuration for Dutch currency expressions."""

    def __init__(self, culture_info: CultureInfo = None):
        """Register currency units and the currency code tables.

        :param culture_info: Forwarded unchanged to the parent configuration.
        """
        super().__init__(culture_info)
        resources = DutchNumericWithUnit
        # Suffixes are registered before prefixes, as in the other tables.
        for unit_table in (resources.CurrencySuffixList, resources.CurrencyPrefixList):
            self.add_dict_to_unit_map(unit_table)
        self.currency_name_to_iso_code_map = resources.CurrencyNameToIsoCodeMap
        self.currency_fraction_code_list = resources.FractionalUnitNameToCodeMap


class DutchDimensionParserConfiguration(DutchNumberWithUnitParserConfiguration):
    """Parser configuration for Dutch dimension expressions."""

    def __init__(self, culture_info: CultureInfo = None):
        """Register every dimension suffix table in the unit map.

        :param culture_info: Forwarded unchanged to the parent configuration.
        """
        super().__init__(culture_info)
        # Registration order is kept exactly: information, area, length,
        # speed, angle, volume, weight.
        dimension_tables = (
            DutchNumericWithUnit.InformationSuffixList,
            DutchNumericWithUnit.AreaSuffixList,
            DutchNumericWithUnit.LengthSuffixList,
            DutchNumericWithUnit.SpeedSuffixList,
            DutchNumericWithUnit.AngleSuffixList,
            DutchNumericWithUnit.VolumeSuffixList,
            DutchNumericWithUnit.WeightSuffixList,
        )
        for unit_table in dimension_tables:
            self.add_dict_to_unit_map(unit_table)


class DutchTemperatureParserConfiguration(DutchNumberWithUnitParserConfiguration):
    """Parser configuration for Dutch temperature expressions."""

    def __init__(self, culture_info: CultureInfo = None):
        """Register the temperature suffix table in the unit map.

        :param culture_info: Forwarded unchanged to the parent configuration.
        """
        super().__init__(culture_info)
        temperature_units = DutchNumericWithUnit.TemperatureSuffixList
        self.add_dict_to_unit_map(temperature_units)
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,14 @@
ChineseTemperatureParserConfiguration,
ChineseDimensionParserConfiguration,
ChineseAgeParserConfiguration)
from .dutch.extractors import (DutchCurrencyExtractorConfiguration,
DutchTemperatureExtractorConfiguration,
DutchDimensionExtractorConfiguration,
DutchAgeExtractorConfiguration)
from .dutch.parsers import (DutchCurrencyParserConfiguration,
DutchTemperatureParserConfiguration,
DutchDimensionParserConfiguration,
DutchAgeParserConfiguration)
from .spanish.extractors import (SpanishCurrencyExtractorConfiguration,
SpanishTemperatureExtractorConfiguration,
SpanishDimensionExtractorConfiguration,
Expand Down Expand Up @@ -128,6 +136,30 @@ def initialize_configuration(self):
]))
# endregion

# region Dutch
self.register_model('CurrencyModel', Culture.Dutch, lambda options: CurrencyModel(
[ExtractorParserModel(BaseMergedUnitExtractor(DutchCurrencyExtractorConfiguration(
)), BaseMergedUnitParser(DutchCurrencyParserConfiguration()))]
))
self.register_model('TemperatureModel', Culture.Dutch, lambda options: TemperatureModel([
ExtractorParserModel(
NumberWithUnitExtractor(
DutchTemperatureExtractorConfiguration()),
NumberWithUnitParser(DutchTemperatureParserConfiguration()))
]))
self.register_model('DimensionModel', Culture.Dutch, lambda options: DimensionModel([
ExtractorParserModel(
NumberWithUnitExtractor(
DutchDimensionExtractorConfiguration()),
NumberWithUnitParser(DutchDimensionParserConfiguration()))
]))
self.register_model('AgeModel', Culture.Dutch, lambda options: AgeModel([
ExtractorParserModel(
NumberWithUnitExtractor(DutchAgeExtractorConfiguration()),
NumberWithUnitParser(DutchAgeParserConfiguration()))
]))
# endregion

# region French
self.register_model('CurrencyModel', Culture.French, lambda options: CurrencyModel(
[ExtractorParserModel(BaseMergedUnitExtractor(FrenchCurrencyExtractorConfiguration(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,6 @@
from .french_numeric_with_unit import FrenchNumericWithUnit
from .italian_numeric_with_unit import ItalianNumericWithUnit
from .german_numeric_with_unit import GermanNumericWithUnit
from .dutch_numeric_with_unit import DutchNumericWithUnit
from .portuguese_numeric_with_unit import PortugueseNumericWithUnit
from .spanish_numeric_with_unit import SpanishNumericWithUnit
Loading

0 comments on commit e379469

Please sign in to comment.