Skip to content

Commit

Permalink
Improve parsing of malformed decimals (#1042)
Browse files Browse the repository at this point in the history
Signed-off-by: Olunusi Best <[email protected]>
Co-authored-by: Aarni Koskela <[email protected]>
  • Loading branch information
olunusib and akx authored Nov 28, 2023
1 parent aca7663 commit 946efcd
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 2 deletions.
37 changes: 36 additions & 1 deletion babel/numbers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1099,7 +1099,7 @@ def parse_decimal(
raise NumberFormatError(f"{string!r} is not a valid decimal number") from exc
if strict and group_symbol in string:
proper = format_decimal(parsed, locale=locale, decimal_quantization=False, numbering_system=numbering_system)
if string != proper and string.rstrip('0') != (proper + decimal_symbol):
if string != proper and proper != _remove_trailing_zeros_after_decimal(string, decimal_symbol):
try:
parsed_alt = decimal.Decimal(string.replace(decimal_symbol, '')
.replace(group_symbol, '.'))
Expand Down Expand Up @@ -1131,6 +1131,41 @@ def parse_decimal(
return parsed


def _remove_trailing_zeros_after_decimal(string: str, decimal_symbol: str) -> str:
"""
Remove trailing zeros from the decimal part of a numeric string.
This function takes a string representing a numeric value and a decimal symbol.
It removes any trailing zeros that appear after the decimal symbol in the number.
If the decimal part becomes empty after removing trailing zeros, the decimal symbol
is also removed. If the string does not contain the decimal symbol, it is returned unchanged.
:param string: The numeric string from which to remove trailing zeros.
:type string: str
:param decimal_symbol: The symbol used to denote the decimal point.
:type decimal_symbol: str
:return: The numeric string with trailing zeros removed from its decimal part.
:rtype: str
Example:
>>> _remove_trailing_zeros_after_decimal("123.4500", ".")
'123.45'
>>> _remove_trailing_zeros_after_decimal("100.000", ".")
'100'
>>> _remove_trailing_zeros_after_decimal("100", ".")
'100'
"""
integer_part, _, decimal_part = string.partition(decimal_symbol)

if decimal_part:
decimal_part = decimal_part.rstrip("0")
if decimal_part:
return integer_part + decimal_symbol + decimal_part
return integer_part

return string


# Character class matching one character that is NOT a digit, '@', '#', '.' or ','.
# NOTE(review): presumably marks where the literal prefix of a number pattern
# ends -- confirm against the code that uses it.
PREFIX_END = r'[^0-9@#.,]'
# Character class matching one character that is a digit, '@', '#', '.', ',', 'E' or '+'.
NUMBER_TOKEN = r'[0-9@#.,E+]'

Expand Down
12 changes: 11 additions & 1 deletion tests/test_numbers.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,6 @@ def test_can_parse_decimals(self):
with pytest.raises(numbers.UnsupportedNumberingSystemError):
numbers.parse_decimal('2,109,998', locale='de', numbering_system="unknown")


def test_parse_decimal_strict_mode(self):
# Numbers with a misplaced grouping symbol should be rejected
with pytest.raises(numbers.NumberFormatError) as info:
Expand All @@ -221,8 +220,19 @@ def test_parse_decimal_strict_mode(self):
assert str(numbers.parse_decimal('1.001', locale='de', strict=True)) == '1001'
# Trailing zeroes should be accepted
assert str(numbers.parse_decimal('3.00', locale='en_US', strict=True)) == '3.00'
# Numbers with a grouping symbol and no trailing zeroes should be accepted
assert str(numbers.parse_decimal('3,400.6', locale='en_US', strict=True)) == '3400.6'
# Numbers with a grouping symbol and trailing zeroes (not all zeroes after decimal) should be accepted
assert str(numbers.parse_decimal('3,400.60', locale='en_US', strict=True)) == '3400.60'
# Numbers with a grouping symbol and trailing zeroes (all zeroes after decimal) should be accepted
assert str(numbers.parse_decimal('3,400.00', locale='en_US', strict=True)) == '3400.00'
assert str(numbers.parse_decimal('3,400.0000', locale='en_US', strict=True)) == '3400.0000'
# Numbers with a grouping symbol and no decimal part should be accepted
assert str(numbers.parse_decimal('3,800', locale='en_US', strict=True)) == '3800'
# Numbers without any grouping symbol should be accepted
assert str(numbers.parse_decimal('2000.1', locale='en_US', strict=True)) == '2000.1'
# Numbers without any grouping symbol and no decimal should be accepted
assert str(numbers.parse_decimal('2580', locale='en_US', strict=True)) == '2580'
# High precision numbers should be accepted
assert str(numbers.parse_decimal('5,000001', locale='fr', strict=True)) == '5.000001'

Expand Down

0 comments on commit 946efcd

Please sign in to comment.