From 81479b203e735a50b4ae1bfb1ef6dd8e284b29f6 Mon Sep 17 00:00:00 2001 From: John Meade Date: Mon, 9 Sep 2024 15:22:14 +0100 Subject: [PATCH] Ensure strings with only underscores are not processed as Integer A string similar to "0x____" should be treated as a string. Currently it is processed as an Integer. This alters the regex specified by http://yaml.org/type/int.html to ensure at least one numerical symbol is present in the string before converting to Integer. --- lib/psych/scalar_scanner.rb | 16 ++++++++-------- test/psych/test_scalar_scanner.rb | 13 +++++++++++++ 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/lib/psych/scalar_scanner.rb b/lib/psych/scalar_scanner.rb index f1245694..de214423 100644 --- a/lib/psych/scalar_scanner.rb +++ b/lib/psych/scalar_scanner.rb @@ -13,18 +13,18 @@ class ScalarScanner # Base 60, [-+]inf and NaN are handled separately FLOAT = /^(?:[-+]?([0-9][0-9_,]*)?\.[0-9]*([eE][-+][0-9]+)?(?# base 10))$/x - # Taken from http://yaml.org/type/int.html - INTEGER_STRICT = /^(?:[-+]?0b[0-1_]+ (?# base 2) - |[-+]?0[0-7_]+ (?# base 8) - |[-+]?(0|[1-9][0-9_]*) (?# base 10) - |[-+]?0x[0-9a-fA-F_]+ (?# base 16))$/x + # Taken from http://yaml.org/type/int.html and modified to ensure at least one numerical symbol exists + INTEGER_STRICT = /^(?:[-+]?0b[_]*[0-1][0-1_]* (?# base 2) + |[-+]?0[_]*[0-7][0-7_]* (?# base 8) + |[-+]?(0|[1-9][0-9_]*) (?# base 10) + |[-+]?0x[_]*[0-9a-fA-F][0-9a-fA-F_]* (?# base 16))$/x # Same as above, but allows commas. # Not to YML spec, but kept for backwards compatibility - INTEGER_LEGACY = /^(?:[-+]?0b[0-1_,]+ (?# base 2) - |[-+]?0[0-7_,]+ (?# base 8) + INTEGER_LEGACY = /^(?:[-+]?0b[_,]*[0-1][0-1_,]* (?# base 2) + |[-+]?0[_,]*[0-7][0-7_,]* (?# base 8) |[-+]?(?:0|[1-9](?:[0-9]|,[0-9]|_[0-9])*) (?# base 10) - |[-+]?0x[0-9a-fA-F_,]+ (?# base 16))$/x + |[-+]?0x[_,]*[0-9a-fA-F][0-9a-fA-F_,]* (?# base 16))$/x attr_reader :class_loader diff --git a/test/psych/test_scalar_scanner.rb b/test/psych/test_scalar_scanner.rb index 02b923af..8907d125 100644 --- a/test/psych/test_scalar_scanner.rb +++ b/test/psych/test_scalar_scanner.rb @@ -126,6 +126,19 @@ def test_scan_strings_ending_with_underscores assert_equal '100_', ss.tokenize('100_') end + def test_scan_strings_with_legacy_int_delimiters + assert_equal '0x_,_', ss.tokenize('0x_,_') + assert_equal '+0__,,', ss.tokenize('+0__,,') + assert_equal '-0b,_,', ss.tokenize('-0b,_,') + end + + def test_scan_strings_with_strict_int_delimiters + scanner = Psych::ScalarScanner.new ClassLoader.new, strict_integer: true + assert_equal '0x___', scanner.tokenize('0x___') + assert_equal '+0____', scanner.tokenize('+0____') + assert_equal '-0b___', scanner.tokenize('-0b___') + end + def test_scan_int_commas_and_underscores # NB: This test is to ensure backward compatibility with prior Psych versions, # not to test against any actual YAML specification.