Address issues 81+83, avoid too-verbose num reprs

Decimal numbers have often been too verbose since v8.1. Unfortunately I couldn't find an algorithm that simultaneously * avoids loss of precision on numbers where the most precise 64-bit floating point representation has at least 16 digits of precision (e.g., -1.79769313486232E+308) * while also not being too verbose with many other numbers (e.g., writing 11.11 as 11.099999999999999) * and does not have noticeably worse performance. In fact, my new algorithm is about 100% slower than the algorithm used since this commit (7522a20) and will, in the limiting case where all numbers require at least 16 digits of precision, be closer to 200% slower. Obviously most real JSON documents are not just long arrays of decimal numbers, so the actual performance loss will generally be less than that.
molsonkiko · Nov 3, 2024 · f259bf6 · f259bf6
1 parent 9f96580
commit f259bf6
Show file tree

Hide file tree

Showing 9 changed files with 94 additions and 81 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -65,6 +65,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
 
 ### Fixed
 
+1. Fix issue (introduced in v8.1, see issues [83](https://github.com/molsonkiko/JsonToolsNppPlugin/issues/83) and [81](https://github.com/molsonkiko/JsonToolsNppPlugin/issues/81)) where decimal numbers were given unnecessarily precise string representations. __For example, in JsonTools v8.1, `11.11` would be represented as `11.109999999999999` even though the original `11.11` was an equally valid representation.__ Now all decimal numbers will be given the more compact representation used in v8.0 and earlier *unless the more verbose representation introduced in v8.1 would be necessary to avoid loss of precision* (so there will be no regression on [issue 78](https://github.com/molsonkiko/JsonToolsNppPlugin/issues/78)). Note that __this new algorithm for formatting decimal numbers is about twice as slow as the algorithm used in v8.1__, although the impact on performance will be much less dramatic unless you are compressing an array where almost every element is a non-integer decimal number.
 1. Fix the following issues with [random string from regex](/docs/README.md#random-strings-from-regex-added-in-v81):
     - It previously incorrectly flagged some valid regular expressions (e.g. `(?-i)(?:xy{1,2}){,2}`) as having two consecutive quantifiers.
 	- It previously did not correctly handle some character sets where the final character was `-` (for example, `[+-]` previously would only generate `+`, and now it correctly has a 50% chance of generating `-` or `+`)

diff --git a/JsonToolsNppPlugin/JSONTools/JNode.cs b/JsonToolsNppPlugin/JSONTools/JNode.cs
@@ -403,14 +403,36 @@ public static void StrToSb(StringBuilder sb, string s)
         }
 
         /// <summary>
-        /// returns d formatted with up to 17 digits of precision, using '.' as the decimal separator.<br></br>
-        /// For whatever reason, d.ToString(<see cref="DOT_DECIMAL_SEP"/>) only gives 15 digits of precision by default.
+        /// Let <c>d17</c> = d.ToString("G17", <see cref="JNode.DOT_DECIMAL_SEP"/>) (which can always be parsed to regenerate a double equal to <c>d</c>)<br></br>
+        /// and let <c>d15</c> = d.ToString(<see cref="JNode.DOT_DECIMAL_SEP"/>) (which only keeps 15 digits of precision)<br></br>
+        /// Returns <c>d</c> formatted with up to 17 digits of precision, using '.' as the decimal separator.<br></br>
+        /// If <c>d17</c> includes 17 digits of precision, we will generate <c>d15</c>.<br></br>
+        /// If <c>d15</c> is shorter than <c>d17</c>, and if (<c>double.Parse(d15) == d</c>, we will prefer <c>d15</c> because <c>d17</c> was an unncessarily verbose representation of <c>d</c>.
         /// </summary>
         /// <param name="d"></param>
         /// <returns></returns>
         public static string DoubleToString(double d)
         {
-            return d.ToString("G17", DOT_DECIMAL_SEP);
+            string dubstring = d.ToString(DOT_DECIMAL_SEP);
+            int indexOfE = dubstring.IndexOf('E');
+            if (d == Math.Round(d) && !(d > long.MaxValue || d < long.MinValue) && indexOfE < 0)
+            {
+                // add ending ".0" to distinguish doubles equal to integers from actual integers
+                // unless they use exponential notation, in which case you mess things up
+                // by turning something like 3.123E+15 into 3.123E+15.0 (a non-JSON number representation)
+                return dubstring + ".0";
+            }
+            // the default d.ToString(DOT_DECIMAL_SEP) might lose precision in some cases.
+            // We will nonetheless prefer this representation because the G17 representation
+            //     has stupid unnecessarily verbose representations like representing 2317.24 as 2317.2399999999998
+            // We need to parse dubstring to make sure no precision has been lost.
+            try
+            {
+                if (double.Parse(dubstring) == d)
+                    return dubstring; // default string representation has all necessary precision
+            }
+            catch { }
+            return d.ToString("G17", DOT_DECIMAL_SEP); // we need to use a string representation that retains as much precision as possible
         }
 
         /// <summary>
@@ -436,15 +458,7 @@ public virtual string ToString(bool sortKeys = true, string keyValueSep = ": ",
                         return (v < 0) ? "-Infinity" : "Infinity";
                     }
                     if (double.IsNaN(v)) { return "NaN"; }
-                    string dubstring = DoubleToString(v);
-                    if (v == Math.Round(v) && !(v > long.MaxValue || v < long.MinValue) && dubstring.IndexOf('E') < 0)
-                    {
-                        // add ending ".0" to distinguish doubles equal to integers from actual integers
-                        // unless they use exponential notation, in which case you mess things up
-                        // by turning something like 3.123E+15 into 3.123E+15.0 (a non-JSON number representation)
-                        return dubstring + ".0";
-                    }
-                    return dubstring;
+                    return DoubleToString(v);
                 }
                 case Dtype.INT: return Convert.ToInt64(value).ToString();
                 case Dtype.NULL: return "null";

diff --git a/JsonToolsNppPlugin/JSONTools/YamlDumper.cs b/JsonToolsNppPlugin/JSONTools/YamlDumper.cs
@@ -157,8 +157,6 @@ private string YamlValRepr(JNode v)
                 {
                     return ".nan";
                 }
-                if (d % 1 == 0)
-                    return $"{d}.0"; // add .0 at end of floats that are equal to ints
                 return JNode.DoubleToString(d);
             }
             return strv;

diff --git a/JsonToolsNppPlugin/Properties/AssemblyInfo.cs b/JsonToolsNppPlugin/Properties/AssemblyInfo.cs
@@ -28,5 +28,5 @@
 //      Build Number
 //      Revision
 //
-[assembly: AssemblyVersion("8.1.0.13")]
-[assembly: AssemblyFileVersion("8.1.0.13")]
+[assembly: AssemblyVersion("8.1.0.14")]
+[assembly: AssemblyFileVersion("8.1.0.14")]
diff --git a/JsonToolsNppPlugin/Tests/JsonParserTests.cs b/JsonToolsNppPlugin/Tests/JsonParserTests.cs
@@ -163,8 +163,8 @@ public static bool Test()
                 ("[[]]", "[[]]", "[" + NL + "    [" + NL + "    ]" + NL + "]", "empty lists" ),
                 ("\"abc\"", "\"abc\"", "\"abc\"", "scalar string" ),
                 ("1", "1", "1", "scalar int" ),
-                ("-1.0", "-1.0", "-1.0", "negative scalar float" ),
-                ("3.5", "3.5", "3.5", "scalar float" ),
+                ("-2317.24", "-2317.24", "-2317.24", "negative scalar float" ),
+                ("11.11", "11.11", "11.11", "scalar float" ),
                 ("-4", "-4", "-4", "negative scalar int" ),
                 ("[{\"FullName\":\"C:\\\\123467756\\\\Z\",\"LastWriteTimeUtc\":\"\\/Date(1600790697130)\\/\"}," +
                             "{\"FullName\":\"C:\\\\123467756\\\\Z\\\\B\",\"LastWriteTimeUtc\":\"\\/Date(1618852147285)\\/\"}]",
@@ -203,8 +203,8 @@ public static bool Test()
                     + NL + "}",
                     "culture-sensitive sorting of keys (e.g., 'baßk' should sort before 'basst')"),
                 ("[3.1234e15, -2.178e15, 7.59e15, 5.71138315710726E+18]",
-                    "[3123400000000000.0, -2178000000000000.0, 7590000000000000.0, 5.7113831571072604E+18]",
-                    "["+NL+"3123400000000000.0,"+NL+"-2178000000000000.0,"+NL+"7590000000000000.0,"+NL+"5.7113831571072604E+18"+NL+"]",
+                    "[3.1234E+15, -2.178E+15, 7.59E+15, 5.71138315710726E+18]",
+                    "["+NL+"3.1234E+15,"+NL+"-2.178E+15,"+NL+"7.59E+15,"+NL+"5.71138315710726E+18"+NL+"]",
                     "floating point numbers using 'E' notation that can exactly represent integers"
                 ),
                 (
@@ -1281,11 +1281,11 @@ public static bool TestTryParseNumber()
                 (".5,boo", 0, 3, "\".5,\""),
                 ("1,15.5e3E7", 2, 8, "15500.0"),
                 ("1,15.5e3E70", 2, 10, "\"15.5e3E7\""),
-                ("1,2.8e-7,7", 2, 8, "2.8000000000000002E-07"),
+                ("1,2.8e-7,7", 2, 8, "2.8E-07"),
                 (";17.4e+11,7", 1, 9, "1740000000000.0"),
                 ("1,15.5e3e7", 2, 8, "15500.0"),
                 ("1,15.5e3e70", 2, 10, "\"15.5e3e7\""),
-                ("1,2.8E-7,7", 2, 8, "2.8000000000000002E-07"),
+                ("1,2.8E-7,7", 2, 8, "2.8E-07"),
                 (";17.4E+11,7", 1, 9, "1740000000000.0"),
                 ("1,15.5Eb,ekr", 2, 8, "\"15.5Eb\""),
                 ("a,0x123456789abc,3", 2, 16, "20015998343868"),

diff --git a/JsonToolsNppPlugin/Tests/JsonTabularizerTests.cs b/JsonToolsNppPlugin/Tests/JsonTabularizerTests.cs
@@ -814,7 +814,7 @@ public static bool Test()
 					"{\"a\": [\"x\", \"y\", \"z\"], \"b\": [[1], [2.2,3]], \"c\": NaN}",
                     "[" +
                         "{\"a\": \"x\", \"b\": \"[1]\", \"c\": NaN}, " +
-                        "{\"a\": \"y\", \"b\": \"[2.2000000000000002, 3]\", \"c\": NaN}, " +
+                        "{\"a\": \"y\", \"b\": \"[2.2, 3]\", \"c\": NaN}, " +
                         "{\"a\": \"z\", \"b\": \"\", \"c\": NaN}" +
                     "]"
                 },

diff --git a/JsonToolsNppPlugin/Tests/UserInterfaceTests.cs b/JsonToolsNppPlugin/Tests/UserInterfaceTests.cs
@@ -521,7 +521,7 @@ public static bool Test()
                 ("compare_text", new object[]{"[1,2,3]"}),
                 ("overwrite", new object[]{"[1,2,-9e15]\r\n//foo"}),
                 ("compress", new object[]{}),
-                ("compare_text", new object[]{"[1,2,-9000000000000000.0]"}),
+                ("compare_text", new object[]{"[1,2,-9E+15]"}),
                 // TEST PARSE JSON LINES
                 ("overwrite", new object[]{"[1,2,3]\r\n{\"a\": 1, \"b\": [-3,-4]}\r\n-7\r\nfalse"}),
                 ("tree_open", new object[]{}), // to close the tree so it can be reopened

diff --git a/JsonToolsNppPlugin/Tests/YamlDumperTests.cs b/JsonToolsNppPlugin/Tests/YamlDumperTests.cs
@@ -55,7 +55,7 @@ public static bool Test()
                 new string[] { "[\" big \"]", "- \" big \"\n", "leading or ending space in array value" },
                 new string[] { "\"a \"", "\"a \"\n", "scalar string" },
                 new string[] { "9", "9\n", "scalar int" },
-                new string[] { "-940.3", "-940.29999999999995\n", "scalar float" },
+                new string[] { "-940.3", "-940.3\n", "scalar float" },
                 new string[] { "[true, false]", "- True\n- False\n", "scalar bools" },
                 new string[] { "[null, Infinity, -Infinity, NaN]", "- null\n- .inf\n- -.inf\n- .nan\n", "null, +/-infinity, NaN" },
                 // in the below case, there's actually a bit of an error;