Skip to content

Commit

Permalink
Address issues 81+83, avoid too-verbose num reprs
Browse files Browse the repository at this point in the history
Decimal numbers have often been too verbose since v8.1.
Unfortunately I couldn't find an algorithm that simultaneously
    * avoids loss of precision on numbers where the most precise 64-bit floating point representation
        has at least 16 digits of precision (e.g., -1.79769313486232E+308)
    * while also not being too verbose with many other numbers (e.g., writing 11.11 as 11.099999999999999)
    * and does not have noticeably worse performance.
In fact, my new algorithm is about 100% slower than the algorithm used since this commit (7522a20)
and will, in the limiting case where all numbers require at least 16 digits of precision, be closer to 200% slower.
Obviously most real JSON documents are not just long arrays of decimal numbers, so the actual performance loss will generally be less than that.
  • Loading branch information
molsonkiko committed Nov 3, 2024
1 parent 9f96580 commit f259bf6
Show file tree
Hide file tree
Showing 9 changed files with 94 additions and 81 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).

### Fixed

1. Fix issue (introduced in v8.1, see issues [83](https://github.com/molsonkiko/JsonToolsNppPlugin/issues/83) and [81](https://github.com/molsonkiko/JsonToolsNppPlugin/issues/81)) where decimal numbers were given unnecessarily precise string representations. __For example, in JsonTools v8.1, `11.11` would be represented as `11.109999999999999` even though the original `11.11` was an equally valid representation.__ Now all decimal numbers will be given the more compact representation used in v8.0 and earlier *unless the more verbose representation introduced in v8.1 would be necessary to avoid loss of precision* (so there will be no regression on [issue 78](https://github.com/molsonkiko/JsonToolsNppPlugin/issues/78)). Note that __this new algorithm for formatting decimal numbers is about twice as slow as the algorithm used in v8.1__, although the impact on performance will be much less dramatic unless you are compressing an array where almost every element is a non-integer decimal number.
1. Fix the following issues with [random string from regex](/docs/README.md#random-strings-from-regex-added-in-v81):
- It previously incorrectly flagged some valid regular expressions (e.g. `(?-i)(?:xy{1,2}){,2}`) as having two consecutive quantifiers.
- It previously did not correctly handle some character sets where the final character was `-` (for example, `[+-]` previously would only generate `+`, and now it correctly has a 50% chance of generating `-` or `+`)
Expand Down
38 changes: 26 additions & 12 deletions JsonToolsNppPlugin/JSONTools/JNode.cs
Original file line number Diff line number Diff line change
Expand Up @@ -403,14 +403,36 @@ public static void StrToSb(StringBuilder sb, string s)
}

/// <summary>
/// returns d formatted with up to 17 digits of precision, using '.' as the decimal separator.<br></br>
/// For whatever reason, d.ToString(<see cref="DOT_DECIMAL_SEP"/>) only gives 15 digits of precision by default.
/// Let <c>d17</c> = d.ToString("G17", <see cref="JNode.DOT_DECIMAL_SEP"/>) (which can always be parsed to regenerate a double equal to <c>d</c>)<br></br>
/// and let <c>d15</c> = d.ToString(<see cref="JNode.DOT_DECIMAL_SEP"/>) (which only keeps 15 digits of precision)<br></br>
/// Returns <c>d</c> formatted with up to 17 digits of precision, using '.' as the decimal separator.<br></br>
/// If <c>d17</c> includes 17 digits of precision, we will generate <c>d15</c>.<br></br>
/// If <c>d15</c> is shorter than <c>d17</c>, and if (<c>double.Parse(d15) == d</c>, we will prefer <c>d15</c> because <c>d17</c> was an unncessarily verbose representation of <c>d</c>.
/// </summary>
/// <param name="d"></param>
/// <returns></returns>
public static string DoubleToString(double d)
{
return d.ToString("G17", DOT_DECIMAL_SEP);
string dubstring = d.ToString(DOT_DECIMAL_SEP);
int indexOfE = dubstring.IndexOf('E');
if (d == Math.Round(d) && !(d > long.MaxValue || d < long.MinValue) && indexOfE < 0)
{
// add ending ".0" to distinguish doubles equal to integers from actual integers
// unless they use exponential notation, in which case you mess things up
// by turning something like 3.123E+15 into 3.123E+15.0 (a non-JSON number representation)
return dubstring + ".0";
}
// the default d.ToString(DOT_DECIMAL_SEP) might lose precision in some cases.
// We will nonetheless prefer this representation because the G17 representation
// has stupid unnecessarily verbose representations like representing 2317.24 as 2317.2399999999998
// We need to parse dubstring to make sure no precision has been lost.
try
{
if (double.Parse(dubstring) == d)
return dubstring; // default string representation has all necessary precision
}
catch { }
return d.ToString("G17", DOT_DECIMAL_SEP); // we need to use a string representation that retains as much precision as possible
}

/// <summary>
Expand All @@ -436,15 +458,7 @@ public virtual string ToString(bool sortKeys = true, string keyValueSep = ": ",
return (v < 0) ? "-Infinity" : "Infinity";
}
if (double.IsNaN(v)) { return "NaN"; }
string dubstring = DoubleToString(v);
if (v == Math.Round(v) && !(v > long.MaxValue || v < long.MinValue) && dubstring.IndexOf('E') < 0)
{
// add ending ".0" to distinguish doubles equal to integers from actual integers
// unless they use exponential notation, in which case you mess things up
// by turning something like 3.123E+15 into 3.123E+15.0 (a non-JSON number representation)
return dubstring + ".0";
}
return dubstring;
return DoubleToString(v);
}
case Dtype.INT: return Convert.ToInt64(value).ToString();
case Dtype.NULL: return "null";
Expand Down
2 changes: 0 additions & 2 deletions JsonToolsNppPlugin/JSONTools/YamlDumper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,6 @@ private string YamlValRepr(JNode v)
{
return ".nan";
}
if (d % 1 == 0)
return $"{d}.0"; // add .0 at end of floats that are equal to ints
return JNode.DoubleToString(d);
}
return strv;
Expand Down
4 changes: 2 additions & 2 deletions JsonToolsNppPlugin/Properties/AssemblyInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,5 @@
// Build Number
// Revision
//
[assembly: AssemblyVersion("8.1.0.13")]
[assembly: AssemblyFileVersion("8.1.0.13")]
[assembly: AssemblyVersion("8.1.0.14")]
[assembly: AssemblyFileVersion("8.1.0.14")]
12 changes: 6 additions & 6 deletions JsonToolsNppPlugin/Tests/JsonParserTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,8 @@ public static bool Test()
("[[]]", "[[]]", "[" + NL + " [" + NL + " ]" + NL + "]", "empty lists" ),
("\"abc\"", "\"abc\"", "\"abc\"", "scalar string" ),
("1", "1", "1", "scalar int" ),
("-1.0", "-1.0", "-1.0", "negative scalar float" ),
("3.5", "3.5", "3.5", "scalar float" ),
("-2317.24", "-2317.24", "-2317.24", "negative scalar float" ),
("11.11", "11.11", "11.11", "scalar float" ),
("-4", "-4", "-4", "negative scalar int" ),
("[{\"FullName\":\"C:\\\\123467756\\\\Z\",\"LastWriteTimeUtc\":\"\\/Date(1600790697130)\\/\"}," +
"{\"FullName\":\"C:\\\\123467756\\\\Z\\\\B\",\"LastWriteTimeUtc\":\"\\/Date(1618852147285)\\/\"}]",
Expand Down Expand Up @@ -203,8 +203,8 @@ public static bool Test()
+ NL + "}",
"culture-sensitive sorting of keys (e.g., 'baßk' should sort before 'basst')"),
("[3.1234e15, -2.178e15, 7.59e15, 5.71138315710726E+18]",
"[3123400000000000.0, -2178000000000000.0, 7590000000000000.0, 5.7113831571072604E+18]",
"["+NL+"3123400000000000.0,"+NL+"-2178000000000000.0,"+NL+"7590000000000000.0,"+NL+"5.7113831571072604E+18"+NL+"]",
"[3.1234E+15, -2.178E+15, 7.59E+15, 5.71138315710726E+18]",
"["+NL+"3.1234E+15,"+NL+"-2.178E+15,"+NL+"7.59E+15,"+NL+"5.71138315710726E+18"+NL+"]",
"floating point numbers using 'E' notation that can exactly represent integers"
),
(
Expand Down Expand Up @@ -1281,11 +1281,11 @@ public static bool TestTryParseNumber()
(".5,boo", 0, 3, "\".5,\""),
("1,15.5e3E7", 2, 8, "15500.0"),
("1,15.5e3E70", 2, 10, "\"15.5e3E7\""),
("1,2.8e-7,7", 2, 8, "2.8000000000000002E-07"),
("1,2.8e-7,7", 2, 8, "2.8E-07"),
(";17.4e+11,7", 1, 9, "1740000000000.0"),
("1,15.5e3e7", 2, 8, "15500.0"),
("1,15.5e3e70", 2, 10, "\"15.5e3e7\""),
("1,2.8E-7,7", 2, 8, "2.8000000000000002E-07"),
("1,2.8E-7,7", 2, 8, "2.8E-07"),
(";17.4E+11,7", 1, 9, "1740000000000.0"),
("1,15.5Eb,ekr", 2, 8, "\"15.5Eb\""),
("a,0x123456789abc,3", 2, 16, "20015998343868"),
Expand Down
2 changes: 1 addition & 1 deletion JsonToolsNppPlugin/Tests/JsonTabularizerTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -814,7 +814,7 @@ public static bool Test()
"{\"a\": [\"x\", \"y\", \"z\"], \"b\": [[1], [2.2,3]], \"c\": NaN}",
"[" +
"{\"a\": \"x\", \"b\": \"[1]\", \"c\": NaN}, " +
"{\"a\": \"y\", \"b\": \"[2.2000000000000002, 3]\", \"c\": NaN}, " +
"{\"a\": \"y\", \"b\": \"[2.2, 3]\", \"c\": NaN}, " +
"{\"a\": \"z\", \"b\": \"\", \"c\": NaN}" +
"]"
},
Expand Down
2 changes: 1 addition & 1 deletion JsonToolsNppPlugin/Tests/UserInterfaceTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -521,7 +521,7 @@ public static bool Test()
("compare_text", new object[]{"[1,2,3]"}),
("overwrite", new object[]{"[1,2,-9e15]\r\n//foo"}),
("compress", new object[]{}),
("compare_text", new object[]{"[1,2,-9000000000000000.0]"}),
("compare_text", new object[]{"[1,2,-9E+15]"}),
// TEST PARSE JSON LINES
("overwrite", new object[]{"[1,2,3]\r\n{\"a\": 1, \"b\": [-3,-4]}\r\n-7\r\nfalse"}),
("tree_open", new object[]{}), // to close the tree so it can be reopened
Expand Down
2 changes: 1 addition & 1 deletion JsonToolsNppPlugin/Tests/YamlDumperTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ public static bool Test()
new string[] { "[\" big \"]", "- \" big \"\n", "leading or ending space in array value" },
new string[] { "\"a \"", "\"a \"\n", "scalar string" },
new string[] { "9", "9\n", "scalar int" },
new string[] { "-940.3", "-940.29999999999995\n", "scalar float" },
new string[] { "-940.3", "-940.3\n", "scalar float" },
new string[] { "[true, false]", "- True\n- False\n", "scalar bools" },
new string[] { "[null, Infinity, -Infinity, NaN]", "- null\n- .inf\n- -.inf\n- .nan\n", "null, +/-infinity, NaN" },
// in the below case, there's actually a bit of an error;
Expand Down
Loading

0 comments on commit f259bf6

Please sign in to comment.