args)
@@ -3025,6 +3112,7 @@ public static JNode ObjectsToJNode(object obj)
["at"] = new ArgFunction(At, "at", Dtype.ANYTHING, 2, 2, false, new Dtype[] {Dtype.ARR_OR_OBJ, Dtype.ARR | Dtype.INT | Dtype.STR}),
["avg"] = new ArgFunction(Mean, "avg", Dtype.FLOAT, 1, 1, false, new Dtype[] { Dtype.ARR }),
["concat"] = new ArgFunction(Concat, "concat", Dtype.ARR_OR_OBJ, 2, int.MaxValue, false, new Dtype[] { Dtype.ITERABLE, Dtype.ITERABLE, /* any # of args */ Dtype.ITERABLE }),
+ ["csv_regex"] = new ArgFunction(CsvRegexAsJNode, "csv_regex", Dtype.REGEX, 1, 4, false, new Dtype[] {Dtype.INT, Dtype.STR, Dtype.STR, Dtype.STR}),
["dict"] = new ArgFunction(Dict, "dict", Dtype.OBJ, 1, 1, false, new Dtype[] { Dtype.ARR }),
["enumerate"] = new ArgFunction(Enumerate, "enumerate", Dtype.ARR, 1, 1, false, new Dtype[] { Dtype.ARR }),
["flatten"] = new ArgFunction(Flatten, "flatten", Dtype.ARR, 1, 2, false, new Dtype[] { Dtype.ARR, Dtype.INT }),
@@ -3074,7 +3162,7 @@ public static JNode ObjectsToJNode(object obj)
["round"] = new ArgFunction(Round, "round", Dtype.FLOAT_OR_INT, 1, 2, true, new Dtype[] {Dtype.FLOAT_OR_INT | Dtype.ITERABLE, Dtype.INT}),
["s_count"] = new ArgFunction(StrCount, "s_count", Dtype.INT, 2, 2, true, new Dtype[] {Dtype.STR | Dtype.ITERABLE, Dtype.STR_OR_REGEX}),
["s_csv"] = new ArgFunction(CsvRead, "s_csv", Dtype.ARR, 2, int.MaxValue, true, new Dtype[] {Dtype.STR | Dtype.ITERABLE, Dtype.INT, Dtype.STR | Dtype.NULL, Dtype.STR | Dtype.NULL, Dtype.STR | Dtype.NULL, Dtype.STR | Dtype.NULL, Dtype.INT}, true, new ArgsTransform((2, Dtype.NULL, x => new JNode(",")), (3, Dtype.NULL, x => new JNode("\r\n")), (4, Dtype.NULL, x => new JNode("\"")), (5, Dtype.NULL, x => new JNode("n")))),
- ["s_fa"] = new ArgFunction(StrFindAll, "s_fa", Dtype.ARR, 2, int.MaxValue, true, new Dtype[] {Dtype.STR | Dtype.ITERABLE, Dtype.STR_OR_REGEX, Dtype.INT}, true, new ArgsTransform((1, Dtype.STR_OR_REGEX, TransformRegex))),
+ ["s_fa"] = new ArgFunction(StrFindAll, "s_fa", Dtype.ARR, 2, int.MaxValue, true, new Dtype[] { Dtype.STR | Dtype.ITERABLE, Dtype.STR_OR_REGEX, Dtype.BOOL | Dtype.NULL, Dtype.INT}, true, new ArgsTransform((1, Dtype.STR_OR_REGEX, TransformRegex), (2, Dtype.NULL, x => new JNode(false)))),
["s_find"] = new ArgFunction(StrFind, "s_find", Dtype.ARR, 2, 2, true, new Dtype[] {Dtype.STR | Dtype.ITERABLE, Dtype.REGEX}),
["s_len"] = new ArgFunction(StrLen, "s_len", Dtype.INT, 1, 1, true, new Dtype[] {Dtype.STR | Dtype.ITERABLE}),
["s_lower"] = new ArgFunction(StrLower, "s_lower", Dtype.STR, 1, 1, true, new Dtype[] {Dtype.STR | Dtype.ITERABLE}),
diff --git a/JsonToolsNppPlugin/JSONTools/RemesPathLexer.cs b/JsonToolsNppPlugin/JSONTools/RemesPathLexer.cs
index b239e9e..9b71df0 100644
--- a/JsonToolsNppPlugin/JSONTools/RemesPathLexer.cs
+++ b/JsonToolsNppPlugin/JSONTools/RemesPathLexer.cs
@@ -77,7 +77,7 @@ public RemesPathLexer()
@"(->)|" + // delimiters containing characters that conflict with binops
@"(&|\||\^|=~|[!=]=|<=?|>=?|\+|-|//?|%|\*\*?)|" + // binops
@"([,\[\]\(\)\{\}\.:=!;])|" + // delimiters
- @"([gj]?(? tokens)
{
if (tokens == null)
diff --git a/JsonToolsNppPlugin/JsonToolsNppPlugin.csproj b/JsonToolsNppPlugin/JsonToolsNppPlugin.csproj
index 120a128..156c520 100644
--- a/JsonToolsNppPlugin/JsonToolsNppPlugin.csproj
+++ b/JsonToolsNppPlugin/JsonToolsNppPlugin.csproj
@@ -211,7 +211,7 @@
-
+
diff --git a/JsonToolsNppPlugin/Properties/AssemblyInfo.cs b/JsonToolsNppPlugin/Properties/AssemblyInfo.cs
index 65a8b98..f922424 100644
--- a/JsonToolsNppPlugin/Properties/AssemblyInfo.cs
+++ b/JsonToolsNppPlugin/Properties/AssemblyInfo.cs
@@ -29,5 +29,5 @@
// Build Number
// Revision
//
-[assembly: AssemblyVersion("5.8.0.12")]
-[assembly: AssemblyFileVersion("5.8.0.12")]
+[assembly: AssemblyVersion("5.8.0.13")]
+[assembly: AssemblyFileVersion("5.8.0.13")]
diff --git a/JsonToolsNppPlugin/Tests/FormatPathTester.cs b/JsonToolsNppPlugin/Tests/FormatPathTests.cs
similarity index 84%
rename from JsonToolsNppPlugin/Tests/FormatPathTester.cs
rename to JsonToolsNppPlugin/Tests/FormatPathTests.cs
index 0ddad8e..08cb397 100644
--- a/JsonToolsNppPlugin/Tests/FormatPathTester.cs
+++ b/JsonToolsNppPlugin/Tests/FormatPathTests.cs
@@ -1,10 +1,4 @@
using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
-//using System.Windows.Forms;
-//using JSON_Tools.Forms;
using JSON_Tools.JSON_Tools;
using JSON_Tools.Utils;
@@ -15,7 +9,7 @@ public class FormatPathTester
public static bool Test()
{
JsonParser parser = new JsonParser();
- JNode json = parser.Parse("{\"a\":1,\"b \":\"f\",\"cu\":[false,true,[{\"\":{\"bufrear\":null}}]],\"d'\":-1.5,\"e\\\"\":[NaN,Infinity,-Infinity]}");
+ JNode json = parser.Parse("{\"a\":1,\"b \":\"f\",\"cu\":[false,true,[{\"\":{\"bufrear\":null}}]],\"d'\":-1.5,\"b\\\\e\\\"\\r\\n\\t`\":[NaN,Infinity,-Infinity]}");
var testcases = new (int pos, KeyStyle style, string correct_path)[]
{
(6, KeyStyle.JavaScript, ".a"),
@@ -39,9 +33,9 @@ public static bool Test()
(66, KeyStyle.JavaScript, "[\"d'\"]"),
(66, KeyStyle.RemesPath, "[`d'`]"),
(66, KeyStyle.Python, "[\"d'\"]"),
- (93, KeyStyle.JavaScript, "['e\"'][2]"),
- (93, KeyStyle.RemesPath, "[`e\\\\\"`][2]"),
- (93, KeyStyle.Python, "['e\"'][2]"),
+ (93, KeyStyle.JavaScript, "['b\\\\e\"\\r\\n\\t`'][1]"),
+ (93, KeyStyle.RemesPath, "[`b\\\\e\"\\r\\n\\t\\``][1]"),
+ (93, KeyStyle.Python, "['b\\\\e\"\\r\\n\\t`'][1]"),
};
int ii = 0;
int tests_failed = 0;
@@ -56,13 +50,13 @@ public static bool Test()
catch (Exception ex)
{
tests_failed++;
- Npp.AddLine($"While trying to get the path to position {pos}, threw exception\r\n{ex}");
+ Npp.AddLine($"While trying to get the path to position {pos} ({style} style), threw exception\r\n{ex}");
continue;
}
if (path != correct_path)
{
tests_failed++;
- Npp.AddLine($"Got the path to position {pos} as {path}, but it should be {correct_path}");
+ Npp.AddLine($"Got the path to position {pos} ({style} style) as {path}, but it should be {correct_path}");
}
}
Npp.AddLine($"Failed {tests_failed} tests.");
diff --git a/JsonToolsNppPlugin/Tests/JsonParserTests.cs b/JsonToolsNppPlugin/Tests/JsonParserTests.cs
index b287164..0075981 100644
--- a/JsonToolsNppPlugin/Tests/JsonParserTests.cs
+++ b/JsonToolsNppPlugin/Tests/JsonParserTests.cs
@@ -848,7 +848,7 @@ public static bool TestLinter()
"Whitespace characters other than ' ', '\\t', '\\r', and '\\n' are only allowed in JSON5",
"Whitespace characters other than ' ', '\\t', '\\r', and '\\n' are only allowed in JSON5",
}),
- ("{foo: 1, $baz: 2, 草: 2, _quЯ: 3, \\ud83d\\ude00_$\\u1ed3: 4, a\\uff6acf: 5, \\u0008\\u000a: 6}",
+ ("{foo: 1, $baz: 2, 草: 2, _quЯ: 3, \\ud83d\\ude00_$\\u1ed3: 4, a\\uff6acf: 5, \\u0008\\u000a: 6, f\\u0000o: 1}",
"{\"foo\": 1, \"$baz\": 2, \"草\": 2, \"_quЯ\": 3, \"😀_$ồ\": 4, \"aェcf\": 5, \"\\b\\n\": 6}",
new string[]{
"Unquoted keys are only supported in JSON5",
@@ -860,6 +860,8 @@ public static bool TestLinter()
"Unquoted keys are only supported in JSON5",
"Control characters (ASCII code less than 0x20) are disallowed inside strings under the strict JSON specification",
"String literal contains newline", // the \u000a in \\b\\u000a is secretly a newline
+ "Unquoted keys are only supported in JSON5",
+ "'\\x00' is the null character, which is illegal in JsonTools"
}),
("[1,\"b\\\nb\\\rb\\\r\nb\"]", "[1, \"bbbb\"]",
new string[]
diff --git a/JsonToolsNppPlugin/Tests/JsonTabularizerTests.cs b/JsonToolsNppPlugin/Tests/JsonTabularizerTests.cs
index 354c8cb..37b3471 100644
--- a/JsonToolsNppPlugin/Tests/JsonTabularizerTests.cs
+++ b/JsonToolsNppPlugin/Tests/JsonTabularizerTests.cs
@@ -470,12 +470,12 @@ public static bool Test()
),
(
"[" +
- "{\"a\": 1, \"b\": \"[1, 2, 3]\", \"c\": \"{\\\"d\\\": \\\"y\\\"}\"}," +
- "{\"a\": 2, \"b\": \"[4, 5, 6]\", \"c\": \"{\\\"d\\\": \\\"z\\\"}\"}" +
- "]", // test stringified iterables
+ "{\"a\": 1, \"b\": \"[1, 2, 3]\", \"c\": \"{\\\"d\\\": \\\"y\\\"}\"}," +
+ "{\"a\": 2, \"b\": \"[4, 5, 6]\", \"c\": \"{\\\"d\\\": \\\"z\\\"}\"}" +
+ "]", // test stringified iterables
"a\tb\tc\r\n" +
- "1\t[1, 2, 3]\t{\"d\": \"y\"}\r\n" +
- "2\t[4, 5, 6]\t{\"d\": \"z\"}\r\n",
+ "1\t[1, 2, 3]\t\"{\"\"d\"\": \"\"y\"\"}\"\r\n" +
+ "2\t[4, 5, 6]\t\"{\"\"d\"\": \"\"z\"\"}\"\r\n",
'\t', '"', null, false, "\r\n"
),
( "[{\"a\": null, \"b\": 1.0}, {\"a\": \"blah\", \"b\": NaN}]", // nulls and NaNs
@@ -521,9 +521,9 @@ public static bool Test()
"cost,date,num\r\n100.5,1999-01-03 07:03:29,13\r\n", // datetimes
',', '"', null, false, "\r\n"
),
- ("[{\"name\": \"\\\"Dr. Blutentharst\\\"\", \"phone number\": \"420-997-1043\"}," +
+ ("[{\"name\": \"The Exalted \\\"Samuel Blutentharst\\\" of Doom\", \"phone number\": \"420-997-1043\"}," +
"{\"name\": \"\\\"Fjordlak the Deranged\\\"\", \"phone number\": \"blo-od4-blud\"}]", // internal quote chars
- "name,phone number\r\n\"Dr. Blutentharst\",420-997-1043\r\n\"Fjordlak the Deranged\",blo-od4-blud\r\n",
+ "name,phone number\r\n\"The Exalted \"\"Samuel Blutentharst\"\" of Doom\",420-997-1043\r\n\"\"\"Fjordlak the Deranged\"\"\",blo-od4-blud\r\n",
',', '"', null, false, "\r\n"
),
("[{\"a\": \"new\\r\\nline\", \"b\": 1}]", // internal newlines
@@ -535,7 +535,7 @@ public static bool Test()
',', '"', null, false, "\r"
),
("[{\"a\": \"n,ew\\nl'i\\rne\", \"b\": 1, \"c\": \"a\\r\\nbc\"}]", // internal newlines and quote chars (use '\'') and delims
- "a,b,c\r\n'n,ew\nl\\'i\rne',1,'a\r\nbc'\r\n",
+ "a,b,c\r\n'n,ew\nl''i\rne',1,'a\r\nbc'\r\n",
',', '\'', null, false, "\r\n"
),
(
diff --git a/JsonToolsNppPlugin/Tests/RemesPathTests.cs b/JsonToolsNppPlugin/Tests/RemesPathTests.cs
index 0ad00a4..33d9933 100644
--- a/JsonToolsNppPlugin/Tests/RemesPathTests.cs
+++ b/JsonToolsNppPlugin/Tests/RemesPathTests.cs
@@ -326,6 +326,9 @@ public static bool Test()
new Query_DesiredResult("sort_by(j`[[1, 2], [3, 1], [4, -1]]`, -1)", "[[4, -1], [3, 1], [1, 2]]"),
new Query_DesiredResult("sort_by(j`[\"abc\", \"defg\", \"h\", \"ij\"]`, s_len(@), true)", "[\"defg\", \"abc\", \"ij\", \"h\"]"), // sort_by with function as input
new Query_DesiredResult("quantile(flatten(@.foo[1:]), 0.5)", "5.5"),
+ new Query_DesiredResult("@{`b\\\\e\"\\r\\n\\t\\``: 1}.`b\\\\e\"\\r\\n\\t\\``", "1"), // make sure that indexing with the same quoted string used for the key returns the value associated with that key, even if the quoted string has escapes and stuff
+ new Query_DesiredResult("`\\\\\\` \\\\`", "\"\\\\` \\\\\""), // escapes directly before a backtick must be supported, as well as internal backticks preceded by an even number of '\\' chars
+ new Query_DesiredResult("j`{\"b\\\\\\\\e\\\"\\r\\n\\t\\`\": 1}`.`b\\\\e\"\\r\\n\\t\\``", "1"), // as above, but using a JSON literal for the same object
new Query_DesiredResult("float(@.foo[0])[:1]", "[0.0]"),
new Query_DesiredResult("not(is_expr(values(@.bar)))", "[true, false]"),
new Query_DesiredResult("round(@.foo[0] * 1.66)", "[0, 2, 3]"),
@@ -474,16 +477,19 @@ public static bool Test()
"[{\"al\": [2, 4], \"bt\": [\"null\"]}, {\"al\": [4, 2, 0], \"bt\": [\"number\", \"object\"]}]"),
new Query_DesiredResult("concat(@.foo[0][:]->(str(@)*(@+1)), keys(@.`7`[0])[:]->(@ + s_slice(@, ::-1)))",
"[\"0\", \"11\", \"222\", \"foooof\"]"),
+ new Query_DesiredResult("s_fa(`a&b&c\\nfoob&ar##`, csv_regex(@.foo[0][2] + 1, `&`, `\\n`, `#`))",
+ "[[\"a\", \"b\", \"c\"], [\"foo\", \"#b&ar#\", \"##\"]]"), // this is different from calling s_csv(3, `&`, `\n`, `#`) on the same input, because s_fa doesn't do the postprocessing of strings with quote characters
+ new Query_DesiredResult("str(csv_regex(5, , , `'`))", JNode.StrToString(JNode.StrToString(ArgFunction.CsvRowRegex(5, quote:'\''), true), true)),
// ===================== s_csv CSV parser ========================
// 3-column 14 rows, ',' delimiter, CRLF newline, '"' quote character, newline before EOF
new Query_DesiredResult("s_csv(`nums,names,cities\\r\\n" +
"nan,Bluds,BUS\\r\\n" +
",,\\r\\n" +
- "nan,\"\",BUS" +
- "\\r\\n0.5,\"df\\r\\ns \\\"d\\\" \",FUDG\\r\\n" +
- "0.5,df\"sd,FUDG\\r\\n" + // valid; unescaped quotes are fine on a quoted line
+ "nan,\"\",BUS\\r\\n" +
+ "0.5,\"df\\r\\ns \"\"d\"\" \",FUDG\\r\\n" + // newlines and quotes inside a value
+ "0.5,\"df\"\"sd\",FUDG\\r\\n" +
"\"\",,FUDG\\r\\n" +
- "0.5,df\\ns\\rd,\"\"\\r\\n" + // valid; neither \n nor \r is the designated newline
+ "0.5,\"df\\ns\\rd\",\"\"\\r\\n" +
"1.2,qere,GOLAR\\r\\n" +
"1.2,qere,GOLAR\\r\\n" +
"3.4,flodt,\"q,tun\"\\r\\n" +
@@ -491,30 +497,30 @@ public static bool Test()
"4.6,Kjond,YUNOB\\r\\n" +
"7,Unyir,\\r\\n`" +
", 3, , , , h)", // only 3 columns and string need to be specified; comma delim, CRLF newline, and '"' quote are defaults
- "[[\"nums\",\"names\",\"cities\"],[\"nan\",\"Bluds\",\"BUS\"],[\"\",\"\",\"\"],[\"nan\",\"\\\"\\\"\",\"BUS\"],[\"0.5\",\"\\\"df\\r\\ns \\\\\\\"d\\\\\\\" \\\"\",\"FUDG\"],[\"0.5\",\"df\\\"sd\",\"FUDG\"],[\"\\\"\\\"\",\"\",\"FUDG\"],[\"0.5\",\"df\\ns\\rd\",\"\\\"\\\"\"],[\"1.2\",\"qere\",\"GOLAR\"],[\"1.2\",\"qere\",\"GOLAR\"],[\"3.4\",\"flodt\",\"\\\"q,tun\\\"\"],[\"4.6\",\"Kjond\",\"YUNOB\"],[\"4.6\",\"Kjond\",\"YUNOB\"],[\"7\",\"Unyir\",\"\"]]"),
+ "[[\"nums\",\"names\",\"cities\"],[\"nan\",\"Bluds\",\"BUS\"],[\"\",\"\",\"\"],[\"nan\",\"\",\"BUS\"],[\"0.5\",\"df\\r\\ns \\\"d\\\" \",\"FUDG\"],[\"0.5\",\"df\\\"sd\",\"FUDG\"],[\"\",\"\",\"FUDG\"],[\"0.5\",\"df\\ns\\rd\",\"\"],[\"1.2\",\"qere\",\"GOLAR\"],[\"1.2\",\"qere\",\"GOLAR\"],[\"3.4\",\"flodt\",\"q,tun\"],[\"4.6\",\"Kjond\",\"YUNOB\"],[\"4.6\",\"Kjond\",\"YUNOB\"],[\"7\",\"Unyir\",\"\"]]"),
// 7 columns, 8 rows '\t' delimiter, LF newline, '\'' quote character, no newline before EOF
new Query_DesiredResult("s_csv(`nums\\tnames\\tcities\\tdate\\tzone\\tsubzone\\tcontaminated\\n" +
"nan\\tBluds\\tBUS\\t\\t1\\t''\\tTRUE\\n" +
"0.5\\tdfsd\\tFUDG\\t12/13/2020 0:00\\t2\\tc\\tTRUE\\n" +
"\\tqere\\tGOLAR\\t\\t3\\tf\\t\\n" +
- "1.2\\tqere\\t'GOL\\\\'AR'\\t\\t3\\th\\tTRUE\\n" +
+ "1.2\\tqere\\t'GO\\tLA''R'\\t\\t3\\th\\tTRUE\\n" + // third column is "GO\tLA'R"; the internal quote character ' is escaped by itself.
"''\\tflodt\\t'q\\ttun'\\t\\t4\\tq\\tFALSE\\n" +
"4.6\\tKjond\\t\\t\\t\\tw\\t''\\n" +
"4.6\\t'Kj\\nond'\\tYUNOB\\t10/17/2014 0:00\\t5\\tz\\tFALSE`" +
- ", 7, `\t`, `\n`, `'`, h)",
- "[[\"nums\",\"names\",\"cities\",\"date\",\"zone\",\"subzone\",\"contaminated\"],[\"nan\",\"Bluds\",\"BUS\",\"\",\"1\",\"''\",\"TRUE\"],[\"0.5\",\"dfsd\",\"FUDG\",\"12/13/2020 0:00\",\"2\",\"c\",\"TRUE\"],[\"\",\"qere\",\"GOLAR\",\"\",\"3\",\"f\",\"\"],[\"1.2\",\"qere\",\"'GOL\\\\'AR'\",\"\",\"3\",\"h\",\"TRUE\"],[\"''\",\"flodt\",\"'q\\ttun'\",\"\",\"4\",\"q\",\"FALSE\"],[\"4.6\",\"Kjond\",\"\",\"\",\"\",\"w\",\"''\"],[\"4.6\",\"'Kj\\nond'\",\"YUNOB\",\"10/17/2014 0:00\",\"5\",\"z\",\"FALSE\"]]"),
+ ", 7, `\t`, `\\n`, `'`, h)",
+ "[[\"nums\",\"names\",\"cities\",\"date\",\"zone\",\"subzone\",\"contaminated\"],[\"nan\",\"Bluds\",\"BUS\",\"\",\"1\",\"\",\"TRUE\"],[\"0.5\",\"dfsd\",\"FUDG\",\"12/13/2020 0:00\",\"2\",\"c\",\"TRUE\"],[\"\",\"qere\",\"GOLAR\",\"\",\"3\",\"f\",\"\"],[\"1.2\",\"qere\",\"GO\\tLA'R\",\"\",\"3\",\"h\",\"TRUE\"],[\"\",\"flodt\",\"q\\ttun\",\"\",\"4\",\"q\",\"FALSE\"],[\"4.6\",\"Kjond\",\"\",\"\",\"\",\"w\",\"\"],[\"4.6\",\"Kj\\nond\",\"YUNOB\",\"10/17/2014 0:00\",\"5\",\"z\",\"FALSE\"]]"),
// 1-column, '^' delimiter, '$' quote character, '\r' newline, 7 valid rows with 2 invalid rows at the end
new Query_DesiredResult("s_csv(`a\\r" +
"$b^c$\\r" +
- "$new\\r\\$line\\$$\\r" +
+ "$new\\r$$line\\$$$\\r" + // two places where quote char escapes itself
"\\r" +
"\\r" +
"$$\\r" +
- "d$\ne\\r" +
+ "$d$$\\ne$\\r" +
"$ $ $\\r" + // invalid because there's an unescaped quote character on a quoted line
"f^g`" + // invalid because there's a delimiter on an unquoted line
", 1, `^`, `\\r`, `$`, h)",
- "[\"a\",\"$b^c$\",\"$new\\r\\\\$line\\\\$$\",\"\",\"\",\"$$\",\"d$\\ne\"]"),
+ "[\"a\",\"b^c\",\"new\\r$line\\\\$\",\"\",\"\",\"\",\"d$\\ne\"]"),
// 1-column, default (delimiter, newline, and quote), parse column 1 as number, 4 number rows and 4 non-number rows
new Query_DesiredResult("s_csv(`1.5\\r\\n" +
"-2\\r\\n" +
@@ -524,19 +530,19 @@ public static bool Test()
"1e3b\\r\\n" +
"-a\\r\\n" +
"+b`" +
- ", 1, , , , h, 1)", // use shorthand to omit delimiter, newline, and quote
+ ", 1, , , , h, 0)", // use shorthand to omit delimiter, newline, and quote
"[1.5,-2,3e14,2e-3,\"\",\"1e3b\",\"-a\", \"+b\"]"),
- // 3-column, default (delimiter, newline, and quote), parse columns 1 and -1 (becomes column 3) as numbers
+ // 3-column, default (delimiter, newline, and quote), parse columns 0 and -1 (becomes column 2) as numbers
new Query_DesiredResult("s_csv(`1.5,foo,bar\\r\\n" +
"a,-3,NaN\\r\\n" +
"baz,quz,-Infinity`" +
- ", 3, null, null, null, h, 1, -1)",
+ ", 3, null, null, null, h, 0, -1)",
"[[1.5, \"foo\", \"bar\"], [\"a\", \"-3\", NaN], [\"baz\", \"quz\", -Infinity]]"),
- // 3-column, skip header, default (delimiter, newline, and quote), parse columns 1 and -1 (becomes column 3) as numbers
+ // 3-column, skip header, default (delimiter, newline, and quote), parse columns 0 and -1 (becomes column 2) as numbers
new Query_DesiredResult("s_csv(`1.5,foo,bar\\r\\n" +
"a,-3,NaN\\r\\n" +
"baz,quz,-Infinity`" +
- ", 3, null, null, null, n, 1, -1)",
+ ", 3, null, null, null, n, 0, -1)",
"[[\"a\", \"-3\", NaN], [\"baz\", \"quz\", -Infinity]]"),
// 4-column, map header to values, '\t' separator, '\n' newline, parse 2nd-to-last column as numbers
new Query_DesiredResult("s_csv(`col1\\tcol2\\tcol3\\tcol4\\n" +
@@ -550,17 +556,17 @@ public static bool Test()
new Query_DesiredResult("s_csv(`a\\r" +
"$b^c$\\r" +
"\\r" +
- "$$\\r" +
- "d$\ne`" +
+ "$$$$\\r" + // need four consecutive quote chars to represent a value that is exactly one literal quote char
+ "d\\ne`" + // invalid b/c internal '\n'
", 1, `^`, `\\r`, `$`, d)",
- "[{\"a\": \"$b^c$\"}, {\"a\": \"\"}, {\"a\": \"$$\"}, {\"a\": \"d$\\ne\"}]"),
+ "[{\"a\": \"b^c\"}, {\"a\": \"\"}, {\"a\": \"$\"}]"),
// 1-column, skip header, '^' delimiter, '$' quote character, '\r' newline, parse as numbers
new Query_DesiredResult("s_csv(`a\\r" +
"1\\r" +
".3\\r" +
"5\\r" +
"-7.2`" +
- ", 1, `^`, `\\r`, `$`, , 1)", // omit header arg because it is default
+ ", 1, `^`, `\\r`, `$`, , 0)", // omit header arg because it is default
"[1, 0.3, 5, -7.2]"),
// 1-column, map header to values, '^' delimiter, '$' quote character, '\r' newline, parse as numbers
new Query_DesiredResult("s_csv(`foo\\r" +
@@ -568,7 +574,7 @@ public static bool Test()
".3\\r" +
"5\\r" +
"-7.2`" +
- ", 1, `^`, `\\r`, `$`, d, 1)",
+ ", 1, `^`, `\\r`, `$`, d, 0)",
"[{\"foo\": 1}, {\"foo\": 0.3}, {\"foo\": 5}, {\"foo\": -7.2}]"),
// ====================== s_fa function for parsing regex search results as string arrays or arrays of arrays of strings =========
// 2 capture groups (2nd optional), parse the first group as number
@@ -577,15 +583,20 @@ public static bool Test()
" 3. baz\\t schnaz\\r\\n" +
"4. 7 a\\r\\n" +
"5. `" +
- ", `^[\\x20\\t]*(\\d+)\\.\\s*(\\w+)?`, 1)",
+ ", `^[\\x20\\t]*(\\d+)\\.\\s*(\\w+)?`,,0)",
"[[1, \"foo\"], [2, \"bar\"], [3, \"baz\"], [4, \"7\"], [5, \"\"]]"),
// no capture groups, capture and parse hex numbers
- new Query_DesiredResult("s_fa(`-0x12345 0x000abcdef\\r\\n0x067890 -0x0ABCDEF\\r\\n0x123456789abcdefABCDEF`, `(?:INT)`, 1)", "[-74565,11259375,424080,-11259375, \"0x123456789abcdefABCDEF\"]"),
+ new Query_DesiredResult("s_fa(`-0x12345 0x000abcdef\\r\\n0x067890 -0x0ABCDEF\\r\\n0x123456789abcdefABCDEF`, `(?:INT)`,, 0)", "[-74565,11259375,424080,-11259375, \"0x123456789abcdefABCDEF\"]"),
// no capture groups, capture hex numbers but do not parse as numbers
new Query_DesiredResult("s_fa(`0x12345 0x000abcdef\\r\\n0x067890 0x0ABCDEF\\r\\n0x123456789abcdefABCDEF`, g`(?:INT)`)", "[\"0x12345\",\"0x000abcdef\",\"0x067890\",\"0x0ABCDEF\",\"0x123456789abcdefABCDEF\"]"),
- new Query_DesiredResult("s_fa(`-1 23 +7 -99 +0x1a -0xA2 0x7b`, g`(INT)`, 1)", "[-1,23,7,-99,26,-162,123]"),
- // capture every (word, floating point number, floating point number, two lowercase letters separated) tuple inside a element in HTML,
- // and parse the values in the 2nd and third columns as numbers
+ // no capture groups, include full match as first item, capture hex numbers but don't parse as numbers
+ new Query_DesiredResult("s_fa(`0x12345 0x000abcdef\\r\\n0x067890 0x0ABCDEF\\r\\n0x123456789abcdefABCDEF`, g`(?:INT)`, true)", "[\"0x12345\",\"0x000abcdef\",\"0x067890\",\"0x0ABCDEF\",\"0x123456789abcdefABCDEF\"]"),
+ // 1 capture group, include full match as first item, capture numbers and parse full match as numbers (but not first capture group)
+ new Query_DesiredResult("s_fa(`-1 23 +7 -99`, g`(INT)`, true, 0)", "[[-1, \"-1\"], [23, \"23\"], [7, \"+7\"], [-99, \"-99\"]]"),
+ // 1 capture group, capture hex numbers and parse as numbers
+ new Query_DesiredResult("s_fa(`-1 23 +7 -99 +0x1a -0xA2 0x7b`, g`(INT)`,, 0)", "[-1,23,7,-99,26,-162,123]"),
+ // capture every (word, floating point number, floating point number, two lowercase letters separated by '_') tuple inside a
element in HTML,
+ // and parse the values in the 2nd and 2nd-to-last columns as numbers
new Query_DesiredResult("s_fa(`
captured 2 +3 a_b \\r\\n\\r\\n failure 1 2 A_B \\r\\nalsocaptured -8\\t 5 \\tq_r
" +
"anothercatch\\t +3.5 -4.2 s_t
" +
"\\r\\n \\r\\n
nocatch -3 0.7E3
\\r\\n" +
@@ -593,8 +604,8 @@ public static bool Test()
"
finalcatch -9E2 7 y_z\\t
`, " +
"g`(?s-i)(?:
|(?!\\A)\\G)" + // match starting at a
tag OR wherever the last match ended unless wraparound (?!\A)\G
"(?:(?!
).)*?" + // keep matching until the close tag
- "([a-zA-Z]+)\\s+(NUMBER)\\s+(NUMBER)\\s+([a-z]_[a-z])`," +
- "*j`[2, 3]`)",
+ "([a-zA-Z]+)\\s+(NUMBER)\\s+(NUMBER)\\s+([a-z]_[a-z])`" +
+ ", false, *j`[1, -2]`)",
"[[\"captured\",2,3, \"a_b\"],[\"alsocaptured\",-8,5, \"q_r\"],[\"anothercatch\",3.5,-4.2, \"s_t\"],[\"finalcatch\",-900.0,7, \"y_z\"]]"),
// lines of 3 a-z chars (captured), then a not-captured (a dash and a number less than 9)
new Query_DesiredResult("s_fa(`abc-1\\r\\nbcd-7\\r\\ncde--19.5\\r\\ndef--9.2\\r\\nefg-10\\r\\nfgh-9\\r\\nab-1`" +
@@ -604,8 +615,15 @@ public static bool Test()
new Query_DesiredResult("s_fa(`foo 1. fun\\n" +
"bar .25 Baal\\n" +
"quz -2.3e2 quail`" +
- ", `^\\w+ (NUMBER) \\w+$`, 1)",
+ ", `^\\w+ (NUMBER) \\w+$`,, 1)",
"[1.0, 0.25, -230.0]"),
+ // entire line must be a three-letter word (all lowercase), followed by a colon, then any number of space chars, then (a number (capture group 1)), then any number of space chars, then (a word (capture group 2)), then an optional trailing s after capture group 2, then EOL
+ // full line is included as first item of capture group
+ new Query_DesiredResult("s_fa(`foo: 1 bagel\\r\\n" +
+ "BAR: 83 dogs\\r\\n" + // not matched (leading 3-letter word is uppercase)
+ "xyz: 9314 quiches`" +
+ ", `^[a-z]{3}:\\x20+(\\d+)\\x20+(\\w+?)s?\\r?$`, true, 1)",
+ "[[\"foo: 1 bagel\\r\", 1, \"bagel\"], [\"xyz: 9314 quiches\", 9314, \"quiche\"]]"),
};
int ii = 0;
int tests_failed = 0;
@@ -838,6 +856,7 @@ public static bool Test()
new []{"@![@ > 3]", "[1, 2, 3, 4]"},
new []{"@!{@ > 3}", "[1, 2, 3, 4]"},
new []{"s_mul(,1)", "[]"}, // omitting a non-optional argument with the "no-token-instead-of-null" shorthand
+ new []{"`\\\\``", "[]"}, // even number of escapes before a backtick inside a string
});
// test issue where sometimes a binop does not raise an error when it operates on two invalid types
string[] invalid_others = new string[] { "{}", "[]", "\"1\"" };
diff --git a/JsonToolsNppPlugin/Tests/UserInterfaceTests.cs b/JsonToolsNppPlugin/Tests/UserInterfaceTests.cs
index 2b48a9b..eebff55 100644
--- a/JsonToolsNppPlugin/Tests/UserInterfaceTests.cs
+++ b/JsonToolsNppPlugin/Tests/UserInterfaceTests.cs
@@ -546,9 +546,9 @@ public static bool ExecuteFileManipulation(string command, List
messages
("compress", new object[]{}),
("compare_text", new object[]{"[0.25,0.75,0.5]blah[0,1.25][\"boo\",\"a\",\"c\"]"}),
// TEST QUERY THAT PRODUCES OBJECT WITH NON-"START,END" KEYS ON A FILE WITH SELECTIONS
- ("tree_query", new object[]{"j`{\"a\": \"foo\", \"b\": [1, 2]}`"}),
+ ("tree_query", new object[]{"j`{\"a\": \"foo\", \"b\\n\": [1, 2]}`"}),
("treenode_click", new object[]{new string[] {"a : \"foo\""} }),
- ("treenode_click", new object[]{new string[] {"b : [2]", "1 : 2"} }),
+ ("treenode_click", new object[]{new string[] {"b\\n : [2]", "1 : 2"} }),
// TEST MULTI-STATEMENT QUERY THAT DOESN'T MUTATE ON A FILE WITH SELECTIONS
("tree_query", new object[]{"var s = str(@);\r\n" +
"var sl = s_len(s);\r\n" +
diff --git a/docs/README.md b/docs/README.md
index 59d7027..d547529 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -66,6 +66,7 @@ __NOTES__
- `Enter` while the tree is selected toggles the selected node between expanded/collapsed.
- Up and down arrow keys can also navigate the tree.
- `Escape` takes focus from the tree view back to the editor.
+5. Beginning in [v4.4.0](/CHANGELOG.md#440---2022-11-23), you can have multiple tree views open.
If a node has a `+` or `-` sign next to it, you can click on that button to expand the children of the node, as shown here.
@@ -258,6 +259,8 @@ You can submit RemesPath queries in textbox above the tree, which by default has
Once you've submitted a query, you can use several other features of the JSON viewer.
First, you can open a new buffer containing the query result.
+Prior to [v6.0](/CHANGELOG.md#600---unreleased-2023-mm-dd), submitting a query automatically attempted to parse whatever document was currently open, thus potentially rebinding the tree to a different document. Starting in [v6.0](/CHANGELOG.md#600---unreleased-2023-mm-dd), submitting a query only triggers parsing of whatever document the treeview is currently associated with.
+
![JSON viewer open query result in new buffer](/docs/json%20viewer%20query%20save.PNG)
## Find and Replace Form ##
@@ -670,7 +673,7 @@ For every file that the JSON tries and fails to parse, the exception will be cau
If you want to see the JSON found by this tool, just click the `View results in buffer` button. This will open a new buffer in Notepad++ with an object mapping filenames and URLs to the JSON associated with them.
-This form has its own tree viewer associated with this buffer. You can use this plugin's normal tree viewer for other buffers. If you close the buffer, the tree viewer is destroyed.
+This form has its own tree viewer associated with this buffer. You can use this plugin's normal tree viewers for other buffers. If you close the buffer, the tree viewer is destroyed.
## Clearing selected files ##
diff --git a/docs/RemesPath.md b/docs/RemesPath.md
index 37fdc47..ecaf1f5 100644
--- a/docs/RemesPath.md
+++ b/docs/RemesPath.md
@@ -283,6 +283,11 @@ __EXAMPLES__
- `concat([1, 2], {"a": 2})` raises an exception because you can't concatenate arrays with objects.
- `concat(1, [1, 2])` raises an exception because you can't concatenate anything with non-iterables.
+----
+`csv_regex(nColumns: int, delim: string=",", newline: string="\r\n", quote_char: string="\"")`
+
+Returns the regex that [`s_csv`](#vectorized-functions) uses to match a single row of a CSV file (formatted according to [RFC 4180](https://www.ietf.org/rfc/rfc4180.txt)) with delimiter `delim`, `nColumns` columns, quote character `quote_char`, and newline `newline`.
+
-----
`dict(x: array) -> object`
@@ -735,8 +740,8 @@ __Arguments:__
* `csvText` (1st arg): the text of a CSV file encoded as a JSON string
* `nColumns` (2nd arg): the number of columns
* `delimiter` (3rd arg, default `,`): the column separator
-* `newline` (4th arg default `CR LF`): the newline
-* `quote` (5th arg, default `"`): the character used to wrap columns that contain `newline` or `delimiter`.
+* `newline` (4th arg, default `\r\n`): the newline. Must be one of `\r\n`, `\r`, or `\n`.
+* `quote` (5th arg, default `"`): the character used to wrap columns that contain `newline`, `quote`, or `delimiter`.
* `header_handling` (6th arg, default `n`): how the header row is treated. *Must be one of `n`, `d`, or `h`.* Each of these options will be explained in the list below.
* *`n`: skip header row (this is the default)*. This would parse the CSV file `"foo,bar\n1,2` as `[["1", "2"]]`
* *`h`: include header row*. This would parse the CSV file `"foo,bar\n1,2` as `[["foo", "bar"], ["1", "2"]]`
@@ -749,14 +754,17 @@ __Return value:__
__Notes:__
* *Any row that does not have exactly `nColumns` columns will be ignored completely.*
-* Any column that starts and ends with a quote character is assumed to be a quoted string. In a quoted string, anything is fine, but *literal quote characters in a quoted column must be escaped with `\\` (backslash)*.
- * For example, `"\"quoted\",string,in,quoted column"` is a valid column in a file with `,` delimiter and `"` quote character.
+* See [RFC 4180](https://www.ietf.org/rfc/rfc4180.txt) for the accepted format of CSV files. A brief synopsis is below.
+* Any column that starts and ends with a quote character is assumed to be a quoted string. In a quoted string, anything is fine, but *a literal quote character in a quoted column must be escaped with itself*.
+ * For example, `"""quoted"",string,in,quoted column"` is a valid column in a file with `,` delimiter and `"` quote character.
* On the other hand, `" " "` is *not a valid column if `"` is the quote character* because it contains an unescaped `"` in a quoted column.
* Finally, `a,b` would be treated as two columns in a CSV file with `"` quote character, but `"a,b"` is a single column because a comma is not treated as a column separator in a quoted column.
+* Columns containing literal quote characters or the newline characters `\r` and `\n` must be wrapped in quotes.
+* When `s_csv` parses a file, quoted values are parsed without the enclosing quotes and with any internal doubled quote characters replaced with a single instance of the quote character. Thus the valid value (for `"` quote character) `"foo""bar"` would be parsed as the JSON string `"foo\"bar"`
* You can pass in `null` for the 3rd, 4th, and 5th args. Any instance of `null` in those args will be replaced with the default value.
__Example:__
-Suppose you have the JSON string `"nums,names,cities,date,zone,subzone,contaminated\nnan,Bluds,BUS,,1,'',TRUE\n0.5,dfsd,FUDG,12/13/2020 0:00,2,c,TRUE\n,qere,GOLAR,,3,f,\n1.2,qere,'GOL\\'AR',,3,h,TRUE\n'',flodt,'q,tun',,4,q,FALSE\n4.6,Kjond,,,,w,''\n4.6,'Kj\nond',YUNOB,10/17/2014 0:00,5,z,FALSE"`
+Suppose you have the JSON string `"nums,names,cities,date,zone,subzone,contaminated\nnan,Bluds,BUS,,1,'',TRUE\n0.5,dfsd,FUDG,12/13/2020 0:00,2,c,TRUE\n,qere,GOLAR,,3,f,\n1.2,qere,'GOL''AR',,3,h,TRUE\n'',flodt,'q,tun',,4,q,FALSE\n4.6,Kjond,,,,w,''\n4.6,'Kj\nond',YUNOB,10/17/2014 0:00,5,z,FALSE"`
which represents this CSV file (7 columns, comma delimiter, `LF` newline, `'` quote character):
```
@@ -764,7 +772,7 @@ nums,names,cities,date,zone,subzone,contaminated
nan,Bluds,BUS,,1,'',TRUE
0.5,dfsd,FUDG,12/13/2020 0:00,2,c,TRUE
,qere,GOLAR,,3,f,
-1.2,qere,'GOL\'AR',,3,h,TRUE
+1.2,qere,'GOL''AR',,3,h,TRUE
'',flodt,'q,tun',,4,q,FALSE
4.6,Kjond,,,,w,''
4.6,'Kj
@@ -775,51 +783,58 @@ Notice that the 8th row of this CSV file has a newline in the middle of the seco
The query ``s_csv(@, 7, `,`, `\n`, `'`)`` will correctly parse this as *an array of seven 7-string subarrays (omitting the header)*, shown below:
```json
[
- ["nan", "Bluds", "BUS", "", "1", "''", "TRUE"],
+ ["nan", "Bluds", "BUS", "", "1", "", "TRUE"],
["0.5", "dfsd", "FUDG", "12/13/2020 0:00", "2", "c", "TRUE"],
["", "qere", "GOLAR", "", "3", "f", ""],
- ["1.2", "qere", "'GOL\\'AR'", "", "3", "h", "TRUE"],
- ["''", "flodt", "'q,tun'", "", "4", "q", "FALSE"],
- ["4.6", "Kjond", "", "", "", "w", "''"],
- ["4.6", "'Kj\nond'", "YUNOB", "10/17/2014 0:00", "5", "z", "FALSE"]
+ ["1.2", "qere", "GOL'AR", "", "3", "h", "TRUE"],
+ ["", "flodt", "q,tun", "", "4", "q", "FALSE"],
+ ["4.6", "Kjond", "", "", "", "w", ""],
+ ["4.6", "Kj\nond", "YUNOB", "10/17/2014 0:00", "5", "z", "FALSE"]
]
```
-The query ``s_csv(@, 7, `,`, `\n`, `'`, h, 1, -3)`` will correctly parse this as *an array of eight 7-item subarrays (including the heaader) with the 1st and 3rd-to-last (i.e. 5th) columns parsed as numbers where possible*, shown below:
+The query ``s_csv(@, 7, `,`, `\n`, `'`, h, 0, -3)`` will correctly parse this as *an array of eight 7-item subarrays (including the header) with the 1st and 3rd-to-last (i.e. 5th) columns parsed as numbers where possible*, shown below:
```json
[
["nums", "names", "cities", "date", "zone", "subzone", "contaminated"],
- ["nan", "Bluds", "BUS", "", 1, "''", "TRUE"],
+ ["nan", "Bluds", "BUS", "", 1, "", "TRUE"],
[0.5, "dfsd", "FUDG", "12/13/2020 0:00", 2, "c", "TRUE"],
["", "qere", "GOLAR", "", 3, "f", ""],
- [1.2, "qere", "'GOL\\'AR'", "", 3, "h", "TRUE"],
- ["''", "flodt", "'q,tun'", "", 4, "q", "FALSE"],
- [4.6, "Kjond", "", "", "", "w", "''"],
- [4.6, "'Kj\nond'", "YUNOB", "10/17/2014 0:00", 5, "z", "FALSE"]
+ [1.2, "qere", "GOL'AR", "", 3, "h", "TRUE"],
+ ["", "flodt", "q,tun", "", 4, "q", "FALSE"],
+ [4.6, "Kjond", "", "", "", "w", ""],
+ [4.6, "Kj\nond", "YUNOB", "10/17/2014 0:00", 5, "z", "FALSE"]
]
```
---
-`s_fa(x: string, pat: regex | string, ...: int) -> array[string | number] | array[array[string | number]]`
+`s_fa(x: string, pat: regex | string, includeFullMatchAsFirstItem: bool = false, ...: int) -> array[string | number] | array[array[string | number]]`
__Added in [v6.0](/CHANGELOG.md#600---unreleased-2023-mm-dd).__
-* If `pat` is a regex with no capture groups or one capture group, returns an array of the substrings of `x` that match `pat`.
-* If `pat` has multiple capture groups, returns an array of subarrays of substrings, where each subarray has a number of elements equal to the number of capture groups.
-* The third argument and any subsequent argument must all be the number of a capture group to attempt to parse as a number (`1` matches the match value if there were no capture groups). [Any valid number within the JSON5 specification](https://spec.json5.org/#numbers) can be parsed. If a capture group cannot be parsed as a number, the capture group is returned.
+* If the third argument, `includeFullMatchAsFirstItem`, is set to `false` (the default):
+ * If `pat` is a regex with *no capture groups or one capture group*, returns an array of the substrings of `x` that match `pat`.
+ * If `pat` has *multiple capture groups*, returns an array of subarrays of substrings, where each subarray has a number of elements equal to the number of capture groups.
+* Otherwise:
+ * If `pat` is a regex with *no capture groups*, returns an array of the substrings of `x` that match `pat`.
+ * If `pat` has *at least one capture group*, returns an array of subarrays of substrings, where each subarray has a number of elements equal to the number of capture groups + 1, *and the first element of each subarray is the entire text of the match (including the uncaptured text)*.
+
+The fourth argument and any subsequent argument must all be the number of a capture group to attempt to parse as a number (`0` matches the match value if there were no capture groups). [Any valid number within the JSON5 specification](https://spec.json5.org/#numbers) can be parsed. If a capture group cannot be parsed as a number, the capture group is returned. As with `s_csv` above, you can use a negative number to parse the nth-to-last column as a number instead of the nth column as a number.
__SPECIAL NOTES FOR `s_fa`:__
1. *`s_fa` treats `^` as the beginning of a line and `$` as the end of a line*, but elsewhere in JsonTools `^` matches only the beginning of the string and `$` matches only the end of the string.
2. Every instance of `(INT)` in `pat` will be replaced by a regex that captures a decimal number or (a hex number preceded by `0x`), optionally preceded by a `+` or `-`. A noncapturing regex that matches the same thing is available through `(?:INT)`.
3. Every instance of `(NUMBER)` in `pat` will be replaced by a regex that captures a decimal floating point number. A noncapturing regex that matches the same thing is available through `(?:NUMBER)`. *Neither `(NUMBER)` nor `(?:NUMBER)` matches `NaN` or `Infinity`, but those can be parsed if desired.*
-4. Although `s_csv` allows you to parse the nth-to-last column as a number with negative numbers for 6th and subsequent args, negative numbers *cannot* be used as optional args to this function to parse the nth-to-last capture groups as numbers.
+4. *`s_fa` may be very slow if `pat` is a function of input,* because the above described regex transformations need to be applied every time the function is called instead of just once at compile time.
__Examples:__
1. ``s_fa(`1 -1 +2 -0xF +0x1a 0x2B`, `(INT)`)`` will return `["1", "-1", "+2", "-0xF", "+0x1a", "0x2B"]`
-2. ``s_fa(`1 -1 +2 -0xF +0x1a 0x2B 0x10000000000000000`, `(?:INT)`, 1)`` will return `[1, -1, 2, -15, 26, 43, "0x10000000000000000"]` because passing `1` as the third arg caused all the match results to be parsed as integers, except `0x10000000000000000`, which stayed as a string because its numeric value was too big for the 64-bit integers used in JsonTools.
-3. ``s_fa(`a 1.5 1\r\nb -3e4 2\r\nc -.2 6`, `^(\w+) (NUMBER) (INT)\r?$`, 2)`` will return `[["a",1.5,"1"],["b",-30000.0,"2"],["c",-0.2,"6"]]`. Note that the second column but not the third will be parsed as a number, because only `2` was passed in as the number of a capture group to parse as a number.
-4. ``s_fa(`a 1.5 1\r\nb -3e4 2\r\nc -.2 6`, `^(\w+) (NUMBER) (INT)\r?$`, 2, 3)`` will return `[["a",1.5,1],["b",-30000.0,2],["c",-0.2,6]]`. This time the same input is parsed with numbers in the second and third columns because `2` and `3` were passed as optional args.
-5. ``s_fa(`a 1.5 1\r\nb -3e4 2\r\nc -.2 6`, `^(\w+) (?:NUMBER) (INT)\r?$`, 2)`` will return `[["a",1],["b",2],["c",6]]`. This time the same input is parsed with only two columns, because we used a noncapturing version of the number-matching regex
+2. ``s_fa(`1 -1 +2 -0xF +0x1a 0x2B 0x10000000000000000`, `(?:INT)`, false, 0)`` will return `[1, -1, 2, -15, 26, 43, "0x10000000000000000"]` because passing `0` as the fourth arg caused all the match results to be parsed as integers, except `0x10000000000000000`, which stayed as a string because its numeric value was too big for the 64-bit integers used in JsonTools.
+3. ``s_fa(`a 1.5 1\r\nb -3e4 2\r\nc -.2 6`, `^(\w+) (NUMBER) (INT)\r?$`, false, 1)`` will return `[["a",1.5,"1"],["b",-30000.0,"2"],["c",-0.2,"6"]]`. Note that the second column but not the third will be parsed as a number, because only `1` was passed in as the number of a capture group to parse as a number.
+4. ``s_fa(`a 1.5 1\r\nb -3e4 2\r\nc -.2 6`, `^(\w+) (NUMBER) (INT)\r?$`, false, -2, 2)`` will return `[["a",1.5,1],["b",-30000.0,2],["c",-0.2,6]]`. This time the same input is parsed with numbers in the second-to-last and third columns because `-2` and `2` were passed as optional args.
+5. ``s_fa(`a 1.5 1\r\nb -3e4 2\r\nc -.2 6`, `^(\w+) (?:NUMBER) (INT)\r?$`, false, 1)`` will return `[["a",1],["b",2],["c",6]]`. This time the same input is parsed with only two columns, because we used a noncapturing version of the number-matching regex.
+6. ``s_fa(`a1 b+2 c-0xF d+0x1a`, `[a-z](INT)`, true, 1)`` will return `[["a1",1],["b+2",2],["c-0xF",-15],["d+0x1a",26]]` because the third argument is `true` and there is one capture group, meaning that the matches will be represented as two-element subarrays, with the first element being the full text of the match, and the second element being the captured integer parsed as a number.
+7. ``s_fa(`a1 b+2 c-0xF d+0x1a`, `[a-z](?:INT)`, true)`` will return `["a1","b+2","c-0xF","d+0x1a"]` because the third argument is `true` but there are no capture groups, so an array of strings is returned instead of 1-element subarrays.
----
`s_find(x: string, sub: regex | string) -> array[string]`
diff --git a/docs/json-to-csv.md b/docs/json-to-csv.md
index f27707c..10b6250 100644
--- a/docs/json-to-csv.md
+++ b/docs/json-to-csv.md
@@ -1,6 +1,10 @@
# Converting JSON to CSVs #
This app uses an [algorithm](https://github.com/molsonkiko/JsonToolsNppPlugin/blob/main/JsonToolsNppPlugin/JSONTools/JsonTabularize.cs) based on analysis of a JSON iterable's [schema](https://github.com/molsonkiko/JsonToolsNppPlugin/blob/main/JsonToolsNppPlugin/JSONTools/JsonSchemaMaker.cs) (generated on the fly) to attempt to convert it into a table.
+As of [v6.0](/CHANGELOG.md#600---unreleased-2023-mm-dd), the CSV files generated by JsonTools use an algorithm similar to [RFC 4180](https://www.ietf.org/rfc/rfc4180.txt), in that all values containing the quote character, `\r`, or `\n` must be wrapped in quotes, and quote characters are escaped by doubling them up.
+
+[Pre-v6.0 docs are here](https://github.com/molsonkiko/JsonToolsNppPlugin/blob/110cbb7d30c6a48cd2c7cfac3cb65534230b9c86/docs/json-to-csv.md).
+
At present, four [strategies](#strategies) for making a table are supported.
# Options #
@@ -128,8 +132,8 @@ For example, consider
This will be tabularized to
```
a,b,c
-1,"[1, 2, 3]",{"d": "y"}
-2,"[4, 5, 6]",{"d": "z"}
+1,"[1, 2, 3]","{""d"": ""y""}"
+2,"[4, 5, 6]","{""d"": ""z""}"
```
Meanwhile,
```json