Skip to content

Commit

Permalink
Add rand_schema RemesPath function
Browse files Browse the repository at this point in the history
Also improve performance of string generation in RandomJsonFromSchema
  • Loading branch information
molsonkiko committed Sep 25, 2024
1 parent 170a577 commit c9ea2ea
Show file tree
Hide file tree
Showing 8 changed files with 124 additions and 78 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
### Added

1. When a file is renamed, the name of a [tree view](/docs/README.md#json-tools-overview) associated with that file also changes to match the new name.
2. Add [`rand_schema` RemesPath function](/docs/RemesPath.md#non-vectorized-functions) to generate random JSON from schema.

### Changed

Expand Down
24 changes: 7 additions & 17 deletions JsonToolsNppPlugin/JSONTools/RandomJsonFromSchema.cs
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ private JNode RandomNull(JNode schema, JObject refs, int recursionDepth)
return new JNode();
}

private static readonly string PRINTABLE = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ \t\r\n";
public static readonly string PRINTABLE = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ \t\r\n";

private static readonly string EXTENDED_ASCII = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";

Expand Down Expand Up @@ -139,24 +139,14 @@ private JNode RandomString(JNode schema, JObject refs, int recursionDepth)
exclusiveMaxLength = (int)maxLengthVal + 1;
}
int length = random.Next(minLength, exclusiveMaxLength);
StringBuilder sb = new StringBuilder(length);
if (extendedAsciiStrings)
{
for (int ii = 0; ii < length; ii++)
{
char randChar = EXTENDED_ASCII[random.Next(1, 256)]; // not allowing \x00 because that will terminate string early in C
sb.Append(randChar);
}
}
else
char[] chars = new char[length];
for (int ii = 0; ii < length; ii++)
{
for (int ii = 0; ii < length; ii++)
{
char randChar = PRINTABLE[random.Next(PRINTABLE.Length)];
sb.Append(randChar);
}
chars[ii] = extendedAsciiStrings
? EXTENDED_ASCII[random.Next(1, 256)] // not allowing \x00 because that will terminate string early in C
: PRINTABLE[random.Next(PRINTABLE.Length)];
}
return new JNode(sb.ToString(), Dtype.STR, 0);
return new JNode(new string(chars), Dtype.STR, 0);
}
#endregion

Expand Down
11 changes: 11 additions & 0 deletions JsonToolsNppPlugin/JSONTools/RemesPathFunctions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1630,6 +1630,16 @@ public static JNode RandomFrom0To1(List<JNode> args)
return new JNode(rand, Dtype.FLOAT, 0);
}

/// <summary>
/// rand_schema(schema: object, minArrayLength: int = 0, maxArrayLength: int = 10, extendedAsciiStrings: bool = false, usePatterns: bool = false) -> anything<br></br>
/// returns the result of RandomJsonFromSchema.RandomJson called with schema and the four optional settings<br></br>
/// An optional argument that is omitted, or whose value does not have the expected type, falls back to its default.<br></br>
/// The two array lengths also fall back to their defaults if they are negative or not less than int.MaxValue.
/// </summary>
/// <param name="args">the schema, optionally followed by minArrayLength, maxArrayLength, extendedAsciiStrings and usePatterns (in that order)</param>
/// <returns>whatever RandomJsonFromSchema.RandomJson produces for the schema</returns>
public static JNode RandomFromSchema(List<JNode> args)
{
    JNode schema = args[0];

    // read a non-negative integer argument that fits in an int, or use the fallback
    int LengthArg(int index, int fallback)
    {
        if (args.Count > index && args[index].value is long candidate && candidate >= 0 && candidate < int.MaxValue)
        {
            return (int)candidate;
        }
        return fallback;
    }

    // read a boolean argument; anything other than a literal true counts as false
    bool FlagArg(int index)
    {
        if (args.Count <= index)
        {
            return false;
        }
        return args[index].value is bool flag && flag;
    }

    int minArrayLength = LengthArg(1, 0);
    int maxArrayLength = LengthArg(2, 10);
    bool extendedAsciiStrings = FlagArg(3);
    bool usePatterns = FlagArg(4);
    return RandomJsonFromSchema.RandomJson(schema, minArrayLength, maxArrayLength, extendedAsciiStrings, usePatterns);
}

/// <summary>
/// randint(start: int, end: int=null) -> int<br></br>
/// returns a random integer from start (inclusive) to end (exclusive)<br></br>
Expand Down Expand Up @@ -3675,6 +3685,7 @@ public static JNode ObjectsToJNode(object obj)
["pivot"] = new ArgFunction(Pivot, "pivot", Dtype.OBJ, 3, int.MaxValue, false, new Dtype[] { Dtype.ARR, Dtype.STR | Dtype.INT, Dtype.STR | Dtype.INT, /* any # of args */ Dtype.STR | Dtype.INT }),
["quantile"] = new ArgFunction(Quantile, "quantile", Dtype.FLOAT, 2, 2, false, new Dtype[] {Dtype.ARR, Dtype.FLOAT}),
["rand"] = new ArgFunction(RandomFrom0To1, "rand", Dtype.FLOAT, 0, 0, false, new Dtype[] {}, false),
["rand_schema"] = new ArgFunction(RandomFromSchema, "rand_schema", Dtype.ANYTHING, 1, 5, false, new Dtype[] { Dtype.OBJ, Dtype.INT, Dtype.INT, Dtype.BOOL, Dtype.BOOL }, isDeterministic: false),
["randint"] = new ArgFunction(RandomInteger, "randint", Dtype.INT, 1, 2, false, new Dtype[] {Dtype.INT, Dtype.INT}, false),
["range"] = new ArgFunction(Range, "range", Dtype.ARR, 1, 3, false, new Dtype[] {Dtype.INT, Dtype.INT, Dtype.INT}),
["s_cat"] = new ArgFunction(StrCat, "s_cat", Dtype.STR, 1, int.MaxValue, false, new Dtype[] {Dtype.ANYTHING, /* any # of args */ Dtype.ANYTHING}),
Expand Down
4 changes: 2 additions & 2 deletions JsonToolsNppPlugin/Properties/AssemblyInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,5 @@
// Build Number
// Revision
//
[assembly: AssemblyVersion("8.1.0.9")]
[assembly: AssemblyFileVersion("8.1.0.9")]
[assembly: AssemblyVersion("8.1.0.10")]
[assembly: AssemblyFileVersion("8.1.0.10")]
54 changes: 44 additions & 10 deletions JsonToolsNppPlugin/Tests/RemesPathTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -779,7 +779,6 @@ public static bool Test()
}
// the rand() and randint() functions require a special test because their outputs are nondeterministic
ii += 7;
bool testFailed = false;
string randints1argQuery = "flatten(@.foo)[:]->randint(1000)";
string randints2argQuery = "range(9)[:]->randint(-700, 800)";
string randintsIfElseQuery = "var stuff = j`[\"foo\", \"bar\", \"baz\", \"quz\"]`; range(17)[:]->at(stuff, abs(randint(-20, 20) % 4))";
Expand Down Expand Up @@ -814,22 +813,23 @@ public static bool Test()
testsFailed += 3;
Npp.AddLine($"While testing randint, got error {RemesParser.PrettifyException(ex)}");
}
for (int randNum = 0; randNum < 28 && !testFailed; randNum++)
bool allRandTestsFailed = false;
for (int randNum = 0; randNum < 28 && !allRandTestsFailed; randNum++)
{
// rand()
try
{
result = remesparser.Search("rand()", foo);
if (!(result.value is double d && d >= 0d && d < 1d))
{
testFailed = true;
allRandTestsFailed = true;
testsFailed++;
Npp.AddLine($"Expected remesparser.Search(rand(), foo) to return a double between 0 and 1, but instead got {result.ToString()}");
}
}
catch (Exception ex)
{
testFailed = true;
allRandTestsFailed = true;
testsFailed++;
Npp.AddLine($"Expected remesparser.Search(rand(), foo) to return a double between 0 and 1 but instead threw" +
$" an exception:\n{RemesParser.PrettifyException(ex)}");
Expand All @@ -840,14 +840,14 @@ public static bool Test()
result = remesparser.Search("ifelse(rand() < 0.5, a, b)", foo);
if (!(result.value is string s && (s == "a" || s == "b")))
{
testFailed = true;
allRandTestsFailed = true;
testsFailed++;
Npp.AddLine($"Expected remesparser.Search(ifelse(rand(), a, b), foo) to return \"a\" or \"b\", but instead got {result.ToString()}");
}
}
catch (Exception ex)
{
testFailed = true;
allRandTestsFailed = true;
testsFailed++;
Npp.AddLine($"Expected remesparser.Search(ifelse(rand(), a, b), foo) to return \"a\" or \"b\" but instead threw" +
$" an exception:\n{RemesParser.PrettifyException(ex)}");
Expand All @@ -858,15 +858,15 @@ public static bool Test()
result = remesparser.Search("j`[1,2,3]`[:]{rand()}[0]", foo);
if (!(result is JArray arr) || arr.children.All(x => x == arr[0]))
{
testFailed = true;
allRandTestsFailed = true;
testsFailed++;
Npp.AddLine($"Expected remesparser.Search(j`[1,2,3]`{{{{rand(@)}}}}[0], foo) to return array of doubles that aren't all equal" +
$", but instead got {result.ToString()}");
}
}
catch (Exception ex)
{
testFailed = true;
allRandTestsFailed = true;
testsFailed++;
Npp.AddLine($"Expected remesparser.Search(j`[1,2,3]`{{{{rand(@)}}}}[0] to return array of doubles that aren't equal, but instead threw" +
$" an exception:\n{RemesParser.PrettifyException(ex)}");
Expand All @@ -879,14 +879,14 @@ public static bool Test()
result = remesparser.Search(q, foo);
if (!(result is JArray arr && arr[0].value is double d1 && d1 >= 0 && d1 <= 1 && arr.children.All(x => x.value is double xd && xd == d1)))
{
testFailed = true;
allRandTestsFailed = true;
testsFailed++;
Npp.AddLine($"Expected remesparser.Search(\"{q}\", foo) to return an array where every value is the same double between 0 and 1, but instead got result {result.ToString()}");
}
}
catch (Exception ex)
{
testFailed = true;
allRandTestsFailed = true;
testsFailed++;
Npp.AddLine($"Expected remesparser.Search(\"{q}\", foo) to return an array where every value is the same double between 0 and 1, but instead got exception {RemesParser.PrettifyException(ex)}");
}
Expand Down Expand Up @@ -949,6 +949,40 @@ public static bool Test()
$"but instead got {result.ToString()}.");
}
}
// test that rand_schema correctly uses all args
var randFromSchemaTestcases = new (string query, Func<JNode, bool> passIf)[]
{
("rand_schema(j`{\"type\": \"boolean\"}`)", x => x.value is bool), // base case, simple schema and no args
("rand_schema(j`{\"type\": \"array\", \"items\": {\"type\": [\"integer\", \"null\"]}}`, 11, 13)", x => x is JArray jarr && jarr.Length >= 11 && jarr.Length <= 13 && jarr.children.All(y => y.type == Dtype.INT || y.type == Dtype.NULL)), // make sure minArrayLength and maxArrayLength are followed
("rand_schema(j`{\"type\": \"string\", \"pattern\": \"\\\\\\\\d{4,9}|foo\"}`,,,,true)", x => x.value is string s && (s == "foo" || (s.Length <= 9 && s.Length >= 4 && s.All(y => y.IsDigit())))), // make sure isPattern = true makes string follow regex
("rand_schema(j`{\"type\": \"string\", \"pattern\": \"\\\\\\\\d{4,9}|foo\"}`)", x => x.value is string s && !(s == "foo" || (s.Length <= 9 && s.Length >= 4 && s.All(y => y.IsDigit())))), // make sure isPattern = false makes string ignore regex
("rand_schema(j`{\"type\": \"array\", \"items\": {\"type\": \"boolean\"}}`)", x => x is JArray jarr && jarr.Length >= 0 && jarr.Length <= 10 && jarr.children.All(y => y.value is bool)), // make sure default array length range applies when no minArrayLength or maxArrayLength specified
("rand_schema(j`{\"type\": \"string\", \"maxLength\": 17, \"minLength\": 14}`,,,true)", x => x.value is string s && s.Length >= 14 && s.Length <= 17 && s.All(y => y >= 1 && y <= 255) && s.Any(y => RandomJsonFromSchema.PRINTABLE.IndexOf(y) < 0)), // make sure extendedAsciiStrings = true makes string extended ASCII
("rand_schema(j`{\"type\": \"string\", \"maxLength\": 12, \"minLength\": 9}`)", x => x.value is string s && s.Length >= 9 && s.Length <= 12 && s.All(y => RandomJsonFromSchema.PRINTABLE.IndexOf(y) >= 0)), // make sure extendedAsciiStrings = false makes string only printables
};
var jnull = new JNode();
foreach ((string query, Func<JNode, bool> passIf) in randFromSchemaTestcases)
{
ii++;
try
{
for (int jj = 0; jj < 6; jj++)
{
JNode node = remesparser.Search(query, jnull);
if (!passIf(node))
{
testsFailed++;
Npp.AddLine($"Failed test {query} because output {node.ToString()} did not pass test");
break;
}
}
}
catch (Exception ex)
{
testsFailed++;
Npp.AddLine($"Failed test {query} because of exception {ex}");
}
}
/**
* Test s_csv and s_fa caching (only need to use s_csv, because s_fa uses the same caching system)
**/
Expand Down
7 changes: 6 additions & 1 deletion docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -911,7 +911,7 @@ This tool can only validate the following keywords:

## Generating random JSON from a schema ##

The plugin can also generate random JSON from a schema. The default minimum and maximum array lengths (for schemas where the `minItems` and `maxItems` keywords are omitted) are `0` and `10` respectively.
The plugin can also generate random JSON from a schema. The default minimum and maximum array lengths (for schemas where the `minItems` and `maxItems` keywords are omitted) are `0` and `10` respectively, and can be customized with the `minArrayLength` and `maxArrayLength` settings.

*Added in version 4.8.1:* You can also use a non-schema file to generate random JSON. A schema will be generated on the fly, and that schema will be used to make the random JSON.

Expand Down Expand Up @@ -948,7 +948,12 @@ The following keywords are supported for random JSON generation:
### Keywords for strings ###

* [minLength](https://json-schema.org/draft/2020-12/json-schema-validation.html#name-minlength) and [maxLength](https://json-schema.org/draft/2020-12/json-schema-validation.html#name-maxlength) (*added in [7.1](/CHANGELOG.md#710---2024-02-28)*)
* if `minLength` is not specified, strings will have a minimum length of 0.
* [pattern](https://json-schema.org/draft/2020-12/json-schema-validation.html#name-pattern) (*see note below on [random strings from regex](#random-strings-from-regex-added-in-v81)*)
* If there is no `pattern` keyword, or if `generate_random_patterns` is `False`, the following are all true:
* All strings will have fewer than 11 characters.
* If `extended_ascii_strings` is `False` (the default), the characters in each string will be drawn from the set of printable ASCII characters, ``"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ \t\r\n"``
* If `extended_ascii_strings` is `True`, the characters in each string will have UTF-16 codes between 1 (`\x01`) and 255 (`ÿ`)

### Random strings from regex (*added in v8.1*)

Expand Down
5 changes: 5 additions & 0 deletions docs/RemesPath.md
Original file line number Diff line number Diff line change
Expand Up @@ -536,6 +536,11 @@ Then the returned value is `0.6*10 + 0.4*8`, or 9.2.

Random number between 0 (inclusive) and 1 (exclusive). *Added in [v5.2](/CHANGELOG.md#520---2023-06-04)*

---
`rand_schema(schema: object, minArrayLength: int = 0, maxArrayLength: int = 10, extendedAsciiStrings: bool = false, usePatterns: bool = false)`

Creates [random JSON from `schema`](/docs/README.md#generating-random-json-from-a-schema), where the four optional arguments take the place of the global settings `minArrayLength`, `maxArrayLength`, `extended_ascii_strings`, and `generate_random_patterns`, respectively.

---
`randint(start: int, end: int=null) -> int`

Expand Down
Loading

0 comments on commit c9ea2ea

Please sign in to comment.