diff --git a/JsonToolsNppPlugin/JSONTools/JNode.cs b/JsonToolsNppPlugin/JSONTools/JNode.cs index 19f20bb..a3a3709 100644 --- a/JsonToolsNppPlugin/JSONTools/JNode.cs +++ b/JsonToolsNppPlugin/JSONTools/JNode.cs @@ -1952,6 +1952,8 @@ public class JQueryContext : JNode /// Will the query mutate input? /// private bool mutatesInput = false; + public Dictionary Globals; + public bool UsesGlobals { get; private set; } = false; public override bool CanOperate => true; public override bool IsMutator => mutatesInput; private List statements; @@ -1975,25 +1977,32 @@ public class JQueryContext : JNode public JQueryContext() : base(null, Dtype.UNKNOWN, 0) { statements = new List(); + Globals = new Dictionary(); locals = new Dictionary(); cachedLocals = new Dictionary(); loopVariableAssignmentStack = new List(); tokenIndicesOfVariableReferences = new Dictionary(); } + private void InitializeGlobals() + { + if (UsesGlobals) + Globals = new Dictionary(); + } + /// /// Run all the statements, and return the result. /// public override JNode Operate(JNode inp) { - ArgFunction.InitializeGlobals(mutatesInput); + InitializeGlobals(); JNode lastStatement = EvaluateStatementsFromStartToEnd(inp, 0, statements.Count); Reset(); return lastStatement; } /// - /// a "simple query" is a query with one statement and no variable assignments.

+ /// a "simple query" is a query with one statement, no variable assignments, and no functions that use this context's Globals (UsesGlobals is false).

/// This definition is useful because up until JsonTools 5.7 it was the only kind of query possible.

/// If true, you can keep the first statement and discard the context. ///
@@ -2001,7 +2010,7 @@ public bool IsSimpleQuery { get { - if (statements.Count > 1) + if (statements.Count > 1 || UsesGlobals) return false; JNode firstStatement = statements[0]; return !(firstStatement is VarAssign); @@ -2095,30 +2104,7 @@ public JNode GetQuery() { if (IsSimpleQuery) { - JNode firstStatement = statements[0]; - if (firstStatement is CurJson cj) - { - Func fun = cj.function; - JNode outfunc(JNode x) - { - // need to reset globals before each evalutation - ArgFunction.InitializeGlobals(false); - return fun(x); - } - return new CurJson(cj.type, outfunc); - } - else if (firstStatement is JMutator jm && jm.selector is CurJson selector) - { - Func selectorFunc = selector.function; - JNode newSelector(JNode x) - { - ArgFunction.InitializeGlobals(true); - return selectorFunc(x); - } - jm.selector = new CurJson(selector.type, newSelector); - return jm; - } - return firstStatement; + return statements[0]; } // clear locals because it was necessary to use actual values while building for propagation Reset(); @@ -2224,7 +2210,6 @@ private JNode EvaluateStatementsFromStartToEnd(JNode inp, int start, int end) indexInStatements++; } } - ArgFunction.regexSearchResultsShouldBeCached = true; return lastStatement; } @@ -2233,6 +2218,7 @@ private JNode EvaluateStatementsFromStartToEnd(JNode inp, int start, int end) /// private void Reset() { + Globals = new Dictionary(); foreach (string varname in Varnames()) locals[varname] = null; cachedLocals.Clear(); diff --git a/JsonToolsNppPlugin/JSONTools/RemesPathFunctions.cs b/JsonToolsNppPlugin/JSONTools/RemesPathFunctions.cs index 782c28d..9e851f0 100644 --- a/JsonToolsNppPlugin/JSONTools/RemesPathFunctions.cs +++ b/JsonToolsNppPlugin/JSONTools/RemesPathFunctions.cs @@ -982,6 +982,10 @@ public class ArgFunction /// and the and() and or() functions short-circuiting the same way as the corresponding binops in Python. 
/// public bool conditionalExecution { get; private set; } + /// + /// Does this function access the Globals attribute of a JQueryContext? + /// + public bool usesGlobals { get; private set; } /// /// A function whose arguments must be given in parentheses (e.g., len(x), concat(x, y), s_mul(abc, 3).

@@ -999,6 +1003,7 @@ public class ArgFunction /// if false, the function outputs a random value /// transformations that are applied to any number of arguments at compile time /// whether the function's excution of one or more arguments is conditional on something + /// Does this function access the Globals attribute of a JQueryContext? public ArgFunction(Func, JNode> function, string name, Dtype type, @@ -1008,7 +1013,8 @@ public ArgFunction(Func, JNode> function, Dtype[] inputTypes, bool isDeterministic = true, ArgsTransform argsTransform = null, - bool conditionalExecution = false) + bool conditionalExecution = false, + bool usesGlobals = false) { Call = function; this.name = name; @@ -1020,6 +1026,7 @@ public ArgFunction(Func, JNode> function, this.isDeterministic = isDeterministic; this.argsTransform = argsTransform; this.conditionalExecution = conditionalExecution; + this.usesGlobals = usesGlobals; } /// @@ -1100,20 +1107,20 @@ public void PadToMaxArgs(List args) } } - /// - /// there are currently three mutable public global variables that are set in queries:

- /// regexSearchResultsShouldBeCached, csvDelimiterInLastQuery, and csvQuoteCharInLastQuery.

- /// These all relate to s_csv and s_fa.

- /// This is basically a hack, because RemesPath does not currently support deep top-down introspection of a query's AST - /// to determine if s_csv or s_fa has been called in a certain way. - ///
- /// - public static void InitializeGlobals(bool containsMutation) - { - regexSearchResultsShouldBeCached = !containsMutation; - csvDelimiterInLastQuery = '\x00'; - csvQuoteCharInLastQuery = '\x00'; - } + ///// + ///// there are currently three mutable public global variables that are set in queries:

+ ///// regexSearchResultsShouldBeCached, csvDelimiterInLastQuery, and csvQuoteCharInLastQuery.

+ ///// These all relate to s_csv and s_fa.

+ ///// This is basically a hack, because RemesPath does not currently support deep top-down introspection of a query's AST + ///// to determine if s_csv or s_fa has been called in a certain way. + /////
+ ///// + //public static void InitializeGlobals(bool containsMutation) + //{ + // regexSearchResultsShouldBeCached = !containsMutation; + // csvDelimiterInLastQuery = '\x00'; + // csvQuoteCharInLastQuery = '\x00'; + //} #region NON_VECTORIZED_ARG_FUNCTIONS @@ -1660,7 +1667,8 @@ public static JNode RandomInteger(List args) /// loopCountValue = loopCountBeforeCall + 1; // this ensures that the next time curjson.function(inp) is called, /// it sees loopCountValue = 1 + the last value it saw ///
- private static long loopCountValue = -1; + //private static long loopCountValue = -1; + private const string LOOP_COUNT_VARNAME = "loop"; /// /// loop() -> int

@@ -1674,7 +1682,7 @@ public static JNode RandomInteger(List args) /// public static JNode LoopCount(List args) { - return new JNode(loopCountValue); + return new JNode((long)((JQueryContext)args[0]).Globals[LOOP_COUNT_VARNAME]); } /// @@ -2440,10 +2448,10 @@ public static JNode StrFind(List args) /// private static readonly int MAX_DOC_SIZE_CACHE_REGEX_SEARCH = IntPtr.Size * 1_250_000; - /// - /// if false, do not cache for any reason. This is usually because the query involves mutation and there is some danger of mutating a value in the cache. - /// - public static bool regexSearchResultsShouldBeCached = true; + ///// + ///// if false, do not cache for any reason. This is usually because the query involves mutation and there is some danger of mutating a value in the cache. + ///// + //public static bool regexSearchResultsShouldBeCached = true; private const int regexSearchResultCacheSize = 8; @@ -2453,8 +2461,10 @@ public static JNode StrFind(List args) ///
private static LruCache<(string input, string argsAsJArrayString), JNode> regexSearchResultCache = new LruCache<(string input, string argsAsJArrayString), JNode>(regexSearchResultCacheSize); - public static char csvDelimiterInLastQuery = '\x00'; - public static char csvQuoteCharInLastQuery = '\x00'; + //public static char csvDelimiterInLastQuery = '\x00'; + public const string CSV_DELIM_VARNAME = "csvDelim"; + //public static char csvQuoteCharInLastQuery = '\x00'; + public const string CSV_QUOTE_VARNAME = "csvQuoteChar"; /// /// parses a single CSV value (one column in one row) according to RFC 4180 (https://www.ietf.org/rfc/rfc4180.txt) @@ -2694,9 +2704,9 @@ public static IEnumerable EnumerateGroupsOfRegexMatch(string text, int ma } } - private static bool UseRegexSearchResultCache(string input) + private static bool UseRegexSearchResultCache(string input, JQueryContext context) { - return regexSearchResultsShouldBeCached && input.Length >= MIN_DOC_SIZE_CACHE_REGEX_SEARCH && input.Length <= MAX_DOC_SIZE_CACHE_REGEX_SEARCH; + return !context.IsMutator && input.Length >= MIN_DOC_SIZE_CACHE_REGEX_SEARCH && input.Length <= MAX_DOC_SIZE_CACHE_REGEX_SEARCH; } private static string ArgsAsJArrayString(Regex rex, HeaderHandlingInCsv headerHandling, int[] columnsToParseAsNumber) @@ -2718,18 +2728,18 @@ private static string ArgsAsJArrayString(Regex rex, HeaderHandlingInCsv headerHa /// if the regexSearchResultCache is usable for this input at this time, check if this combination of (input, regex, HeaderHandlingInCsv, columnsToParseAsNumber) /// is in the regexSearchResultCache and return the cached value if so. 
/// - private static bool TryGetCachedRegexSearchResults(string input, Regex rex, HeaderHandlingInCsv headerHandling, int[] columnsToParseAsNumber, out JNode cachedOutput) + private static bool TryGetCachedRegexSearchResults(string input, Regex rex, HeaderHandlingInCsv headerHandling, int[] columnsToParseAsNumber, JQueryContext context, out JNode cachedOutput) { cachedOutput = null; - if (!UseRegexSearchResultCache(input)) + if (!UseRegexSearchResultCache(input, context)) return false; string argsAsJArrayString = ArgsAsJArrayString(rex, headerHandling, columnsToParseAsNumber); return regexSearchResultCache.TryGetValue((input, argsAsJArrayString), out cachedOutput); } - private static void CacheResultsOfRegexSearch(string input, Regex rex, HeaderHandlingInCsv headerHandling, int[] columnsToParseAsNumber, JNode output) + private static void CacheResultsOfRegexSearch(string input, Regex rex, HeaderHandlingInCsv headerHandling, int[] columnsToParseAsNumber, JNode output, JQueryContext context) { - if (!UseRegexSearchResultCache(input)) + if (!UseRegexSearchResultCache(input, context)) return; string argsAsJArrayString = ArgsAsJArrayString(rex, headerHandling, columnsToParseAsNumber); regexSearchResultCache[(input, argsAsJArrayString)] = output; @@ -2751,7 +2761,7 @@ private static void CacheResultsOfRegexSearch(string input, Regex rex, HeaderHan /// the quote character (only used in s_csv) /// /// - public static JNode StrFindAllHelper(string text, Regex rex, List args, int firstOptionalArgNum, string funcName, int maxGroupNum=-1, HeaderHandlingInCsv headerHandling = HeaderHandlingInCsv.INCLUDE_HEADER_ROWS_AS_ARRAYS, char csvQuoteChar = '\x00') + public static JNode StrFindAllHelper(string text, Regex rex, List args, int firstOptionalArgNum, string funcName, JQueryContext context, int maxGroupNum=-1, HeaderHandlingInCsv headerHandling = HeaderHandlingInCsv.INCLUDE_HEADER_ROWS_AS_ARRAYS, char csvQuoteChar = '\x00') { int[] columnsToParseAsNumber = 
ColumnNumbersToParseAsNumber(args, firstOptionalArgNum, maxGroupNum, funcName); Array.Sort(columnsToParseAsNumber); @@ -2801,7 +2811,7 @@ JNode matchEvaluator(string mValue, bool tryParseAsNumber, int jnodePosition) } Array.Sort(columnsToParseAsNumber); } - if (isFirstRow && TryGetCachedRegexSearchResults(text, rex, headerHandling, columnsToParseAsNumber, out output)) + if (isFirstRow && TryGetCachedRegexSearchResults(text, rex, headerHandling, columnsToParseAsNumber, context, out output)) return output; bool parseMatchesAsRow = headerHandling == HeaderHandlingInCsv.INCLUDE_HEADER_ROWS_AS_ARRAYS || headerHandling == HeaderHandlingInCsv.INCLUDE_FULL_MATCH_AS_FIRST_ITEM || !isFirstRow; if (nColumns == 1) @@ -2888,7 +2898,7 @@ JNode matchEvaluator(string mValue, bool tryParseAsNumber, int jnodePosition) isFirstRow = false; } output = new JArray(0, rows); - CacheResultsOfRegexSearch(text, rex, headerHandling, columnsToParseAsNumber, output); + CacheResultsOfRegexSearch(text, rex, headerHandling, columnsToParseAsNumber, output, context); return output; } @@ -2912,16 +2922,17 @@ public static JNode CsvRead(List args) if (arg2.type != Dtype.INT) throw new RemesPathArgumentException(null, 1, FUNCTIONS["s_csv"], arg2.type); int nColumns = Convert.ToInt32(arg2.value); + var context = (JQueryContext)args[3]; char delim = args.Count > 2 ? ((string)args[2].value)[0] : ','; - csvDelimiterInLastQuery = delim; + context.Globals[CSV_DELIM_VARNAME] = delim; string newline = args.Count > 3 ? (string)args[3].value : "\r\n"; char quote = args.Count > 4 ? ((string)args[4].value)[0] : '"'; - csvQuoteCharInLastQuery = quote; + context.Globals[CSV_QUOTE_VARNAME] = quote; string headerHandlingAbbrev = args.Count > 5 ? 
(string)args[5].value : "n"; if (!HEADER_HANDLING_ABBREVIATIONS.TryGetValue(headerHandlingAbbrev, out HeaderHandlingInCsv headerHandling)) throw new RemesPathArgumentException("header_handling (6th argument, default 'n') must be 'n' (no header, rows as arrays), 'h' (include header, rows as arrays), or 'd' (rows as objects with header as keys)", 5, FUNCTIONS["s_csv"]); string rexPat = CsvRowRegex(nColumns, delim, newline, quote); - return (JArray)StrFindAllHelper(text, new Regex(rexPat, RegexOptions.Compiled), args, 6, "s_csv", nColumns, headerHandling, quote); + return (JArray)StrFindAllHelper(text, new Regex(rexPat, RegexOptions.Compiled), args, 6, "s_csv", context, nColumns, headerHandling, quote); } /// @@ -3015,7 +3026,7 @@ public static JNode StrFindAll(List args) Regex rex = ((JRegex)args[1]).regex; bool includeFullMatchAsFirstItem = args.Count > 2 && (bool)args[2].value; HeaderHandlingInCsv headerHandling = includeFullMatchAsFirstItem ? HeaderHandlingInCsv.INCLUDE_FULL_MATCH_AS_FIRST_ITEM : HeaderHandlingInCsv.INCLUDE_HEADER_ROWS_AS_ARRAYS; - return StrFindAllHelper(text, rex, args, 3, "s_fa", -1, headerHandling); + return StrFindAllHelper(text, rex, args, 3, "s_fa", context, -1, headerHandling); } /// @@ -3133,8 +3144,10 @@ public static JNode StrSub(List args) if (repl is CurJson cj) { Func fun = cj.function; - long previousLoopCountValue = loopCountValue; - loopCountValue = 0; + var globals = ((JQueryContext)args[3]).Globals; + if (!(globals.TryGetValue(LOOP_COUNT_VARNAME, out object previousLoopCountNode) && previousLoopCountNode is long previousLoopCount)) + previousLoopCount = -1; + globals[LOOP_COUNT_VARNAME] = 0; string replacementFunction(Match m) { int groupCount = m.Groups.Count; @@ -3143,14 +3156,13 @@ string replacementFunction(Match m) { matchArrChildren.Add(new JNode(m.Groups[ii].Value)); } - long loopCountBeforeCall = loopCountValue; + var loopCountBeforeCall = (long)globals[LOOP_COUNT_VARNAME]; var matchArr = new JArray(0, matchArrChildren); 
- loopCountValue = loopCountBeforeCall + 1; + globals[LOOP_COUNT_VARNAME] = loopCountBeforeCall + 1; return (string)fun(matchArr).value; } JNode resultNode = new JNode(regex.Replace(val, replacementFunction)); - // reset to 0 so that it can't be used anywhere else - loopCountValue = previousLoopCountValue; + globals[LOOP_COUNT_VARNAME] = previousLoopCount; return resultNode; } else @@ -3649,7 +3661,7 @@ public static JNode ObjectsToJNode(object obj) ["iterable"] = new ArgFunction(IsExpr, "iterable", Dtype.BOOL, 1, 1, false, new Dtype[] {Dtype.ANYTHING}), ["keys"] = new ArgFunction(Keys, "keys", Dtype.ARR, 1, 1, false, new Dtype[] {Dtype.OBJ}), ["len"] = new ArgFunction(Len, "len", Dtype.INT, 1, 1, false, new Dtype[] {Dtype.ITERABLE}), - ["loop"] = new ArgFunction(LoopCount, "loop", Dtype.INT, 0, 0, false, new Dtype[] {}, false), + ["loop"] = new ArgFunction(LoopCount, "loop", Dtype.INT, 0, 0, false, new Dtype[] {}, false, usesGlobals: true), ["max"] = new ArgFunction(Max, "max", Dtype.FLOAT, 1, 1, false, new Dtype[] {Dtype.ARR}), ["max_by"] = new ArgFunction(MaxBy, "max_by", Dtype.ANYTHING, 2, 2, false, new Dtype[] {Dtype.ARR, Dtype.STR | Dtype.INT | Dtype.FUNCTION}), ["mean"] = new ArgFunction(Mean, "mean", Dtype.FLOAT, 1, 1, false, new Dtype[] {Dtype.ARR}), @@ -3692,8 +3704,8 @@ public static JNode ObjectsToJNode(object obj) ["parse"] = new ArgFunction(Parse, "parse", Dtype.OBJ, 1, 1, true, new Dtype[] { Dtype.STR | Dtype.ITERABLE }), ["round"] = new ArgFunction(Round, "round", Dtype.FLOAT_OR_INT, 1, 2, true, new Dtype[] {Dtype.FLOAT_OR_INT | Dtype.ITERABLE, Dtype.INT}), ["s_count"] = new ArgFunction(StrCount, "s_count", Dtype.INT, 2, 2, true, new Dtype[] {Dtype.STR | Dtype.ITERABLE, Dtype.STR_OR_REGEX}), - ["s_csv"] = new ArgFunction(CsvRead, "s_csv", Dtype.ARR, 2, int.MaxValue, true, new Dtype[] {Dtype.STR | Dtype.ITERABLE, Dtype.INT, Dtype.STR | Dtype.NULL, Dtype.STR | Dtype.NULL, Dtype.STR | Dtype.NULL, Dtype.STR | Dtype.NULL, Dtype.INT}, true, new 
ArgsTransform((2, Dtype.NULL, x => new JNode(",")), (3, Dtype.NULL, x => new JNode("\r\n")), (4, Dtype.NULL, x => new JNode("\"")), (5, Dtype.NULL, x => new JNode("n")))), - ["s_fa"] = new ArgFunction(StrFindAll, "s_fa", Dtype.ARR, 2, int.MaxValue, true, new Dtype[] { Dtype.STR | Dtype.ITERABLE, Dtype.STR_OR_REGEX, Dtype.BOOL | Dtype.NULL, Dtype.INT}, true, new ArgsTransform((1, Dtype.STR_OR_REGEX, TransformRegex), (2, Dtype.NULL, x => new JNode(false)))), + ["s_csv"] = new ArgFunction(CsvRead, "s_csv", Dtype.ARR, 2, int.MaxValue, true, new Dtype[] {Dtype.STR | Dtype.ITERABLE, Dtype.INT, Dtype.STR | Dtype.NULL, Dtype.STR | Dtype.NULL, Dtype.STR | Dtype.NULL, Dtype.STR | Dtype.NULL, Dtype.INT}, true, new ArgsTransform((2, Dtype.NULL, x => new JNode(",")), (3, Dtype.NULL, x => new JNode("\r\n")), (4, Dtype.NULL, x => new JNode("\"")), (5, Dtype.NULL, x => new JNode("n"))), usesGlobals: true), + ["s_fa"] = new ArgFunction(StrFindAll, "s_fa", Dtype.ARR, 2, int.MaxValue, true, new Dtype[] { Dtype.STR | Dtype.ITERABLE, Dtype.STR_OR_REGEX, Dtype.BOOL | Dtype.NULL, Dtype.INT}, true, new ArgsTransform((1, Dtype.STR_OR_REGEX, TransformRegex), (2, Dtype.NULL, x => new JNode(false))), usesGlobals: true), ["s_find"] = new ArgFunction(StrFind, "s_find", Dtype.ARR, 2, 2, true, new Dtype[] {Dtype.STR | Dtype.ITERABLE, Dtype.REGEX}), ["s_format"] = new ArgFunction(StrFormat, "s_format", Dtype.STR, 1, 5, true, new Dtype[] { Dtype.ANYTHING, Dtype.NULL | Dtype.STR, Dtype.BOOL | Dtype.STR, Dtype.INT | Dtype.STR | Dtype.NULL, Dtype.BOOL | Dtype.NULL }), ["s_len"] = new ArgFunction(StrLen, "s_len", Dtype.INT, 1, 1, true, new Dtype[] {Dtype.STR | Dtype.ITERABLE}), @@ -3705,7 +3717,7 @@ public static JNode ObjectsToJNode(object obj) ["s_slice"] = new ArgFunction(StrSlice, "s_slice", Dtype.STR, 2, 2, true, new Dtype[] {Dtype.STR | Dtype.ITERABLE, Dtype.INT_OR_SLICE}), ["s_split"] = new ArgFunction(StrSplit, "s_split", Dtype.ARR, 1, 2, true, new Dtype[] {Dtype.STR | Dtype.ITERABLE, 
Dtype.STR_OR_REGEX}), ["s_strip"] = new ArgFunction(StrStrip, "s_strip", Dtype.STR, 1, 1, true, new Dtype[] {Dtype.STR | Dtype.ITERABLE}), - ["s_sub"] = new ArgFunction(StrSub, "s_sub", Dtype.STR, 3, 3, true, new Dtype[] {Dtype.STR | Dtype.ITERABLE, Dtype.STR_OR_REGEX, Dtype.STR | Dtype.FUNCTION}, true, new ArgsTransform((1, Dtype.REGEX, TransformRegex))), + ["s_sub"] = new ArgFunction(StrSub, "s_sub", Dtype.STR, 3, 3, true, new Dtype[] {Dtype.STR | Dtype.ITERABLE, Dtype.STR_OR_REGEX, Dtype.STR | Dtype.FUNCTION}, true, new ArgsTransform((1, Dtype.REGEX, TransformRegex)), usesGlobals: true), ["s_upper"] = new ArgFunction(StrUpper, "s_upper", Dtype.STR, 1, 1, true, new Dtype[] {Dtype.STR | Dtype.ITERABLE}), ["str"] = new ArgFunction(ToStr, "str", Dtype.STR, 1, 1, true, new Dtype[] {Dtype.ANYTHING}), ["zfill"] = new ArgFunction(ZFill, "zfill", Dtype.STR, 2, 2, true, new Dtype[] { Dtype.ANYTHING, Dtype.INT | Dtype.ITERABLE}), diff --git a/JsonToolsNppPlugin/Properties/AssemblyInfo.cs b/JsonToolsNppPlugin/Properties/AssemblyInfo.cs index 82366c0..22c1e69 100644 --- a/JsonToolsNppPlugin/Properties/AssemblyInfo.cs +++ b/JsonToolsNppPlugin/Properties/AssemblyInfo.cs @@ -28,5 +28,5 @@ // Build Number // Revision // -[assembly: AssemblyVersion("7.2.0.0")] -[assembly: AssemblyFileVersion("7.2.0.0")] +[assembly: AssemblyVersion("7.2.0.1")] +[assembly: AssemblyFileVersion("7.2.0.1")]