From 7e2fa82d1a1e2eb199a4b3acf1a031a1b5bba269 Mon Sep 17 00:00:00 2001 From: Adam Retter Date: Sun, 26 Nov 2017 19:59:12 +0000 Subject: [PATCH 1/3] [feature] Add support for non-capturing groups to XQuery 3.0 regular expressions --- .../xquery/regex/JDK15RegexTranslator.java | 62 +++++++++++++++++-- .../exist/xquery/regex/RegexTranslator.java | 3 +- test/src/xquery/regex.xml | 15 +++++ 3 files changed, 74 insertions(+), 6 deletions(-) diff --git a/src/org/exist/xquery/regex/JDK15RegexTranslator.java b/src/org/exist/xquery/regex/JDK15RegexTranslator.java index 069ab883f3a..3b7308f3adb 100644 --- a/src/org/exist/xquery/regex/JDK15RegexTranslator.java +++ b/src/org/exist/xquery/regex/JDK15RegexTranslator.java @@ -5,6 +5,7 @@ import org.exist.util.FastStringBuffer; import org.exist.util.UTF16CharacterSet; +import org.exist.util.XMLString; import org.exist.xquery.value.StringValue; /** @@ -18,6 +19,8 @@ * characters, since JDK 1.5 handles these natively. * * Copied from Saxon-HE 9.2 package net.sf.saxon.regex. + * + * Updated for Non-capturing Groups in XQuery 3.0 by Adam Retter */ public class JDK15RegexTranslator extends RegexTranslator { @@ -450,11 +453,20 @@ protected boolean translateAtom() throws RegexSyntaxException { return false; case '(': copyCurChar(); - final int thisCapture = ++currentCapture; - translateRegExp(); - expect(')'); - captures.add(thisCapture); - copyCurChar(); + final boolean nonCapturing = isNonCapturing(); + if(nonCapturing) { + copyCurChar(); // ? 
+ copyCurChar(); // : + translateRegExp(); + expect(')'); + copyCurChar(); + } else { + final int thisCapture = ++currentCapture; + translateRegExp(); + expect(')'); + captures.add(thisCapture); + copyCurChar(); + } return true; case '\\': advance(); @@ -506,6 +518,46 @@ protected boolean translateAtom() throws RegexSyntaxException { return true; } + private boolean isNonCapturing() { + int localPos = pos; + if (localPos + 1 < length) { + char localChar = curChar; + + if (ignoreWhitespace) { + while (XMLString.isWhiteSpace(localChar)) { + if (localPos + 1 < length) { + localChar = regExp.charAt(localPos++); + } else { + return false; + } + } + } + + if(localChar == '?') { + + if (localPos + 1 < length) { + localChar = regExp.charAt(localPos++); + + if (ignoreWhitespace) { + while (XMLString.isWhiteSpace(localChar)) { + if (localPos + 1 < length) { + localChar = regExp.charAt(localPos++); + } else { + return false; + } + } + } + + if(localChar == ':') { + return true; + } + } + } + } + + return false; + } + private static CharClass makeNameCharClass(byte mask) { final List ranges = new ArrayList(); // Add colon to the set of characters matched diff --git a/src/org/exist/xquery/regex/RegexTranslator.java b/src/org/exist/xquery/regex/RegexTranslator.java index 30a742b2be8..9fb0db0c0bc 100644 --- a/src/org/exist/xquery/regex/RegexTranslator.java +++ b/src/org/exist/xquery/regex/RegexTranslator.java @@ -4,6 +4,7 @@ import java.util.Collections; import java.util.HashSet; import java.util.List; +import java.util.Set; import org.exist.util.FastStringBuffer; import org.exist.util.UTF16CharacterSet; @@ -28,7 +29,7 @@ public abstract class RegexTranslator { protected char curChar; protected boolean eos = false; protected int currentCapture = 0; - protected HashSet captures = new HashSet(); //IntHashSet + protected Set captures = new HashSet<>(); protected final FastStringBuffer result = new FastStringBuffer(64); protected void translateTop() throws RegexSyntaxException { diff 
--git a/test/src/xquery/regex.xml b/test/src/xquery/regex.xml index f7581a040db..301e152af84 100644 --- a/test/src/xquery/regex.xml +++ b/test/src/xquery/regex.xml @@ -77,6 +77,21 @@ fn:replace("a/b/c", "/", "$", "q") a$b$c + + fn:replace-capturing-1 + fn:replace("hello", "hel(lo)", "$1") + lo + + + fn:replace-non-capturing-1 + fn:replace("hello", "hel(?:lo)", "$1") + FORX0001 + + + fn:replace-non-capturing-2 + fn:replace("hello", "h(?:el(lo))", "$1") + lo + fn:tokenize-qflag-1 From 9c652d3e322c9c26ef85dcd5a165c218f628f763 Mon Sep 17 00:00:00 2001 From: Adam Retter Date: Sun, 26 Nov 2017 20:06:09 +0000 Subject: [PATCH 2/3] [bugfix] fn:analyze-string must adhere to XQuery Regular Expression syntax --- .../xquery/functions/fn/FunAnalyzeString.java | 97 +++++++++++++------ 1 file changed, 68 insertions(+), 29 deletions(-) diff --git a/src/org/exist/xquery/functions/fn/FunAnalyzeString.java b/src/org/exist/xquery/functions/fn/FunAnalyzeString.java index 06a9c7a396c..0c0e9f58338 100644 --- a/src/org/exist/xquery/functions/fn/FunAnalyzeString.java +++ b/src/org/exist/xquery/functions/fn/FunAnalyzeString.java @@ -11,6 +11,8 @@ import org.exist.xquery.FunctionSignature; import org.exist.xquery.XPathException; import org.exist.xquery.XQueryContext; +import org.exist.xquery.regex.JDK15RegexTranslator; +import org.exist.xquery.regex.RegexSyntaxException; import org.exist.xquery.value.FunctionParameterSequenceType; import org.exist.xquery.value.FunctionReturnSequenceType; import org.exist.xquery.value.NodeValue; @@ -19,6 +21,7 @@ import org.exist.xquery.value.Type; import org.xml.sax.helpers.AttributesImpl; +import javax.annotation.Nullable; import javax.xml.XMLConstants; /** @@ -103,14 +106,15 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro return (NodeValue)builder.getDocument().getDocumentElement(); } - private void analyzeString(final MemTreeBuilder builder, final String input, final String pattern, final String flags) throws 
XPathException { - final Pattern ptn; - if (flags != null) { - final int iFlags = parseStringFlags(flags); - ptn = PatternFactory.getInstance().getPattern(pattern, iFlags); - } else { - ptn = PatternFactory.getInstance().getPattern(pattern); + private void analyzeString(final MemTreeBuilder builder, final String input, String pattern, final String flags) throws XPathException { + + final int iFlags = parseStringFlags(flags); + + if(!hasLiteral(iFlags)) { + pattern = translateRegexp(pattern, hasIgnoreWhitespace(iFlags), hasCaseInsensitive(iFlags)); } + + final Pattern ptn = PatternFactory.getInstance().getPattern(pattern, iFlags); final Matcher matcher = ptn.matcher(input); @@ -182,31 +186,66 @@ private void nonMatch(final MemTreeBuilder builder, final String nonMatch) { builder.endElement(); } - private int parseStringFlags(final String flags) { + private int parseStringFlags(@Nullable final String flags) { int iFlags = 0; - for (final char c : flags.toCharArray()) { - switch(c) { - case 's': - iFlags |= Pattern.DOTALL; - break; - - case 'm': - iFlags |= Pattern.MULTILINE; - break; - - case 'i': - iFlags |= Pattern.CASE_INSENSITIVE; - break; - - case 'x' : - iFlags |= Pattern.COMMENTS; - break; - - case 'q' : - iFlags |= Pattern.LITERAL; - break; + if(flags != null) { + for (final char c : flags.toCharArray()) { + switch (c) { + case 's': + iFlags |= Pattern.DOTALL; + break; + + case 'm': + iFlags |= Pattern.MULTILINE; + break; + + case 'i': + iFlags |= Pattern.CASE_INSENSITIVE; + break; + + case 'x': + iFlags |= Pattern.COMMENTS; + break; + + case 'q': + iFlags |= Pattern.LITERAL; + break; + } } } return iFlags; } + + private boolean hasLiteral(final int flags) { + return (flags & Pattern.LITERAL) != 0; + } + + private boolean hasCaseInsensitive(final int flags) { + return (flags & Pattern.CASE_INSENSITIVE) != 0 || (flags & Pattern.UNICODE_CASE) != 0; + } + + private boolean hasIgnoreWhitespace(final int flags) { + return (flags & Pattern.COMMENTS) != 0; + } + + 
/** + * Translates the regular expression from XPath2 syntax to java regex + * syntax. + * + * @param pattern a String containing a regular expression in the syntax of XML Schemas Part 2 + * @param ignoreWhitespace true if whitespace is to be ignored ('x' flag) + * @param caseBlind true if case is to be ignored ('i' flag) + * @return The translated regexp + * @throws XPathException + */ + protected String translateRegexp(final String pattern, final boolean ignoreWhitespace, final boolean caseBlind) throws XPathException { + // convert pattern to Java regex syntax + try { + final int xmlVersion = 11; + return JDK15RegexTranslator.translate(pattern, xmlVersion, true, ignoreWhitespace, caseBlind); + } catch (final RegexSyntaxException e) { + throw new XPathException(this, "Conversion from XPath2 to Java regular expression " + + "syntax failed: " + e.getMessage(), e); + } + } } From 29c4489c93b505eafccd2ca4ba30bf2612a278ca Mon Sep 17 00:00:00 2001 From: Adam Retter Date: Sun, 26 Nov 2017 20:26:43 +0000 Subject: [PATCH 3/3] [refactor] Reduce the amount of duplicate code between regex functions --- .../xquery/functions/fn/FunAnalyzeString.java | 72 +--------- .../exist/xquery/functions/fn/FunMatches.java | 83 ++--------- .../exist/xquery/functions/fn/FunReplace.java | 6 +- .../xquery/functions/fn/FunTokenize.java | 5 +- src/org/exist/xquery/regex/RegexUtil.java | 132 ++++++++++++++++++ 5 files changed, 153 insertions(+), 145 deletions(-) create mode 100644 src/org/exist/xquery/regex/RegexUtil.java diff --git a/src/org/exist/xquery/functions/fn/FunAnalyzeString.java b/src/org/exist/xquery/functions/fn/FunAnalyzeString.java index 0c0e9f58338..d66d298d5d8 100644 --- a/src/org/exist/xquery/functions/fn/FunAnalyzeString.java +++ b/src/org/exist/xquery/functions/fn/FunAnalyzeString.java @@ -11,8 +11,6 @@ import org.exist.xquery.FunctionSignature; import org.exist.xquery.XPathException; import org.exist.xquery.XQueryContext; -import org.exist.xquery.regex.JDK15RegexTranslator; 
-import org.exist.xquery.regex.RegexSyntaxException; import org.exist.xquery.value.FunctionParameterSequenceType; import org.exist.xquery.value.FunctionReturnSequenceType; import org.exist.xquery.value.NodeValue; @@ -21,9 +19,10 @@ import org.exist.xquery.value.Type; import org.xml.sax.helpers.AttributesImpl; -import javax.annotation.Nullable; import javax.xml.XMLConstants; +import static org.exist.xquery.regex.RegexUtil.*; + /** * XPath and XQuery 3.0 F+O fn:analyze-string() * @@ -108,10 +107,10 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro private void analyzeString(final MemTreeBuilder builder, final String input, String pattern, final String flags) throws XPathException { - final int iFlags = parseStringFlags(flags); + final int iFlags = parseFlags(this, flags); if(!hasLiteral(iFlags)) { - pattern = translateRegexp(pattern, hasIgnoreWhitespace(iFlags), hasCaseInsensitive(iFlags)); + pattern = translateRegexp(this, pattern, hasIgnoreWhitespace(iFlags), hasCaseInsensitive(iFlags)); } final Pattern ptn = PatternFactory.getInstance().getPattern(pattern, iFlags); @@ -185,67 +184,4 @@ private void nonMatch(final MemTreeBuilder builder, final String nonMatch) { builder.characters(nonMatch); builder.endElement(); } - - private int parseStringFlags(@Nullable final String flags) { - int iFlags = 0; - if(flags != null) { - for (final char c : flags.toCharArray()) { - switch (c) { - case 's': - iFlags |= Pattern.DOTALL; - break; - - case 'm': - iFlags |= Pattern.MULTILINE; - break; - - case 'i': - iFlags |= Pattern.CASE_INSENSITIVE; - break; - - case 'x': - iFlags |= Pattern.COMMENTS; - break; - - case 'q': - iFlags |= Pattern.LITERAL; - break; - } - } - } - return iFlags; - } - - private boolean hasLiteral(final int flags) { - return (flags & Pattern.LITERAL) != 0; - } - - private boolean hasCaseInsensitive(final int flags) { - return (flags & Pattern.CASE_INSENSITIVE) != 0 || (flags & Pattern.UNICODE_CASE) != 0; - } - - private 
boolean hasIgnoreWhitespace(final int flags) { - return (flags & Pattern.COMMENTS) != 0; - } - - /** - * Translates the regular expression from XPath2 syntax to java regex - * syntax. - * - * @param pattern a String containing a regular expression in the syntax of XML Schemas Part 2 - * @param ignoreWhitespace true if whitespace is to be ignored ('x' flag) - * @param caseBlind true if case is to be ignored ('i' flag) - * @return The translated regexp - * @throws XPathException - */ - protected String translateRegexp(final String pattern, final boolean ignoreWhitespace, final boolean caseBlind) throws XPathException { - // convert pattern to Java regex syntax - try { - final int xmlVersion = 11; - return JDK15RegexTranslator.translate(pattern, xmlVersion, true, ignoreWhitespace, caseBlind); - } catch (final RegexSyntaxException e) { - throw new XPathException(this, "Conversion from XPath2 to Java regular expression " + - "syntax failed: " + e.getMessage(), e); - } - } } diff --git a/src/org/exist/xquery/functions/fn/FunMatches.java b/src/org/exist/xquery/functions/fn/FunMatches.java index 36573f9717c..90020e36c4a 100644 --- a/src/org/exist/xquery/functions/fn/FunMatches.java +++ b/src/org/exist/xquery/functions/fn/FunMatches.java @@ -30,8 +30,6 @@ import org.exist.storage.NativeValueIndex; import org.exist.util.PatternFactory; import org.exist.xquery.pragmas.Optimize; -import org.exist.xquery.regex.JDK15RegexTranslator; -import org.exist.xquery.regex.RegexSyntaxException; import org.exist.xquery.*; import org.exist.xquery.util.Error; import org.exist.xquery.value.BooleanValue; @@ -48,6 +46,8 @@ import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; +import static org.exist.xquery.regex.RegexUtil.*; + /** * Implements the fn:matches() function. 
* @@ -214,7 +214,7 @@ public NodeSet preSelect(Sequence contextSequence, boolean useContext) throws XP final int flags; if(getSignature().getArgumentCount() == 3) { final String flagsArg = getArgument(2).eval(contextSequence).getStringValue(); - flags = parseFlags(flagsArg); + flags = parseFlags(this, flagsArg); } else { flags = 0; } @@ -232,7 +232,7 @@ public NodeSet preSelect(Sequence contextSequence, boolean useContext) throws XP } else { final boolean ignoreWhitespace = hasIgnoreWhitespace(flags); final boolean caseBlind = !caseSensitive; - pattern = translateRegexp(getArgument(1).eval(contextSequence).getStringValue(), ignoreWhitespace, caseBlind); + pattern = translateRegexp(this, getArgument(1).eval(contextSequence).getStringValue(), ignoreWhitespace, caseBlind); } } @@ -250,18 +250,6 @@ public NodeSet preSelect(Sequence contextSequence, boolean useContext) throws XP return preselectResult; } - protected boolean hasLiteral(final int flags) { - return (flags & Pattern.LITERAL) != 0; - } - - protected boolean hasCaseInsensitive(final int flags) { - return (flags & Pattern.CASE_INSENSITIVE) != 0 || (flags & Pattern.UNICODE_CASE) != 0; - } - - protected boolean hasIgnoreWhitespace(final int flags) { - return (flags & Pattern.COMMENTS) != 0; - } - @Override public int getDependencies() { final Expression stringArg = getArgument(0); @@ -386,7 +374,7 @@ private Sequence evalWithIndex(Sequence contextSequence, Item contextItem, Seque final int flags; if(getSignature().getArgumentCount() == 3) { final String flagsArg = getArgument(2).eval(contextSequence, contextItem).getStringValue(); - flags = parseFlags(flagsArg); + flags = parseFlags(this, flagsArg); } else { flags = 0; } @@ -406,7 +394,7 @@ private Sequence evalWithIndex(Sequence contextSequence, Item contextItem, Seque } else { final boolean ignoreWhitespace = hasIgnoreWhitespace(flags); final boolean caseBlind = !caseSensitive; - pattern = translateRegexp(getArgument(1).eval(contextSequence, 
contextItem).getStringValue(), ignoreWhitespace, caseBlind); + pattern = translateRegexp(this, getArgument(1).eval(contextSequence, contextItem).getStringValue(), ignoreWhitespace, caseBlind); } } @@ -480,27 +468,6 @@ private Sequence evalFallback(NodeSet nodes, String pattern, int flags, int inde return result; } - /** - * Translates the regular expression from XPath2 syntax to java regex - * syntax. - * - * @param pattern a String containing a regular expression in the syntax of XML Schemas Part 2 - * @param ignoreWhitespace true if whitespace is to be ignored ('x' flag) - * @param caseBlind true if case is to be ignored ('i' flag) - * @return The translated regexp - * @throws XPathException - */ - protected String translateRegexp(final String pattern, final boolean ignoreWhitespace, final boolean caseBlind) throws XPathException { - // convert pattern to Java regex syntax - try { - final int xmlVersion = 11; - return JDK15RegexTranslator.translate(pattern, xmlVersion, true, ignoreWhitespace, caseBlind); - } catch (final RegexSyntaxException e) { - throw new XPathException(this, "Conversion from XPath2 to Java regular expression " + - "syntax failed: " + e.getMessage(), e); - } - } - /** * @param contextSequence * @param contextItem @@ -513,7 +480,7 @@ private Sequence evalGeneric(Sequence contextSequence, Item contextItem, Sequenc final int flags; if(getSignature().getArgumentCount() == 3) { - flags = parseFlags(getArgument(2).eval(contextSequence, contextItem).getStringValue()); + flags = parseFlags(this, getArgument(2).eval(contextSequence, contextItem).getStringValue()); } else { flags = 0; } @@ -529,7 +496,7 @@ private Sequence evalGeneric(Sequence contextSequence, Item contextItem, Sequenc } else { final boolean ignoreWhitespace = hasIgnoreWhitespace(flags); final boolean caseBlind = hasCaseInsensitive(flags); - pattern = translateRegexp(getArgument(1).eval(contextSequence, contextItem).getStringValue(), ignoreWhitespace, caseBlind); + pattern = 
translateRegexp(this, getArgument(1).eval(contextSequence, contextItem).getStringValue(), ignoreWhitespace, caseBlind); } } @@ -560,43 +527,13 @@ private boolean match(String string, String pattern, int flags) throws XPathExce } } - protected final static int parseFlags(final String s) throws XPathException { - int flags = 0; - for(int i = 0; i < s.length(); i++) { - final char ch = s.charAt(i); - switch(ch) { - case 'm': - flags |= Pattern.MULTILINE; - break; - case 'i': - flags = flags | Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; - break; - case 'x': - flags |= Pattern.COMMENTS; - break; - case 's': - flags |= Pattern.DOTALL; - break; - case 'q' : - flags |= Pattern.LITERAL; - break; - default: - throw new XPathException("err:FORX0001: Invalid regular expression flag: " + ch); - } - } - return flags; - } - + @Override public void reset() { super.reset(); hasUsedIndex = false; } - /* - * (non-Javadoc) - * - * @see org.exist.xquery.AbstractExpression#resetState() - */ + @Override public void resetState(boolean postOptimization) { super.resetState(postOptimization); if (!postOptimization) diff --git a/src/org/exist/xquery/functions/fn/FunReplace.java b/src/org/exist/xquery/functions/fn/FunReplace.java index 74aafa9fd24..959c1527684 100644 --- a/src/org/exist/xquery/functions/fn/FunReplace.java +++ b/src/org/exist/xquery/functions/fn/FunReplace.java @@ -46,6 +46,8 @@ import org.exist.xquery.value.StringValue; import org.exist.xquery.value.Type; +import static org.exist.xquery.regex.RegexUtil.*; + /** * @author Wolfgang Meier (wolfgang@exist-db.org) */ @@ -166,7 +168,7 @@ public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathExc } else { final int flags; if (getSignature().getArgumentCount() == 4) { - flags = parseFlags(getArgument(3).eval(contextSequence, contextItem).getStringValue()); + flags = parseFlags(this, getArgument(3).eval(contextSequence, contextItem).getStringValue()); } else { flags = 0; } @@ -187,7 +189,7 @@ public Sequence 
eval(Sequence contextSequence, Item contextItem) throws XPathExc .replace("\\", "\\\\") .replace("$", "\\$"); } else { - pattern = translateRegexp(patternSeq.getStringValue(), hasIgnoreWhitespace(flags), hasCaseInsensitive(flags)); + pattern = translateRegexp(this, patternSeq.getStringValue(), hasIgnoreWhitespace(flags), hasCaseInsensitive(flags)); } //An error is raised [err:FORX0004] if the value of $replacement contains a "$" character that is not immediately followed by a digit 0-9 and not immediately preceded by a "\". diff --git a/src/org/exist/xquery/functions/fn/FunTokenize.java b/src/org/exist/xquery/functions/fn/FunTokenize.java index 3d9aefb2b78..aa92bae4dae 100644 --- a/src/org/exist/xquery/functions/fn/FunTokenize.java +++ b/src/org/exist/xquery/functions/fn/FunTokenize.java @@ -40,6 +40,7 @@ import org.exist.xquery.value.ValueSequence; import static org.exist.xquery.FunctionDSL.*; +import static org.exist.xquery.regex.RegexUtil.*; /** * @author Wolfgang Meier (wolfgang@exist-db.org) @@ -100,7 +101,7 @@ public Sequence eval(final Sequence contextSequence, final Item contextItem) thr } else { final int flags; if (getSignature().getArgumentCount() == 3) { - flags = parseFlags(getArgument(2).eval(contextSequence, contextItem) + flags = parseFlags(this, getArgument(2).eval(contextSequence, contextItem) .getStringValue()); } else { flags = 0; @@ -117,7 +118,7 @@ public Sequence eval(final Sequence contextSequence, final Item contextItem) thr } else { final boolean ignoreWhitespace = hasIgnoreWhitespace(flags); final boolean caseBlind = !hasCaseInsensitive(flags); - pattern = translateRegexp(getArgument(1).eval(contextSequence, contextItem).getStringValue(), ignoreWhitespace, caseBlind); + pattern = translateRegexp(this, getArgument(1).eval(contextSequence, contextItem).getStringValue(), ignoreWhitespace, caseBlind); } } diff --git a/src/org/exist/xquery/regex/RegexUtil.java b/src/org/exist/xquery/regex/RegexUtil.java new file mode 100644 index 
00000000000..0b89171fa3c --- /dev/null +++ b/src/org/exist/xquery/regex/RegexUtil.java @@ -0,0 +1,132 @@ +/* + * eXist Open Source Native XML Database + * Copyright (C) 2001-2017 The eXist Project + * http://exist-db.org + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +package org.exist.xquery.regex; + +import org.exist.xquery.ErrorCodes; +import org.exist.xquery.Expression; +import org.exist.xquery.XPathException; +import org.exist.xquery.value.StringValue; + +import javax.annotation.Nullable; +import java.util.regex.Pattern; + +/** + * @author Adam Retter + */ +public class RegexUtil { + + /** + * Parses the flags for an XQuery Regular Expression. + * + * @param context The calling expression + * @param strFlags The XQuery Regular Expression flags. + * + * @return The flags for a Java Regular Expression. 
+ */ + public static int parseFlags(final Expression context, @Nullable final String strFlags) throws XPathException { + int flags = 0; + if(strFlags != null) { + for (int i = 0; i < strFlags.length(); i++) { + final char ch = strFlags.charAt(i); + switch (ch) { + case 'm': + flags |= Pattern.MULTILINE; + break; + + case 'i': + flags = flags | Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; + break; + + case 'x': + flags |= Pattern.COMMENTS; + break; + + case 's': + flags |= Pattern.DOTALL; + break; + + case 'q': + flags |= Pattern.LITERAL; + break; + + default: + throw new XPathException(context, ErrorCodes.FORX0001, "Invalid regular expression flag: " + ch, new StringValue(String.valueOf(ch))); + } + } + } + return flags; + } + + /** + * Determines if the Java Regular Expression flags have the literal flag set. + * + * @param flags The Java Regular Expression flags + * + * @return true if the literal flag is set + */ + public static boolean hasLiteral(final int flags) { + return (flags & Pattern.LITERAL) != 0; + } + + /** + * Determines if the Java Regular Expression flags have the case-insensitive flag set. + * + * @param flags The Java Regular Expression flags + * + * @return true if the case-insensitive flag is set + */ + public static boolean hasCaseInsensitive(final int flags) { + return (flags & Pattern.CASE_INSENSITIVE) != 0 || (flags & Pattern.UNICODE_CASE) != 0; + } + + /** + * Determines if the Java Regular Expression flags have the ignore-whitespace flag set. + * + * @param flags The Java Regular Expression flags + * + * @return true if the ignore-whitespace flag is set + */ + public static boolean hasIgnoreWhitespace(final int flags) { + return (flags & Pattern.COMMENTS) != 0; + } + + /** + * Translates the Regular Expression from XPath3 syntax to Java regex + * syntax. + * + * @param pattern a String containing a regular expression in the syntax of XPath F&O 3.0. 
+ * @param ignoreWhitespace true if whitespace is to be ignored ('x' flag) + * @param caseBlind true if case is to be ignored ('i' flag) + * + * @return The Java Regular Expression + * + * @throws XPathException if the XQuery Regular Expression is invalid. + */ + public static String translateRegexp(final Expression context, final String pattern, final boolean ignoreWhitespace, final boolean caseBlind) throws XPathException { + // convert pattern to Java regex syntax + try { + final int xmlVersion = 11; + return JDK15RegexTranslator.translate(pattern, xmlVersion, true, ignoreWhitespace, caseBlind); + } catch (final RegexSyntaxException e) { + throw new XPathException(context, ErrorCodes.FORX0002, "Conversion from XPath F&O 3.0 regular expression syntax to Java regular expression syntax failed: " + e.getMessage(), new StringValue(pattern), e); + } + } +}