Skip to content

Commit

Permalink
[refactor] Reduce the amount of duplicate code between regex functions
Browse files Browse the repository at this point in the history
  • Loading branch information
adamretter committed Nov 27, 2017
1 parent 9c652d3 commit 29c4489
Show file tree
Hide file tree
Showing 5 changed files with 153 additions and 145 deletions.
72 changes: 4 additions & 68 deletions src/org/exist/xquery/functions/fn/FunAnalyzeString.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@
import org.exist.xquery.FunctionSignature;
import org.exist.xquery.XPathException;
import org.exist.xquery.XQueryContext;
import org.exist.xquery.regex.JDK15RegexTranslator;
import org.exist.xquery.regex.RegexSyntaxException;
import org.exist.xquery.value.FunctionParameterSequenceType;
import org.exist.xquery.value.FunctionReturnSequenceType;
import org.exist.xquery.value.NodeValue;
Expand All @@ -21,9 +19,10 @@
import org.exist.xquery.value.Type;
import org.xml.sax.helpers.AttributesImpl;

import javax.annotation.Nullable;
import javax.xml.XMLConstants;

import static org.exist.xquery.regex.RegexUtil.*;

/**
* XPath and XQuery 3.0 F+O fn:analyze-string()
*
Expand Down Expand Up @@ -108,10 +107,10 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro

private void analyzeString(final MemTreeBuilder builder, final String input, String pattern, final String flags) throws XPathException {

final int iFlags = parseStringFlags(flags);
final int iFlags = parseFlags(this, flags);

if(!hasLiteral(iFlags)) {
pattern = translateRegexp(pattern, hasIgnoreWhitespace(iFlags), hasCaseInsensitive(iFlags));
pattern = translateRegexp(this, pattern, hasIgnoreWhitespace(iFlags), hasCaseInsensitive(iFlags));
}

final Pattern ptn = PatternFactory.getInstance().getPattern(pattern, iFlags);
Expand Down Expand Up @@ -185,67 +184,4 @@ private void nonMatch(final MemTreeBuilder builder, final String nonMatch) {
builder.characters(nonMatch);
builder.endElement();
}

/**
 * Converts an XPath regular expression flags string into the matching
 * {@link Pattern} bit mask.
 *
 * Recognised flag characters are {@code s} (DOTALL), {@code m} (MULTILINE),
 * {@code i} (CASE_INSENSITIVE together with UNICODE_CASE), {@code x}
 * (COMMENTS) and {@code q} (LITERAL). Any other character is ignored.
 *
 * @param flags the flags string, or {@code null} when no flags were supplied
 * @return the combined {@link Pattern} flags; {@code 0} when {@code flags}
 *     is {@code null} or contains no recognised flag character
 */
private int parseStringFlags(@Nullable final String flags) {
    if (flags == null) {
        return 0;
    }

    int iFlags = 0;
    for (final char c : flags.toCharArray()) {
        switch (c) {
            case 's':
                iFlags |= Pattern.DOTALL;
                break;

            case 'm':
                iFlags |= Pattern.MULTILINE;
                break;

            case 'i':
                // CASE_INSENSITIVE on its own only folds US-ASCII characters;
                // UNICODE_CASE is required for Unicode-aware case-insensitive matching
                iFlags |= Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
                break;

            case 'x':
                iFlags |= Pattern.COMMENTS;
                break;

            case 'q':
                iFlags |= Pattern.LITERAL;
                break;

            default:
                // unrecognised flag characters are deliberately skipped
                break;
        }
    }
    return iFlags;
}

/**
 * Tests whether the {@link Pattern#LITERAL} bit is present in a flags mask.
 *
 * @param flags a bit mask of {@link Pattern} flags
 * @return {@code true} if the LITERAL bit is set
 */
private boolean hasLiteral(final int flags) {
    final int literalBit = flags & Pattern.LITERAL;
    return literalBit != 0;
}

/**
 * Tests whether a flags mask requests case-insensitive matching.
 *
 * @param flags a bit mask of {@link Pattern} flags
 * @return {@code true} if either the CASE_INSENSITIVE or the UNICODE_CASE bit is set
 */
private boolean hasCaseInsensitive(final int flags) {
    if ((flags & Pattern.CASE_INSENSITIVE) != 0) {
        return true;
    }
    return (flags & Pattern.UNICODE_CASE) != 0;
}

/**
 * Tests whether the {@link Pattern#COMMENTS} bit is present in a flags mask.
 *
 * @param flags a bit mask of {@link Pattern} flags
 * @return {@code true} if the COMMENTS bit is set
 */
private boolean hasIgnoreWhitespace(final int flags) {
    return 0 != (Pattern.COMMENTS & flags);
}

/**
 * Converts a regular expression written in XPath2 syntax into the
 * equivalent Java regex syntax.
 *
 * @param pattern the regular expression, in the syntax of XML Schemas Part 2
 * @param ignoreWhitespace whether whitespace is to be ignored ('x' flag)
 * @param caseBlind whether case is to be ignored ('i' flag)
 * @return the regular expression rewritten in Java regex syntax
 * @throws XPathException if the translator rejects the pattern; the
 *     underlying RegexSyntaxException is attached as the cause
 */
protected String translateRegexp(final String pattern, final boolean ignoreWhitespace, final boolean caseBlind) throws XPathException {
    // fixed XML version argument handed to the translator
    final int xmlVersion = 11;
    try {
        return JDK15RegexTranslator.translate(pattern, xmlVersion, true, ignoreWhitespace, caseBlind);
    } catch (final RegexSyntaxException rse) {
        final String message = "Conversion from XPath2 to Java regular expression syntax failed: " + rse.getMessage();
        throw new XPathException(this, message, rse);
    }
}
}
83 changes: 10 additions & 73 deletions src/org/exist/xquery/functions/fn/FunMatches.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,6 @@
import org.exist.storage.NativeValueIndex;
import org.exist.util.PatternFactory;
import org.exist.xquery.pragmas.Optimize;
import org.exist.xquery.regex.JDK15RegexTranslator;
import org.exist.xquery.regex.RegexSyntaxException;
import org.exist.xquery.*;
import org.exist.xquery.util.Error;
import org.exist.xquery.value.BooleanValue;
Expand All @@ -48,6 +46,8 @@
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import static org.exist.xquery.regex.RegexUtil.*;

/**
* Implements the fn:matches() function.
*
Expand Down Expand Up @@ -214,7 +214,7 @@ public NodeSet preSelect(Sequence contextSequence, boolean useContext) throws XP
final int flags;
if(getSignature().getArgumentCount() == 3) {
final String flagsArg = getArgument(2).eval(contextSequence).getStringValue();
flags = parseFlags(flagsArg);
flags = parseFlags(this, flagsArg);
} else {
flags = 0;
}
Expand All @@ -232,7 +232,7 @@ public NodeSet preSelect(Sequence contextSequence, boolean useContext) throws XP
} else {
final boolean ignoreWhitespace = hasIgnoreWhitespace(flags);
final boolean caseBlind = !caseSensitive;
pattern = translateRegexp(getArgument(1).eval(contextSequence).getStringValue(), ignoreWhitespace, caseBlind);
pattern = translateRegexp(this, getArgument(1).eval(contextSequence).getStringValue(), ignoreWhitespace, caseBlind);
}
}

Expand All @@ -250,18 +250,6 @@ public NodeSet preSelect(Sequence contextSequence, boolean useContext) throws XP
return preselectResult;
}

/**
 * Reports whether the given flags mask contains {@link Pattern#LITERAL}.
 *
 * @param flags a bit mask of {@link Pattern} flags
 * @return {@code true} if the LITERAL bit is set
 */
protected boolean hasLiteral(final int flags) {
    return 0 != (Pattern.LITERAL & flags);
}

/**
 * Reports whether the given flags mask asks for case-insensitive matching.
 *
 * @param flags a bit mask of {@link Pattern} flags
 * @return {@code true} if the CASE_INSENSITIVE bit, the UNICODE_CASE bit, or both are set
 */
protected boolean hasCaseInsensitive(final int flags) {
    final int caseBits = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
    return (flags & caseBits) != 0;
}

/**
 * Reports whether the given flags mask contains {@link Pattern#COMMENTS}.
 *
 * @param flags a bit mask of {@link Pattern} flags
 * @return {@code true} if the COMMENTS bit is set
 */
protected boolean hasIgnoreWhitespace(final int flags) {
    final int commentsBit = flags & Pattern.COMMENTS;
    return commentsBit != 0;
}

@Override
public int getDependencies() {
final Expression stringArg = getArgument(0);
Expand Down Expand Up @@ -386,7 +374,7 @@ private Sequence evalWithIndex(Sequence contextSequence, Item contextItem, Seque
final int flags;
if(getSignature().getArgumentCount() == 3) {
final String flagsArg = getArgument(2).eval(contextSequence, contextItem).getStringValue();
flags = parseFlags(flagsArg);
flags = parseFlags(this, flagsArg);
} else {
flags = 0;
}
Expand All @@ -406,7 +394,7 @@ private Sequence evalWithIndex(Sequence contextSequence, Item contextItem, Seque
} else {
final boolean ignoreWhitespace = hasIgnoreWhitespace(flags);
final boolean caseBlind = !caseSensitive;
pattern = translateRegexp(getArgument(1).eval(contextSequence, contextItem).getStringValue(), ignoreWhitespace, caseBlind);
pattern = translateRegexp(this, getArgument(1).eval(contextSequence, contextItem).getStringValue(), ignoreWhitespace, caseBlind);
}
}

Expand Down Expand Up @@ -480,27 +468,6 @@ private Sequence evalFallback(NodeSet nodes, String pattern, int flags, int inde
return result;
}

/**
 * Rewrites an XPath2 regular expression in Java regex syntax.
 *
 * @param pattern the regular expression, in the syntax of XML Schemas Part 2
 * @param ignoreWhitespace {@code true} if whitespace is to be ignored ('x' flag)
 * @param caseBlind {@code true} if case is to be ignored ('i' flag)
 * @return the translated regular expression
 * @throws XPathException if the pattern cannot be translated; the
 *     RegexSyntaxException raised by the translator is kept as the cause
 */
protected String translateRegexp(final String pattern, final boolean ignoreWhitespace, final boolean caseBlind) throws XPathException {
    // fixed XML version argument handed to the translator
    final int xmlVersion = 11;
    try {
        return JDK15RegexTranslator.translate(pattern, xmlVersion, true, ignoreWhitespace, caseBlind);
    } catch (final RegexSyntaxException syntaxError) {
        final String reason = syntaxError.getMessage();
        throw new XPathException(this,
                "Conversion from XPath2 to Java regular expression syntax failed: " + reason,
                syntaxError);
    }
}

/**
* @param contextSequence
* @param contextItem
Expand All @@ -513,7 +480,7 @@ private Sequence evalGeneric(Sequence contextSequence, Item contextItem, Sequenc

final int flags;
if(getSignature().getArgumentCount() == 3) {
flags = parseFlags(getArgument(2).eval(contextSequence, contextItem).getStringValue());
flags = parseFlags(this, getArgument(2).eval(contextSequence, contextItem).getStringValue());
} else {
flags = 0;
}
Expand All @@ -529,7 +496,7 @@ private Sequence evalGeneric(Sequence contextSequence, Item contextItem, Sequenc
} else {
final boolean ignoreWhitespace = hasIgnoreWhitespace(flags);
final boolean caseBlind = hasCaseInsensitive(flags);
pattern = translateRegexp(getArgument(1).eval(contextSequence, contextItem).getStringValue(), ignoreWhitespace, caseBlind);
pattern = translateRegexp(this, getArgument(1).eval(contextSequence, contextItem).getStringValue(), ignoreWhitespace, caseBlind);
}
}

Expand Down Expand Up @@ -560,43 +527,13 @@ private boolean match(String string, String pattern, int flags) throws XPathExce
}
}

/**
 * Builds a {@link Pattern} flags mask from an XPath regular expression
 * flags string.
 *
 * Accepted flag characters are {@code m} (MULTILINE), {@code i}
 * (CASE_INSENSITIVE and UNICODE_CASE), {@code x} (COMMENTS), {@code s}
 * (DOTALL) and {@code q} (LITERAL).
 *
 * @param s the flags string
 * @return the combined {@link Pattern} flags; {@code 0} for an empty string
 * @throws XPathException if {@code s} contains any other character
 */
protected final static int parseFlags(final String s) throws XPathException {
    int mask = 0;
    for (final char flag : s.toCharArray()) {
        if (flag == 'm') {
            mask |= Pattern.MULTILINE;
        } else if (flag == 'i') {
            mask |= Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
        } else if (flag == 'x') {
            mask |= Pattern.COMMENTS;
        } else if (flag == 's') {
            mask |= Pattern.DOTALL;
        } else if (flag == 'q') {
            mask |= Pattern.LITERAL;
        } else {
            throw new XPathException("err:FORX0001: Invalid regular expression flag: " + flag);
        }
    }
    return mask;
}

/**
 * Delegates to {@code super.reset()} and then sets {@code hasUsedIndex}
 * back to {@code false}.
 */
@Override
public void reset() {
super.reset();
hasUsedIndex = false;
}

/*
* (non-Javadoc)
*
* @see org.exist.xquery.AbstractExpression#resetState()
*/
@Override
public void resetState(boolean postOptimization) {
super.resetState(postOptimization);
if (!postOptimization)
Expand Down
6 changes: 4 additions & 2 deletions src/org/exist/xquery/functions/fn/FunReplace.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@
import org.exist.xquery.value.StringValue;
import org.exist.xquery.value.Type;

import static org.exist.xquery.regex.RegexUtil.*;

/**
* @author Wolfgang Meier ([email protected])
*/
Expand Down Expand Up @@ -166,7 +168,7 @@ public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathExc
} else {
final int flags;
if (getSignature().getArgumentCount() == 4) {
flags = parseFlags(getArgument(3).eval(contextSequence, contextItem).getStringValue());
flags = parseFlags(this, getArgument(3).eval(contextSequence, contextItem).getStringValue());
} else {
flags = 0;
}
Expand All @@ -187,7 +189,7 @@ public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathExc
.replace("\\", "\\\\")
.replace("$", "\\$");
} else {
pattern = translateRegexp(patternSeq.getStringValue(), hasIgnoreWhitespace(flags), hasCaseInsensitive(flags));
pattern = translateRegexp(this, patternSeq.getStringValue(), hasIgnoreWhitespace(flags), hasCaseInsensitive(flags));
}

//An error is raised [err:FORX0004] if the value of $replacement contains a "$" character that is not immediately followed by a digit 0-9 and not immediately preceded by a "\".
Expand Down
5 changes: 3 additions & 2 deletions src/org/exist/xquery/functions/fn/FunTokenize.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import org.exist.xquery.value.ValueSequence;

import static org.exist.xquery.FunctionDSL.*;
import static org.exist.xquery.regex.RegexUtil.*;

/**
* @author Wolfgang Meier ([email protected])
Expand Down Expand Up @@ -100,7 +101,7 @@ public Sequence eval(final Sequence contextSequence, final Item contextItem) thr
} else {
final int flags;
if (getSignature().getArgumentCount() == 3) {
flags = parseFlags(getArgument(2).eval(contextSequence, contextItem)
flags = parseFlags(this, getArgument(2).eval(contextSequence, contextItem)
.getStringValue());
} else {
flags = 0;
Expand All @@ -117,7 +118,7 @@ public Sequence eval(final Sequence contextSequence, final Item contextItem) thr
} else {
final boolean ignoreWhitespace = hasIgnoreWhitespace(flags);
final boolean caseBlind = !hasCaseInsensitive(flags);
pattern = translateRegexp(getArgument(1).eval(contextSequence, contextItem).getStringValue(), ignoreWhitespace, caseBlind);
pattern = translateRegexp(this, getArgument(1).eval(contextSequence, contextItem).getStringValue(), ignoreWhitespace, caseBlind);
}
}

Expand Down
Loading

0 comments on commit 29c4489

Please sign in to comment.