Skip to content

Commit

Permalink
Extracted PatternMatcher utility class from decoder
Browse files Browse the repository at this point in the history
  • Loading branch information
amichair committed May 22, 2016
1 parent 99fd131 commit dcc0ce0
Show file tree
Hide file tree
Showing 3 changed files with 152 additions and 179 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,15 @@
* message string is then passed to MINA IO handlers for further processing.
*/
public class FIXMessageDecoder implements MessageDecoder {

private static final char SOH = '\001';
private static final String FIELD_DELIMITER = String.valueOf(SOH);

private final Logger log = LoggerFactory.getLogger(getClass());

private final byte[] HEADER_PATTERN;
private final byte[] CHECKSUM_PATTERN;
private final byte[] LOGON_PATTERN;
private final PatternMatcher HEADER_PATTERN;
private final PatternMatcher CHECKSUM_PATTERN;
private final PatternMatcher LOGON_PATTERN;

// Parsing states
private static final int SEEKING_HEADER = 1;
Expand Down Expand Up @@ -86,14 +87,14 @@ public FIXMessageDecoder(String charset) throws UnsupportedEncodingException {

public FIXMessageDecoder(String charset, String delimiter) throws UnsupportedEncodingException {
charsetEncoding = CharsetSupport.validate(charset);
HEADER_PATTERN = getBytes("8=FIXt.?.?" + delimiter + "9=");
CHECKSUM_PATTERN = getBytes("10=???" + delimiter);
LOGON_PATTERN = getBytes(delimiter + "35=A" + delimiter);
HEADER_PATTERN = new PatternMatcher("8=FIXt.?.?" + delimiter + "9=");
CHECKSUM_PATTERN = new PatternMatcher("10=???" + delimiter);
LOGON_PATTERN = new PatternMatcher(delimiter + "35=A" + delimiter);
resetState();
}

public MessageDecoderResult decodable(IoSession session, IoBuffer in) {
boolean hasHeader = indexOf(in, in.position(), HEADER_PATTERN) != -1L;
boolean hasHeader = HEADER_PATTERN.find(in, in.position()) != -1L;
return hasHeader ? MessageDecoderResult.OK :
(in.remaining() > MAX_UNDECODED_DATA_LENGTH ? MessageDecoderResult.NOT_OK : MessageDecoderResult.NEED_DATA);
}
Expand Down Expand Up @@ -129,7 +130,7 @@ private boolean parseMessage(IoBuffer in, ProtocolDecoderOutput out)
while (in.hasRemaining() && !messageFound) {
if (state == SEEKING_HEADER) {

long headerPos = indexOf(in, position, HEADER_PATTERN);
long headerPos = HEADER_PATTERN.find(in, position);
if (headerPos == -1L) {
break;
}
Expand Down Expand Up @@ -183,7 +184,7 @@ private boolean parseMessage(IoBuffer in, ProtocolDecoderOutput out)
}

if (state == PARSING_CHECKSUM) {
if (matches(in, position, CHECKSUM_PATTERN) > 0) {
if (CHECKSUM_PATTERN.match(in, position) > 0) {
// we are trying to parse the checksum but should
// check if the CHECKSUM_PATTERN is preceded by SOH
// or if the pattern just occurs inside of another field
Expand All @@ -195,9 +196,9 @@ private boolean parseMessage(IoBuffer in, ProtocolDecoderOutput out)
if (log.isDebugEnabled()) {
log.debug("found checksum: " + getBufferDebugInfo(in));
}
position += CHECKSUM_PATTERN.length;
position += CHECKSUM_PATTERN.getMinLength();
} else {
if (position + CHECKSUM_PATTERN.length <= in.limit()) {
if (position + CHECKSUM_PATTERN.getMinLength() <= in.limit()) {
// FEATURE allow configurable recovery position
// int recoveryPosition = in.position() + 1;
// Following recovery position is compatible with QuickFIX C++
Expand Down Expand Up @@ -250,16 +251,6 @@ private boolean hasRemaining(IoBuffer in) {
return position < in.limit();
}

private static int minPatternLength(byte[] pattern) {
int len = 0;
for (byte b : pattern) {
if (b < 'a' || b > 'z') { // if not optional character (lowercase)
len++;
}
}
return len;
}

private String getMessageString(IoBuffer buffer) throws UnsupportedEncodingException {
byte[] data = new byte[position - buffer.position()];
buffer.get(data);
Expand Down Expand Up @@ -288,68 +279,7 @@ private void handleError(IoBuffer buffer, int recoveryPosition, String text,
}

private boolean isLogon(IoBuffer buffer) {
return indexOf(buffer, buffer.position(), LOGON_PATTERN) != -1L;
}

/**
* Searches for the given pattern within a buffer,
* starting at the given buffer position.
*
* @param buffer the buffer to search within
* @param position the buffer position to start searching at
* @param pattern the pattern to search for
* @return a long value whose lower 32 bits contain the index of the
* found pattern, and upper 32 bits contain the found pattern length;
* if the pattern is not found at all, returns -1L
*/
private static long indexOf(IoBuffer buffer, int position, byte[] pattern) {
int length;
byte first = pattern[0];
for (int limit = buffer.limit() - minPatternLength(pattern) + 1; position < limit; position++) {
if (buffer.get(position) == first && (length = matches(buffer, position, pattern)) > 0) {
return (long)length << 32 | position;
}
}
return -1L;
}

/**
* Checks if the buffer at the given offset matches the given pattern.
* The character '?' is a one byte wildcard, and lowercase letters are optional.
*
* @param buffer the buffer to check
* @param bufferOffset the buffer offset at which to check
* @param pattern the pattern to try matching
* @return the length of the matched pattern, or -1 if there is no match
*/
private static int matches(IoBuffer buffer, int bufferOffset, byte[] pattern) {
if (bufferOffset + minPatternLength(pattern) > buffer.limit()) {
return -1;
}
final int initOffset = bufferOffset;
int patternOffset = 0;
for (int bufferLimit = buffer.limit(); patternOffset < pattern.length
&& bufferOffset < bufferLimit; patternOffset++, bufferOffset++) {
byte b = pattern[patternOffset];
// check exact character match or wildcard match
if (buffer.get(bufferOffset) == b || b == '?')
continue;
// check optional character match
if (b >= 'a' && b <= 'z') { // lowercase is optional
// at this point we know it's not an exact match, so we only need to check the
// uppercase character. If there's a match we go on as usual, and if not we
// ignore the optional character by rewinding the buffer offset
if (b - 'a' + 'A' != buffer.get(bufferOffset)) // no uppercase match
bufferOffset--;
continue;
}
return -1; // no match
}
if (patternOffset != pattern.length) {
// when minPatternLength(pattern) != pattern.length we might run out of buffer before we run out of pattern
return -1;
}
return bufferOffset - initOffset;
return LOGON_PATTERN.find(buffer, buffer.position()) != -1L;
}

public void finishDecode(IoSession arg0, ProtocolDecoderOutput arg1) throws Exception {
Expand Down Expand Up @@ -419,11 +349,4 @@ public void flush(IoFilter.NextFilter nextFilter, IoSession ioSession) {
fileIn.close();
}

private static byte[] getBytes(String s) {
try {
return s.getBytes(CharsetSupport.getDefaultCharset());
} catch (UnsupportedEncodingException e) {
throw new RuntimeException(e);
}
}
}
126 changes: 126 additions & 0 deletions quickfixj-core/src/main/java/quickfix/mina/message/PatternMatcher.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
package quickfix.mina.message;

import org.apache.mina.core.buffer.IoBuffer;
import org.quickfixj.CharsetSupport;

import java.io.UnsupportedEncodingException;

/**
 * Finds a byte pattern within a buffer.
 * <p>
 * Matching is performed on bytes rather than characters, but we
 * consider them interchangeable with ASCII characters for simplicity.
 * <p>
 * The question mark character ('?') is treated as a one-byte wildcard.
 * Lowercase letters are considered optional (and matched case-insensitively).
 * Uppercase letters and all other values are matched as literals.
 * <p>
 * This class is immutable and thus can be used concurrently from multiple threads.
 */
class PatternMatcher {

    private final byte[] pattern;
    private final int minLength;

    /**
     * Constructs a PatternMatcher which matches the given pattern.
     *
     * @param pattern a pattern (see {@link PatternMatcher} for details)
     */
    PatternMatcher(String pattern) {
        this.pattern = getBytes(pattern);
        this.minLength = calculateMinLength(this.pattern);
    }

    /**
     * Returns the minimum number of bytes that the pattern can match.
     * If the pattern has no optional characters, this is simply the
     * pattern length.
     *
     * @return the minimum number of bytes that the pattern can match
     */
    public int getMinLength() {
        return minLength;
    }

    /**
     * Encodes the given string using the charset reported by
     * {@code CharsetSupport.getDefaultCharset()}.
     *
     * @param s the string to encode
     * @return the encoded bytes
     * @throws RuntimeException if the charset is not supported (the original
     *         {@link UnsupportedEncodingException} is preserved as the cause)
     */
    private static byte[] getBytes(String s) {
        try {
            return s.getBytes(CharsetSupport.getDefaultCharset());
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Tells whether the given pattern byte is an optional character,
     * i.e. an ASCII lowercase letter.
     *
     * @param b a pattern byte
     * @return true if the byte is optional, false if it is mandatory
     */
    private static boolean isOptional(byte b) {
        return b >= 'a' && b <= 'z';
    }

    /**
     * Calculates the minimum number of bytes that the pattern can match,
     * which is the number of pattern bytes that are not optional.
     *
     * @param pattern the pattern bytes
     * @return the minimum number of bytes that the pattern can match
     */
    private static int calculateMinLength(byte[] pattern) {
        int len = 0;
        for (byte b : pattern) {
            if (!isOptional(b)) {
                len++;
            }
        }
        return len;
    }

    /**
     * Searches for the pattern within a buffer,
     * starting at the given buffer offset.
     * <p>
     * The index and length can be extracted from a successful result
     * using {@code (int)result} and {@code (int)(result >>> 32)} respectively.
     * Only matches of at least one byte are reported, so an empty pattern
     * is never found.
     *
     * @param buffer the buffer to search within
     * @param offset the buffer offset to start searching at
     * @return a long value whose lower 32 bits contain the index of the
     *         found pattern, and upper 32 bits contain the found pattern length;
     *         if the pattern is not found at all, returns -1L
     */
    public long find(IoBuffer buffer, int offset) {
        if (pattern.length == 0) {
            return -1L; // nothing to look for (and there is no first byte to pre-filter on)
        }
        final byte first = pattern[0];
        // Comparing the first byte is a cheap pre-filter before the full match,
        // but it is only valid if the first pattern byte is a literal: a wildcard
        // or optional first byte can match buffer content that differs from it.
        final boolean literalFirst = first != '?' && !isOptional(first);
        for (int limit = buffer.limit() - minLength + 1; offset < limit; offset++) {
            if (literalFirst && buffer.get(offset) != first) {
                continue;
            }
            final int length = match(buffer, offset);
            if (length > 0) {
                return (long) length << 32 | offset;
            }
        }
        return -1L;
    }

    /**
     * Checks if the buffer contents at the given offset matches the pattern.
     *
     * @param buffer the buffer to check
     * @param offset the buffer offset at which to check
     * @return the length of the matched pattern, or -1 if there is no match
     */
    public int match(IoBuffer buffer, int offset) {
        final int bufferLimit = buffer.limit();
        if (offset + minLength > bufferLimit) {
            return -1; // not enough bytes left for even the shortest possible match
        }
        final int startOffset = offset;
        int patternOffset = 0;
        while (patternOffset < pattern.length && offset < bufferLimit) {
            final byte expected = pattern[patternOffset];
            final byte actual = buffer.get(offset);
            if (actual == expected || expected == '?') {
                // exact character match or wildcard match
                offset++;
            } else if (isOptional(expected)) {
                // at this point we know it's not an exact match, so we only need to check
                // the uppercase character. If there's a match we consume the buffer byte,
                // and if not we skip the optional character without consuming anything
                if (actual == expected - 'a' + 'A') {
                    offset++;
                }
            } else {
                return -1; // no match
            }
            patternOffset++;
        }
        // if we ran out of buffer and only optional characters remain in
        // the pattern, they are simply absent and the match still succeeds
        while (patternOffset < pattern.length && isOptional(pattern[patternOffset])) {
            patternOffset++;
        }
        if (patternOffset != pattern.length) {
            // we ran out of buffer before we ran out of mandatory pattern bytes
            return -1;
        }
        return offset - startOffset;
    }
}
Loading

0 comments on commit dcc0ce0

Please sign in to comment.