Skip to content

Commit

Permalink
fix some issues with line endings CR/LF vs LF
Browse files Browse the repository at this point in the history
  • Loading branch information
Robin Duda committed Nov 29, 2018
1 parent 303e85c commit 6bb7884
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 15 deletions.
5 changes: 4 additions & 1 deletion src/main/java/com/codingchili/ApplicationLauncher.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,11 @@ public static void main(String[] args) {

private ApplicationLauncher(String[] args) {
VertxOptions options = new VertxOptions();
options.setMaxEventLoopExecuteTime(options.getMaxEventLoopExecuteTime() * 10)

options.setMaxWorkerExecuteTime(options.getMaxWorkerExecuteTime() * 20) // 20 minutes.
.setMaxEventLoopExecuteTime(options.getMaxEventLoopExecuteTime() * 10) // 10 seconds.
.setBlockedThreadCheckInterval(8000);

vertx = Vertx.vertx();

ImportEventCodec.registerOn(vertx);
Expand Down
34 changes: 21 additions & 13 deletions src/main/java/com/codingchili/Model/CSVParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -110,21 +110,23 @@ private void readRowCount() {
reset();

for (long i = 0; i < fileSize; i++) {
if (get() == '\r') {
if (get() == TOKEN_LF) {
rows++;
row = rows;
}
}
}

private void readHeaders() {
AtomicInteger fieldId = new AtomicInteger(0);
reset();

for (long i = 0; i < fileSize; i++) {
byte current = get();
if (current == TOKEN_LF || current == TOKEN_CR) {
if (current == TOKEN_LF) {
Arrays.stream(new String(buffer.array()).split(","))
.map(header -> header.replaceAll("\"", ""))
.map(header -> (header.isEmpty()) ? "header_" + fieldId.incrementAndGet() : header)
.map(String::trim).forEach(header -> {
headers.put(header, "<empty>");
});
Expand Down Expand Up @@ -183,17 +185,20 @@ private JsonObject readRow() {
process(columnsRead, json);
done = true;
break;
case TOKEN_LF:
// skip LF characters.
break;
case TOKEN_CR:
// final header is being read and EOL appears.
if (columnsRead.get() == headers.size() - 1) {
process(columnsRead, json);
done = true;
break;
} else {
throw new ColumnsHeadersMismatchException(columnsRead.get(), headers.size() - 1, row + 1);
// skip CR characters.
break;
case TOKEN_LF:
// ignore empty lines.
if (buffer.position() > 0) {
// final header is being read and EOL appears.
if (columnsRead.get() == headers.size() - 1) {
process(columnsRead, json);
done = true;
break;
} else {
throw new ColumnsHeadersMismatchException(columnsRead.get(), headers.size() - 1, row + 1);
}
}
case TOKEN_QUOTE:
// toggle quoted to support commas within quotes.
Expand Down Expand Up @@ -223,16 +228,19 @@ private JsonObject readRow() {
return json;
}


// Precompiled type-detection patterns, compiled once and reused as predicates.
// Each pattern is anchored with ^ and $, so the whole value has to match even though
// asPredicate() tests with find() semantics.

// One or more digits, a dot, then one or more digits (e.g. "6.1"); no sign or exponent is accepted.
private static final Predicate<String> floatPattern = Pattern.compile("^[0-9]+\\.[0-9]+$").asPredicate();
// One or more digits only (e.g. "42"); a leading '-' or '+' does not match.
private static final Predicate<String> numberPattern = Pattern.compile("^[0-9]+$").asPredicate();
// Exactly the lowercase literals "true" or "false".
private static final Predicate<String> boolPattern = Pattern.compile("^(true|false)$").asPredicate();

private Object parseDatatype(byte[] data) {
String line = new String(data).trim();

// skip regex parsing on dry-run.
if (line.length() > 0) {
if (numberPattern.test(line)) {
return Long.parseLong(line);
} else if (floatPattern.test(line)) {
return Double.parseDouble(line);
} else if (boolPattern.test(line)) {
return Boolean.parseBoolean(line);
} else {
Expand Down
4 changes: 3 additions & 1 deletion src/test/resources/test.csv
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
Column 1, Column 2, Column 3cell 6.1, cell 6.2, cell 6.3cell 7.1, cell 7.2, cell 7.3
Column 1, Column 2, Column 3
cell 6.1, cell 6.2, cell 6.3
cell 7.1, cell 7.2, cell 7.3

0 comments on commit 6bb7884

Please sign in to comment.