From 080796c9c7d66f2430de052a1382bf679469efc6 Mon Sep 17 00:00:00 2001 From: Dai MIKURUBE Date: Fri, 17 Nov 2023 14:39:48 +0900 Subject: [PATCH 1/2] Add a test case for end-of-file while parsing a quoted field --- .../standards/preview/TestFilePreview.java | 5 +++++ .../preview/file/test/test_eof_in_quote.csv | 4 ++++ .../file/test/test_eof_in_quote_load.yml | 19 +++++++++++++++++++ .../file/test/test_eof_in_quote_previewed.csv | 2 ++ 4 files changed, 30 insertions(+) create mode 100644 src/test/resources/org/embulk/standards/preview/file/test/test_eof_in_quote.csv create mode 100644 src/test/resources/org/embulk/standards/preview/file/test/test_eof_in_quote_load.yml create mode 100644 src/test/resources/org/embulk/standards/preview/file/test/test_eof_in_quote_previewed.csv diff --git a/src/test/java/org/embulk/standards/preview/TestFilePreview.java b/src/test/java/org/embulk/standards/preview/TestFilePreview.java index 98805eec6..99437c5f9 100644 --- a/src/test/java/org/embulk/standards/preview/TestFilePreview.java +++ b/src/test/java/org/embulk/standards/preview/TestFilePreview.java @@ -71,6 +71,11 @@ public void changePreviewSampleBufferBytes() throws Exception { "test_sample_buffer_bytes.csv", "test_sample_buffer_bytes_previewed.csv"); } + @Test + public void testEndOfFileInQuote() throws Exception { + assertPreviewedRecords(embulk, "test_eof_in_quote_load.yml", "test_eof_in_quote.csv", "test_eof_in_quote_previewed.csv"); + } + private static void assertPreviewedRecords(TestingEmbulk embulk, String loadYamlResourceName, String sourceCsvResourceName, String resultCsvResourceName) throws IOException { diff --git a/src/test/resources/org/embulk/standards/preview/file/test/test_eof_in_quote.csv b/src/test/resources/org/embulk/standards/preview/file/test/test_eof_in_quote.csv new file mode 100644 index 000000000..21142c85e --- /dev/null +++ b/src/test/resources/org/embulk/standards/preview/file/test/test_eof_in_quote.csv @@ -0,0 +1,4 @@ +id,account,time,purchase,comment +1,32864,2015-01-27 19:23:49,20150127,embulk +2,14824,2015-01-27 19:01:23,20150127,embulk jruby +3,27559,2015-01-28 02:20:02,20150128,"Embulk ""cs \ No newline at end of file diff --git a/src/test/resources/org/embulk/standards/preview/file/test/test_eof_in_quote_load.yml b/src/test/resources/org/embulk/standards/preview/file/test/test_eof_in_quote_load.yml new file mode 100644 index 000000000..dde20fdab --- /dev/null +++ b/src/test/resources/org/embulk/standards/preview/file/test/test_eof_in_quote_load.yml @@ -0,0 +1,19 @@ +type: file +parser: + charset: UTF-8 + newline: LF + type: csv + delimiter: ',' + quote: '"' + escape: '"' + null_string: "NULL" + trim_if_not_quoted: false + skip_header_lines: 1 + allow_extra_columns: false + allow_optional_columns: false + columns: + - {name: id, type: long} + - {name: account, type: long} + - {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'} + - {name: purchase, type: timestamp, format: '%Y%m%d'} + - {name: comment, type: string} \ No newline at end of file diff --git a/src/test/resources/org/embulk/standards/preview/file/test/test_eof_in_quote_previewed.csv b/src/test/resources/org/embulk/standards/preview/file/test/test_eof_in_quote_previewed.csv new file mode 100644 index 000000000..f6603164e --- /dev/null +++ b/src/test/resources/org/embulk/standards/preview/file/test/test_eof_in_quote_previewed.csv @@ -0,0 +1,2 @@ +1,32864,2015-01-27 19:23:49.000000 +0000,2015-01-27 00:00:00.000000 +0000,embulk +2,14824,2015-01-27 19:01:23.000000 +0000,2015-01-27 00:00:00.000000 +0000,embulk jruby From f1a86ce2cee7045781034b6aa19e0bfd9eedf744 Mon Sep 17 00:00:00 2001 From: Dai MIKURUBE Date: Fri, 17 Nov 2023 16:15:51 +0900 Subject: [PATCH 2/2] Fix the infinite loop for end-of-file in quoted field by upgrading embulk-util-csv --- build.gradle | 2 +- gradle.lockfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/build.gradle b/build.gradle index 30596c1d1..975fad1be 100644 --- a/build.gradle +++ b/build.gradle @@ -40,7 +40,7 @@ dependencies { implementation "com.fasterxml.jackson.core:jackson-annotations:2.6.7" implementation "com.fasterxml.jackson.core:jackson-core:2.6.7" implementation "com.fasterxml.jackson.core:jackson-databind:2.6.7.5" - implementation "org.embulk:embulk-util-csv:0.2.3" + implementation "org.embulk:embulk-util-csv:0.2.4" implementation "org.embulk:embulk-util-config:0.3.4" implementation "org.embulk:embulk-util-file:0.1.5" implementation "org.embulk:embulk-util-json:0.3.0" diff --git a/gradle.lockfile b/gradle.lockfile index 0e918882f..3508e8851 100644 --- a/gradle.lockfile +++ b/gradle.lockfile @@ -8,7 +8,7 @@ com.fasterxml.jackson.datatype:jackson-datatype-jdk8:2.6.7=compileClasspath,runt javax.validation:validation-api:1.1.0.Final=compileClasspath,runtimeClasspath org.embulk:embulk-spi:0.11=compileClasspath org.embulk:embulk-util-config:0.3.4=compileClasspath,runtimeClasspath -org.embulk:embulk-util-csv:0.2.3=compileClasspath,runtimeClasspath +org.embulk:embulk-util-csv:0.2.4=compileClasspath,runtimeClasspath org.embulk:embulk-util-file:0.1.5=compileClasspath,runtimeClasspath org.embulk:embulk-util-json:0.3.0=compileClasspath,runtimeClasspath org.embulk:embulk-util-rubytime:0.3.3=compileClasspath,runtimeClasspath