Skip to content

Commit

Permalink
fix: parse on proper delimiter, and improve tests
Browse files Browse the repository at this point in the history
  • Loading branch information
wiedld committed Apr 25, 2024
1 parent cd91a77 commit 0cf04b5
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 10 deletions.
2 changes: 1 addition & 1 deletion datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1391,7 +1391,7 @@ impl ConfigField for TableParquetOptions {
if key.contains("::") {
self.column_specific_options.set(key, value)
} else if key.eq("metadata") {
- for maybe_pair in value.split('_') {
+ for maybe_pair in value.split(' ') {
let (k, v) = match maybe_pair.split(':').collect::<Vec<_>>()[..] {
[k, v] => (k.into(), Some(v.into())),
[k] => (k.into(), None),
Expand Down
12 changes: 6 additions & 6 deletions datafusion/core/src/datasource/file_format/parquet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1129,7 +1129,7 @@ mod tests {
};
use parquet::arrow::arrow_reader::ArrowReaderOptions;
use parquet::arrow::ParquetRecordBatchStreamBuilder;
- use parquet::file::metadata::{ParquetColumnIndex, ParquetOffsetIndex};
+ use parquet::file::metadata::{KeyValue, ParquetColumnIndex, ParquetOffsetIndex};
use parquet::file::page_index::index::Index;
use tokio::fs::File;

Expand Down Expand Up @@ -1917,11 +1917,11 @@ mod tests {
);

let key_value_metadata = key_value_metadata.unwrap();
- let my_metadata = key_value_metadata
- .iter()
- .filter(|kv| kv.key == "my-data")
- .collect::<Vec<_>>();
- assert_eq!(my_metadata.len(), 1);
+ let expected_metadata = vec![KeyValue {
+ key: "my-data".to_string(),
+ value: Some("stuff".to_string()),
+ }];
+ assert_eq!(key_value_metadata, expected_metadata);

Ok(())
}
Expand Down
37 changes: 34 additions & 3 deletions datafusion/sqllogictest/test_files/copy.slt
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,7 @@ OPTIONS (

# valid vs invalid metadata

# accepts empty map
statement ok
COPY source_table
TO 'test_files/scratch/copy/table_with_metadata/'
Expand All @@ -299,22 +300,52 @@ OPTIONS (
'format.metadata' ''
)

statement error
# accepts map with a single entry
statement ok
COPY source_table
TO 'test_files/scratch/copy/table_with_metadata/'
STORED AS PARQUET
OPTIONS (
'format.metadata' 'key:value'
)

# accepts map with multiple entries
statement ok
COPY source_table
TO 'test_files/scratch/copy/table_with_metadata/'
STORED AS PARQUET
OPTIONS (
'format.metadata' 'key1:value1 key2:value2'
)

# accepts entries which are key-only (no value)
statement ok
COPY source_table
TO 'test_files/scratch/copy/table_with_metadata/'
STORED AS PARQUET
OPTIONS (
'format.metadata' 'key1 key2:value2 key3'
)

# errors for invalid key-value pair (extra `:`)
statement error DataFusion error: Invalid or Unsupported Configuration: Invalid metadata provided "foo:bar:extra"
COPY source_table
TO 'test_files/scratch/copy/table_with_metadata/'
STORED AS PARQUET
OPTIONS (
'format.metadata' 'foo:bar:extra'
)

statement error
# errors for invalid property (not stating `format.metadata`)
statement error DataFusion error: Invalid or Unsupported Configuration: Config value "wrong-metadata-key" not found on ParquetOptions
COPY source_table
TO 'test_files/scratch/copy/table_with_metadata/'
STORED AS PARQUET
OPTIONS (
'format.wrong-metadata-key' 'foo:bar baz'
'format.wrong-metadata-key' 'key:value'
)


# validate multiple parquet file output with all options set
statement ok
CREATE EXTERNAL TABLE validate_parquet_with_options STORED AS PARQUET LOCATION 'test_files/scratch/copy/table_with_options/';
Expand Down

0 comments on commit 0cf04b5

Please sign in to comment.