From 17b5e71707326a25601bd58b3ad91a745b8a3e10 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 3 Feb 2025 12:22:15 -0500 Subject: [PATCH] Fix regression list Type Coercion List with inner type struct which has large/view types (#14385) * Test for coercing inner structs * Fix bug, update tests * Update tests --- .../expr-common/src/type_coercion/binary.rs | 15 +++--- datafusion/sqllogictest/test_files/case.slt | 53 +++++++++++++++++++ datafusion/sqllogictest/test_files/struct.slt | 8 +-- 3 files changed, 65 insertions(+), 11 deletions(-) diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index ba87a53e5fe8..3be35490a4d0 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -566,18 +566,19 @@ fn type_union_resolution_coercion( None } - let types = lhs + let coerced_types = lhs .iter() .map(|lhs_field| search_corresponding_coerced_type(lhs_field, rhs)) .collect::>>()?; - let fields = types + // preserve the field name and nullability + let orig_fields = std::iter::zip(lhs.iter(), rhs.iter()); + + let fields: Vec = coerced_types .into_iter() - .enumerate() - .map(|(i, datatype)| { - Arc::new(Field::new(format!("c{i}"), datatype, true)) - }) - .collect::>(); + .zip(orig_fields) + .map(|(datatype, (lhs, rhs))| coerce_fields(datatype, lhs, rhs)) + .collect(); Some(DataType::Struct(fields.into())) } _ => { diff --git a/datafusion/sqllogictest/test_files/case.slt b/datafusion/sqllogictest/test_files/case.slt index 46e9c86c7591..8e470fe988d3 100644 --- a/datafusion/sqllogictest/test_files/case.slt +++ b/datafusion/sqllogictest/test_files/case.slt @@ -416,5 +416,58 @@ SELECT end FROM t; +statement ok +drop table t + +# Fix coercion of lists of structs +# https://github.com/apache/datafusion/issues/14154 + +statement ok +create or replace table t as values +( + 100, -- column1 int (so the case isn't constant folded) + [{ 'foo': 
arrow_cast('baz', 'Utf8View') }], -- column2 has List of Struct w/ Utf8View + [{ 'foo': 'bar' }], -- column3 has List of Struct w/ Utf8 + [{ 'foo': 'blarg' }] -- column4 has List of Struct w/ Utf8 +); + +# This case forces all branches to be coerced to the same type +query ? +SELECT + case + when column1 > 0 then column2 + when column1 < 0 then column3 + else column4 + end +FROM t; +---- +[{foo: baz}] + +# different orders of the branches +query ? +SELECT + case + when column1 > 0 then column3 -- NB different order + when column1 < 0 then column4 + else column2 + end +FROM t; +---- +[{foo: bar}] + +# different orders of the branches +query ? +SELECT + case + when column1 > 0 then column4 -- NB different order + when column1 < 0 then column2 + else column3 + end +FROM t; +---- +[{foo: blarg}] + + + statement ok drop table t diff --git a/datafusion/sqllogictest/test_files/struct.slt b/datafusion/sqllogictest/test_files/struct.slt index d671798b7d0f..0afe39de1795 100644 --- a/datafusion/sqllogictest/test_files/struct.slt +++ b/datafusion/sqllogictest/test_files/struct.slt @@ -459,14 +459,14 @@ create table t as values({r: 'a', c: 1}), ({r: 'b', c: 2.3}); query ? 
select * from t; ---- -{c0: a, c1: 1.0} -{c0: b, c1: 2.3} +{r: a, c: 1.0} +{r: b, c: 2.3} query T select arrow_typeof(column1) from t; ---- -Struct([Field { name: "c0", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "c1", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]) -Struct([Field { name: "c0", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "c1", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]) +Struct([Field { name: "r", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "c", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]) +Struct([Field { name: "r", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "c", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]) statement ok drop table t;