From 3efcd6a8c4fb59978e97e15ef813a023b04e139d Mon Sep 17 00:00:00 2001 From: Adam Binford Date: Wed, 22 Jan 2025 18:08:50 -0500 Subject: [PATCH] Make scalar and array handling for array_has consistent (#13683) * Make scalar and array handling for array_has consistent * Ignore null elements for scalars and arrays * Update comment --- datafusion/functions-nested/src/array_has.rs | 8 ++--- datafusion/sqllogictest/test_files/array.slt | 33 ++++++++++++++++++-- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/datafusion/functions-nested/src/array_has.rs b/datafusion/functions-nested/src/array_has.rs index 0a3daa18c096..96996cc07f27 100644 --- a/datafusion/functions-nested/src/array_has.rs +++ b/datafusion/functions-nested/src/array_has.rs @@ -204,8 +204,7 @@ fn array_has_dispatch_for_array( let is_nested = arr.data_type().is_nested(); let needle_row = Scalar::new(needle.slice(i, 1)); let eq_array = compare_with_eq(&arr, &needle_row, is_nested)?; - let is_contained = eq_array.true_count() > 0; - boolean_builder.append_value(is_contained) + boolean_builder.append_value(eq_array.true_count() > 0); } Ok(Arc::new(boolean_builder.finish())) @@ -238,10 +237,7 @@ fn array_has_dispatch_for_scalar( continue; } let sliced_array = eq_array.slice(start, length); - // For nested list, check number of nulls - if sliced_array.null_count() != length { - final_contained[i] = Some(sliced_array.true_count() > 0); - } + final_contained[i] = Some(sliced_array.true_count() > 0); } Ok(Arc::new(BooleanArray::from(final_contained))) diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 9a0a25f62066..22a85eb15512 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -352,6 +352,16 @@ AS VALUES (arrow_cast(make_array([[1], [2]], [[2], [3]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1], [2]), 'FixedSizeList(2, List(Int64))')) ; +statement ok +CREATE 
TABLE array_has_table_null +AS VALUES + (make_array(1, 2), 1), + (make_array(1, NULL), 1), + (make_array(3, 4, 5), 2), + (make_array(3, NULL, 5), 2), + (make_array(NULL, NULL, NULL), 2) +; + statement ok CREATE TABLE array_distinct_table_1D AS VALUES @@ -5260,6 +5270,13 @@ select array_has([], null), ---- NULL NULL NULL +# Always return false if not contained even if list has null elements +query BB +select array_has([1, null, 2], 3), + array_has([null, null, null], 3); +---- +false false + #TODO: array_has_all and array_has_any cannot handle NULL #query BBBB #select array_has_any([], null), @@ -5338,6 +5355,16 @@ from array_has_table_1D; true true true false false false +query B +select array_has(column1, column2) +from array_has_table_null; +---- +true +true +false +false +false + query B select array_has(column1, column2) from fixed_size_array_has_table_1D; @@ -5574,9 +5601,9 @@ false false false true true false true false true false false true false true false false -NULL NULL false false -false false NULL false -false false false NULL +false false false false +false false false false +false false false false query BBBBBBBBBBBBB select array_has_all(make_array(1,2,3), make_array(1,3)),