From ef941084413d5da61970fed50179b08d36da596a Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Fri, 8 Nov 2024 18:48:15 +0100 Subject: [PATCH] Fix string view ILIKE checks with NULL values --- arrow-string/src/like.rs | 226 ++++++++++++++++++---------------- arrow-string/src/predicate.rs | 14 ++- 2 files changed, 126 insertions(+), 114 deletions(-) diff --git a/arrow-string/src/like.rs b/arrow-string/src/like.rs index fee3b792a946..1b04b4eb5601 100644 --- a/arrow-string/src/like.rs +++ b/arrow-string/src/like.rs @@ -1720,33 +1720,36 @@ mod tests { "%a%", // can_execute as contains("a") "%a%b_c_d%e", // can_execute as regular expression ] { - let a = Scalar::new(StringArray::new_null(1)); - let b = StringArray::new_scalar(pattern); - let r = like(&a, &b).unwrap(); - assert_eq!(r.len(), 1, "With pattern {pattern}"); - assert_eq!(r.null_count(), 1, "With pattern {pattern}"); - assert!(r.is_null(0), "With pattern {pattern}"); - - let a = Scalar::new(StringArray::new_null(1)); - let b = StringArray::from_iter_values([pattern]); - let r = like(&a, &b).unwrap(); - assert_eq!(r.len(), 1, "With pattern {pattern}"); - assert_eq!(r.null_count(), 1, "With pattern {pattern}"); - assert!(r.is_null(0), "With pattern {pattern}"); - - let a = StringArray::new_null(1); - let b = StringArray::from_iter_values([pattern]); - let r = like(&a, &b).unwrap(); - assert_eq!(r.len(), 1, "With pattern {pattern}"); - assert_eq!(r.null_count(), 1, "With pattern {pattern}"); - assert!(r.is_null(0), "With pattern {pattern}"); - - let a = StringArray::new_null(1); - let b = StringArray::new_scalar(pattern); - let r = like(&a, &b).unwrap(); - assert_eq!(r.len(), 1, "With pattern {pattern}"); - assert_eq!(r.null_count(), 1, "With pattern {pattern}"); - assert!(r.is_null(0), "With pattern {pattern}"); + // These tests focus on the null handling, but are case-insensitive + for like_f in [like, ilike, nlike, nilike] { + let a = Scalar::new(StringArray::new_null(1)); + let b = StringArray::new_scalar(pattern); + let r = like_f(&a, &b).unwrap(); + assert_eq!(r.len(), 1, "With pattern {pattern}"); + assert_eq!(r.null_count(), 1, "With pattern {pattern}"); + assert!(r.is_null(0), "With pattern {pattern}"); + + let a = Scalar::new(StringArray::new_null(1)); + let b = StringArray::from_iter_values([pattern]); + let r = like_f(&a, &b).unwrap(); + assert_eq!(r.len(), 1, "With pattern {pattern}"); + assert_eq!(r.null_count(), 1, "With pattern {pattern}"); + assert!(r.is_null(0), "With pattern {pattern}"); + + let a = StringArray::new_null(1); + let b = StringArray::from_iter_values([pattern]); + let r = like_f(&a, &b).unwrap(); + assert_eq!(r.len(), 1, "With pattern {pattern}"); + assert_eq!(r.null_count(), 1, "With pattern {pattern}"); + assert!(r.is_null(0), "With pattern {pattern}"); + + let a = StringArray::new_null(1); + let b = StringArray::new_scalar(pattern); + let r = like_f(&a, &b).unwrap(); + assert_eq!(r.len(), 1, "With pattern {pattern}"); + assert_eq!(r.null_count(), 1, "With pattern {pattern}"); + assert!(r.is_null(0), "With pattern {pattern}"); + } } } @@ -1763,95 +1766,102 @@ mod tests { "%a%", // can_execute as contains("a") "%a%b_c_d%e", // can_execute as regular expression ] { - let a = Scalar::new(StringViewArray::new_null(1)); - let b = StringViewArray::new_scalar(pattern); - let r = like(&a, &b).unwrap(); - assert_eq!(r.len(), 1, "With pattern {pattern}"); - assert_eq!(r.null_count(), 1, "With pattern {pattern}"); - assert!(r.is_null(0), "With pattern {pattern}"); - - let a = Scalar::new(StringViewArray::new_null(1)); - let b = StringViewArray::from_iter_values([pattern]); - let r = like(&a, &b).unwrap(); - assert_eq!(r.len(), 1, "With pattern {pattern}"); - assert_eq!(r.null_count(), 1, "With pattern {pattern}"); - assert!(r.is_null(0), "With pattern {pattern}"); - - let a = StringViewArray::new_null(1); - let b = StringViewArray::from_iter_values([pattern]); - let r = like(&a, &b).unwrap(); - assert_eq!(r.len(), 1, "With pattern {pattern}"); - assert_eq!(r.null_count(), 1, "With pattern {pattern}"); - assert!(r.is_null(0), "With pattern {pattern}"); - - let a = StringViewArray::new_null(1); - let b = StringViewArray::new_scalar(pattern); - let r = like(&a, &b).unwrap(); - assert_eq!(r.len(), 1, "With pattern {pattern}"); - assert_eq!(r.null_count(), 1, "With pattern {pattern}"); - assert!(r.is_null(0), "With pattern {pattern}"); + // These tests focus on the null handling, but are case-insensitive + for like_f in [like, ilike, nlike, nilike] { + let a = Scalar::new(StringViewArray::new_null(1)); + let b = StringViewArray::new_scalar(pattern); + let r = like_f(&a, &b).unwrap(); + assert_eq!(r.len(), 1, "With pattern {pattern}"); + assert_eq!(r.null_count(), 1, "With pattern {pattern}"); + assert!(r.is_null(0), "With pattern {pattern}"); + + let a = Scalar::new(StringViewArray::new_null(1)); + let b = StringViewArray::from_iter_values([pattern]); + let r = like_f(&a, &b).unwrap(); + assert_eq!(r.len(), 1, "With pattern {pattern}"); + assert_eq!(r.null_count(), 1, "With pattern {pattern}"); + assert!(r.is_null(0), "With pattern {pattern}"); + + let a = StringViewArray::new_null(1); + let b = StringViewArray::from_iter_values([pattern]); + let r = like_f(&a, &b).unwrap(); + assert_eq!(r.len(), 1, "With pattern {pattern}"); + assert_eq!(r.null_count(), 1, "With pattern {pattern}"); + assert!(r.is_null(0), "With pattern {pattern}"); + + let a = StringViewArray::new_null(1); + let b = StringViewArray::new_scalar(pattern); + let r = like_f(&a, &b).unwrap(); + assert_eq!(r.len(), 1, "With pattern {pattern}"); + assert_eq!(r.null_count(), 1, "With pattern {pattern}"); + assert!(r.is_null(0), "With pattern {pattern}"); + } } } #[test] fn string_like_scalar_null() { - let a = StringArray::new_scalar("a"); - let b = Scalar::new(StringArray::new_null(1)); - let r = like(&a, &b).unwrap(); - assert_eq!(r.len(), 1); - assert_eq!(r.null_count(), 1); - assert!(r.is_null(0)); - - let a = StringArray::from_iter_values(["a"]); - let b = Scalar::new(StringArray::new_null(1)); - let r = like(&a, &b).unwrap(); - assert_eq!(r.len(), 1); - assert_eq!(r.null_count(), 1); - assert!(r.is_null(0)); - - let a = StringArray::from_iter_values(["a"]); - let b = StringArray::new_null(1); - let r = like(&a, &b).unwrap(); - assert_eq!(r.len(), 1); - assert_eq!(r.null_count(), 1); - assert!(r.is_null(0)); - - let a = StringArray::new_scalar("a"); - let b = StringArray::new_null(1); - let r = like(&a, &b).unwrap(); - assert_eq!(r.len(), 1); - assert_eq!(r.null_count(), 1); - assert!(r.is_null(0)); + for like_f in [like, ilike, nlike, nilike] { + let a = StringArray::new_scalar("a"); + let b = Scalar::new(StringArray::new_null(1)); + let r = like_f(&a, &b).unwrap(); + assert_eq!(r.len(), 1); + assert_eq!(r.null_count(), 1); + assert!(r.is_null(0)); + + let a = StringArray::from_iter_values(["a"]); + let b = Scalar::new(StringArray::new_null(1)); + let r = like_f(&a, &b).unwrap(); + assert_eq!(r.len(), 1); + assert_eq!(r.null_count(), 1); + assert!(r.is_null(0)); + + let a = StringArray::from_iter_values(["a"]); + let b = StringArray::new_null(1); + let r = like_f(&a, &b).unwrap(); + assert_eq!(r.len(), 1); + assert_eq!(r.null_count(), 1); + assert!(r.is_null(0)); + + let a = StringArray::new_scalar("a"); + let b = StringArray::new_null(1); + let r = like_f(&a, &b).unwrap(); + assert_eq!(r.len(), 1); + assert_eq!(r.null_count(), 1); + assert!(r.is_null(0)); + } } #[test] fn string_view_like_scalar_null() { - let a = StringViewArray::new_scalar("a"); - let b = Scalar::new(StringViewArray::new_null(1)); - let r = like(&a, &b).unwrap(); - assert_eq!(r.len(), 1); - assert_eq!(r.null_count(), 1); - assert!(r.is_null(0)); - - let a = StringViewArray::from_iter_values(["a"]); - let b = Scalar::new(StringViewArray::new_null(1)); - let r = like(&a, &b).unwrap(); - assert_eq!(r.len(), 1); - assert_eq!(r.null_count(), 1); - assert!(r.is_null(0)); - - let a = StringViewArray::from_iter_values(["a"]); - let b = StringViewArray::new_null(1); - let r = like(&a, &b).unwrap(); - assert_eq!(r.len(), 1); - assert_eq!(r.null_count(), 1); - assert!(r.is_null(0)); - - let a = StringViewArray::new_scalar("a"); - let b = StringViewArray::new_null(1); - let r = like(&a, &b).unwrap(); - assert_eq!(r.len(), 1); - assert_eq!(r.null_count(), 1); - assert!(r.is_null(0)); + for like_f in [like, ilike, nlike, nilike] { + let a = StringViewArray::new_scalar("a"); + let b = Scalar::new(StringViewArray::new_null(1)); + let r = like_f(&a, &b).unwrap(); + assert_eq!(r.len(), 1); + assert_eq!(r.null_count(), 1); + assert!(r.is_null(0)); + + let a = StringViewArray::from_iter_values(["a"]); + let b = Scalar::new(StringViewArray::new_null(1)); + let r = like_f(&a, &b).unwrap(); + assert_eq!(r.len(), 1); + assert_eq!(r.null_count(), 1); + assert!(r.is_null(0)); + + let a = StringViewArray::from_iter_values(["a"]); + let b = StringViewArray::new_null(1); + let r = like_f(&a, &b).unwrap(); + assert_eq!(r.len(), 1); + assert_eq!(r.null_count(), 1); + assert!(r.is_null(0)); + + let a = StringViewArray::new_scalar("a"); + let b = StringViewArray::new_null(1); + let r = like_f(&a, &b).unwrap(); + assert_eq!(r.len(), 1); + assert_eq!(r.null_count(), 1); + assert!(r.is_null(0)); + } } } diff --git a/arrow-string/src/predicate.rs b/arrow-string/src/predicate.rs index 1d141e6b0b21..ae2493692df3 100644 --- a/arrow-string/src/predicate.rs +++ b/arrow-string/src/predicate.rs @@ -138,8 +138,8 @@ impl<'a> Predicate<'a> { } Predicate::IStartsWithAscii(v) => { if let Some(string_view_array) = array.as_any().downcast_ref::() { - // TODO respect null buffer - BooleanArray::from( + let nulls = string_view_array.logical_nulls(); + let values = BooleanBuffer::from( string_view_array .prefix_bytes_iter(v.len()) .map(|haystack| { @@ -150,7 +150,8 @@ impl<'a> Predicate<'a> { ) != negate }) .collect::>(), - ) + ); + BooleanArray::new(values, nulls) } else { BooleanArray::from_unary(array, |haystack| { starts_with(haystack, v, equals_ignore_ascii_case_kernel) != negate @@ -177,8 +178,8 @@ impl<'a> Predicate<'a> { } Predicate::IEndsWithAscii(v) => { if let Some(string_view_array) = array.as_any().downcast_ref::() { - // TODO respect null buffer - BooleanArray::from( + let nulls = string_view_array.logical_nulls(); + let values = BooleanBuffer::from( string_view_array .suffix_bytes_iter(v.len()) .map(|haystack| { @@ -189,7 +190,8 @@ impl<'a> Predicate<'a> { ) != negate }) .collect::>(), - ) + ); + BooleanArray::new(values, nulls) } else { BooleanArray::from_unary(array, |haystack| { ends_with(haystack, v, equals_ignore_ascii_case_kernel) != negate