-
Notifications
You must be signed in to change notification settings - Fork 1.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix Coalesce
casting logic to follows what Postgres and DuckDB do. Introduce signature that do non-comparison coercion
#10268
Changes from 5 commits
c79156f
a36e6b2
bf16c92
407e3c7
03b9162
4abf29d
4965e8d
81f0235
bae996c
ddf9b1c
c2799ea
2574896
6a17e57
4cba8c5
f1cfb8d
d2e83d3
3a88ad7
03880a3
481f548
dfc4176
46a9060
d656645
5683447
b949fae
5aaeb5b
a968c0e
cf679c5
15471ab
e5cc46b
a810e85
cb16cda
53bedda
a37da2d
70239e0
be116f8
8f4e991
6a8fe6f
20e618e
4153593
030a519
5b797d5
b954479
829b5a2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -92,14 +92,19 @@ pub enum TypeSignature { | |
/// A function such as `concat` is `Variadic(vec![DataType::Utf8, DataType::LargeUtf8])` | ||
Variadic(Vec<DataType>), | ||
/// One or more arguments of an arbitrary but equal type. | ||
/// DataFusion attempts to coerce all argument types to match the first argument's type | ||
/// DataFusion attempts to coerce all argument types to match to the common type with comparision coercion. | ||
/// | ||
/// # Examples | ||
/// Given types in signature should be coercible to the same final type. | ||
/// A function such as `make_array` is `VariadicEqual`. | ||
/// | ||
/// `make_array(i32, i64) -> make_array(i64, i64)` | ||
VariadicEqual, | ||
This comment was marked as outdated.
Sorry, something went wrong. |
||
/// One or more arguments of an arbitrary but equal type or Null. | ||
/// No coercion is attempted. | ||
/// | ||
/// Functions like `coalesce` is `VariadicEqual`. | ||
VariadicEqualOrNull, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we need a similar signature but an exact args number for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe we could do something like this (basically to flavor the type signature) 🤔 pub enum TypeSignature {
...
/// Rather than the usual coercion rules, special type union rules are applied
Union(Box<TypeSignature>)
} There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice idea! |
||
/// One or more arguments with arbitrary types | ||
VariadicAny, | ||
/// Fixed number of arguments of an arbitrary but equal type out of a list of valid types. | ||
|
@@ -193,6 +198,9 @@ impl TypeSignature { | |
TypeSignature::VariadicEqual => { | ||
vec!["CoercibleT, .., CoercibleT".to_string()] | ||
} | ||
TypeSignature::VariadicEqualOrNull => { | ||
vec!["T or Null, .., T or Null".to_string()] | ||
} | ||
TypeSignature::VariadicAny => vec!["Any, .., Any".to_string()], | ||
TypeSignature::OneOf(sigs) => { | ||
sigs.iter().flat_map(|s| s.to_string_repr()).collect() | ||
|
@@ -226,6 +234,11 @@ impl TypeSignature { | |
_ => false, | ||
} | ||
} | ||
|
||
/// Skip coercion to match the signature. | ||
pub(crate) fn skip_coercion(&self) -> bool { | ||
matches!(self, TypeSignature::VariadicEqualOrNull) | ||
jayzhan211 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
} | ||
|
||
/// Defines the supported argument types ([`TypeSignature`]) and [`Volatility`] for a function. | ||
|
@@ -255,13 +268,20 @@ impl Signature { | |
volatility, | ||
} | ||
} | ||
/// An arbitrary number of arguments of the same type. | ||
/// One or more number of arguments to the same type. | ||
pub fn variadic_equal(volatility: Volatility) -> Self { | ||
Self { | ||
type_signature: TypeSignature::VariadicEqual, | ||
volatility, | ||
} | ||
} | ||
/// One or more number of arguments of the same type. | ||
pub fn variadic_equal_or_null(volatility: Volatility) -> Self { | ||
Self { | ||
type_signature: TypeSignature::VariadicEqualOrNull, | ||
volatility, | ||
} | ||
} | ||
/// An arbitrary number of arguments of any type. | ||
pub fn variadic_any(volatility: Volatility) -> Self { | ||
Self { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,7 +28,7 @@ use arrow::{ | |
use datafusion_common::utils::{coerced_fixed_size_list_to_list, list_ndims}; | ||
use datafusion_common::{internal_datafusion_err, internal_err, plan_err, Result}; | ||
|
||
use super::binary::{comparison_binary_numeric_coercion, comparison_coercion}; | ||
use super::binary::comparison_coercion; | ||
|
||
/// Performs type coercion for function arguments. | ||
/// | ||
|
@@ -62,11 +62,13 @@ pub fn data_types( | |
return Ok(current_types.to_vec()); | ||
} | ||
|
||
// Try and coerce the argument types to match the signature, returning the | ||
// coerced types from the first matching signature. | ||
for valid_types in valid_types { | ||
if let Some(types) = maybe_data_types(&valid_types, current_types) { | ||
return Ok(types); | ||
if !signature.type_signature.skip_coercion() { | ||
// Try and coerce the argument types to match the signature, returning the | ||
// coerced types from the first matching signature. | ||
for valid_types in valid_types { | ||
if let Some(types) = maybe_data_types(&valid_types, current_types) { | ||
return Ok(types); | ||
} | ||
} | ||
} | ||
|
||
|
@@ -184,6 +186,27 @@ fn get_valid_types( | |
.iter() | ||
.map(|valid_type| (0..*number).map(|_| valid_type.clone()).collect()) | ||
.collect(), | ||
TypeSignature::VariadicEqualOrNull => { | ||
current_types | ||
.iter() | ||
.find(|&t| t != &DataType::Null) | ||
.map_or_else( | ||
|| vec![vec![DataType::Null; current_types.len()]], | ||
|t| { | ||
let valid_types = current_types | ||
.iter() | ||
.map(|d| { | ||
if d != &DataType::Null { | ||
t.clone() | ||
} else { | ||
DataType::Null | ||
} | ||
}) | ||
.collect::<Vec<_>>(); | ||
vec![valid_types] | ||
}, | ||
) | ||
} | ||
TypeSignature::VariadicEqual => { | ||
let new_type = current_types.iter().skip(1).try_fold( | ||
current_types.first().unwrap().clone(), | ||
|
@@ -450,19 +473,7 @@ fn coerced_from<'a>( | |
{ | ||
Some(type_into.clone()) | ||
} | ||
// More coerce rules. | ||
// Note that not all rules in `comparison_coercion` can be reused here. | ||
// For example, all numeric types can be coerced into Utf8 for comparison, | ||
// but not for function arguments. | ||
_ => comparison_binary_numeric_coercion(type_into, type_from).and_then( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This logic is introduced in #9459, so I think it is safe to remove together with this PR There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. cc @viirya |
||
|coerced_type| { | ||
if *type_into == coerced_type { | ||
Some(coerced_type) | ||
} else { | ||
None | ||
} | ||
}, | ||
), | ||
_ => None, | ||
} | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,7 +23,6 @@ use arrow::compute::{and, is_not_null, is_null}; | |
use arrow::datatypes::DataType; | ||
|
||
use datafusion_common::{exec_err, Result}; | ||
use datafusion_expr::type_coercion::functions::data_types; | ||
use datafusion_expr::ColumnarValue; | ||
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; | ||
|
||
|
@@ -41,7 +40,7 @@ impl Default for CoalesceFunc { | |
impl CoalesceFunc { | ||
pub fn new() -> Self { | ||
Self { | ||
signature: Signature::variadic_equal(Volatility::Immutable), | ||
signature: Signature::variadic_equal_or_null(Volatility::Immutable), | ||
} | ||
} | ||
} | ||
|
@@ -60,9 +59,11 @@ impl ScalarUDFImpl for CoalesceFunc { | |
} | ||
|
||
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> { | ||
// COALESCE has multiple args and they might get coerced, get a preview of this | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 ❤️ |
||
let coerced_types = data_types(arg_types, self.signature()); | ||
coerced_types.map(|types| types[0].clone()) | ||
Ok(arg_types | ||
.iter() | ||
.find(|&t| t != &DataType::Null) | ||
.unwrap_or(&DataType::Null) | ||
.clone()) | ||
} | ||
|
||
/// coalesce evaluates to the first value which is not NULL | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we please link to https://docs.rs/datafusion/latest/datafusion/logical_expr/type_coercion/binary/fn.comparison_coercion.html that explains (however limited) what comparison coercion is?