Skip to content

Commit

Permalink
Fix Float and Decimal coercion
Browse files Browse the repository at this point in the history
Before the change, Float* and Decimal* would coerce to a decimal type.
However, decimal cannot store all the float values: different range,
NaN, and infinity. Coercing to floating point is more desirable and also
what other systems typically do.
  • Loading branch information
findepi committed Jan 24, 2025
1 parent 774d3cb commit 056e2d0
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 18 deletions.
8 changes: 7 additions & 1 deletion datafusion/core/tests/parquet/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,13 @@ impl TestOutput {
/// and the appropriate scenario
impl ContextWithParquet {
async fn new(scenario: Scenario, unit: Unit) -> Self {
Self::with_config(scenario, unit, SessionConfig::new()).await
let mut session_config = SessionConfig::new();
// TODO (https://github.com/apache/datafusion/issues/12817) once this is the default behavior, remove from here
session_config
.options_mut()
.sql_parser
.parse_float_as_decimal = true;
Self::with_config(scenario, unit, session_config).await
}

async fn with_config(
Expand Down
22 changes: 5 additions & 17 deletions datafusion/expr-common/src/type_coercion/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -928,9 +928,6 @@ fn coerce_numeric_type_to_decimal(numeric_type: &DataType) -> Option<DataType> {
Int16 | UInt16 => Some(Decimal128(5, 0)),
Int32 | UInt32 => Some(Decimal128(10, 0)),
Int64 | UInt64 => Some(Decimal128(20, 0)),
// TODO if we convert the floating-point data to the decimal type, it maybe overflow.
Float32 => Some(Decimal128(14, 7)),
Float64 => Some(Decimal128(30, 15)),
_ => None,
}
}
Expand All @@ -946,9 +943,6 @@ fn coerce_numeric_type_to_decimal256(numeric_type: &DataType) -> Option<DataType
Int16 | UInt16 => Some(Decimal256(5, 0)),
Int32 | UInt32 => Some(Decimal256(10, 0)),
Int64 | UInt64 => Some(Decimal256(20, 0)),
// TODO if we convert the floating-point data to the decimal type, it maybe overflow.
Float32 => Some(Decimal256(14, 7)),
Float64 => Some(Decimal256(30, 15)),
_ => None,
}
}
Expand Down Expand Up @@ -1494,8 +1488,8 @@ mod tests {
DataType::Decimal128(20, 3),
DataType::Decimal128(20, 3),
DataType::Decimal128(23, 3),
DataType::Decimal128(24, 7),
DataType::Decimal128(32, 15),
DataType::Float32,
DataType::Float64,
DataType::Decimal128(38, 10),
DataType::Decimal128(25, 8),
DataType::Decimal128(20, 3),
Expand Down Expand Up @@ -1541,14 +1535,8 @@ mod tests {
coerce_numeric_type_to_decimal(&DataType::Int64).unwrap(),
DataType::Decimal128(20, 0)
);
assert_eq!(
coerce_numeric_type_to_decimal(&DataType::Float32).unwrap(),
DataType::Decimal128(14, 7)
);
assert_eq!(
coerce_numeric_type_to_decimal(&DataType::Float64).unwrap(),
DataType::Decimal128(30, 15)
);
assert_eq!(coerce_numeric_type_to_decimal(&DataType::Float32), None);
assert_eq!(coerce_numeric_type_to_decimal(&DataType::Float64), None);
}

#[test]
Expand Down Expand Up @@ -2013,7 +2001,7 @@ mod tests {
DataType::Float64,
DataType::Decimal128(10, 3),
Operator::Gt,
DataType::Decimal128(30, 15)
DataType::Float64
);
test_coercion_binary_rule!(
DataType::Int64,
Expand Down
6 changes: 6 additions & 0 deletions datafusion/sqllogictest/test_files/math.slt
Original file line number Diff line number Diff line change
Expand Up @@ -694,3 +694,9 @@ select FACTORIAL(350943270);

statement ok
drop table signed_integers

# Should not fail. The operands should coerce to float
query B
SELECT '1'::decimal(10,0) = '1e40'::double;
----
false
15 changes: 15 additions & 0 deletions datafusion/sqllogictest/test_files/union.slt
Original file line number Diff line number Diff line change
Expand Up @@ -836,3 +836,18 @@ physical_plan
# Clean up after the test
statement ok
drop table aggregate_test_100;

query T
SELECT DISTINCT arrow_typeof(a) FROM (SELECT '1'::float UNION ALL SELECT '1'::decimal(10)) t(a)
----
Float32

query T
SELECT DISTINCT arrow_typeof(a) FROM (SELECT '1'::decimal(10) UNION ALL SELECT '1'::float ) t(a)
----
Float32

query T
SELECT DISTINCT arrow_typeof(a) FROM (SELECT '1'::decimal(10) UNION ALL SELECT '1'::double) t(a)
----
Float64

0 comments on commit 056e2d0

Please sign in to comment.