Skip to content

Commit

Permalink
fix pushdown for pyarrow filter IsNull
Browse files Browse the repository at this point in the history
The conversion was incorrectly passing in the expression itself as the `nan_as_null` argument. This caused the pushdown to silently fail.
  • Loading branch information
Michael-J-Ward committed Jun 18, 2024
1 parent c7ea90d commit df69c6a
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 2 deletions.
19 changes: 19 additions & 0 deletions python/datafusion/tests/test_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,25 @@ def test_dataset_filter(ctx, capfd):
assert result[0].column(1) == pa.array([-3])


def test_pyarrow_predicate_pushdown_is_null(ctx, capfd):
"""Ensure that pyarrow filter gets pushed down for `IsNull`"""
# create a RecordBatch and register it as a pyarrow.dataset.Dataset
batch = pa.RecordBatch.from_arrays(
[pa.array([1, 2, 3]), pa.array([4, 5, 6]), pa.array([7, None, 9])],
names=["a", "b", "c"],
)
dataset = ds.dataset([batch])
ctx.register_dataset("t", dataset)
# Make sure the filter was pushed down in Physical Plan
df = ctx.sql("SELECT a FROM t WHERE c is NULL")
df.explain()
captured = capfd.readouterr()
assert "filter_expr=is_null(c, {nan_is_null=false})" in captured.out

result = df.collect()
assert result[0].column(0) == pa.array([2])


def test_dataset_filter_nested_data(ctx):
# create Arrow StructArrays to test nested data types
data = pa.StructArray.from_arrays(
Expand Down
9 changes: 7 additions & 2 deletions src/pyarrow_filter_expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,13 @@ impl TryFrom<&Expr> for PyArrowFilterExpression {
let expr = PyArrowFilterExpression::try_from(expr.as_ref())?
.0
.into_bound(py);
// TODO: this expression does not seems like it should be `call_method0`
Ok(expr.clone().call_method1("is_null", (expr,))?)

// https://arrow.apache.org/docs/python/generated/pyarrow.dataset.Expression.html#pyarrow.dataset.Expression.is_null
// Whether floating-point NaNs are considered null.
let nan_is_null = false;

let res = expr.call_method1("is_null", (nan_is_null,))?;
Ok(res)
}
Expr::Between(Between {
expr,
Expand Down

0 comments on commit df69c6a

Please sign in to comment.