Skip to content

Commit

Permalink
Add Python bindings
Browse files Browse the repository at this point in the history
  • Loading branch information
pitrou committed Feb 26, 2025
1 parent d97a9a7 commit e5ebef4
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 1 deletion.
2 changes: 2 additions & 0 deletions docs/source/python/api/compute.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ Aggregations
min
min_max
mode
pivot_wider
product
quantile
stddev
Expand Down Expand Up @@ -557,6 +558,7 @@ Compute Options
PadOptions
PairwiseOptions
PartitionNthOptions
PivotWiderOptions
QuantileOptions
ReplaceSliceOptions
ReplaceSubstringOptions
Expand Down
44 changes: 44 additions & 0 deletions python/pyarrow/_compute.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2396,6 +2396,50 @@ class RankQuantileOptions(_RankQuantileOptions):
self._set_options(sort_keys, null_placement)


cdef class _PivotWiderOptions(FunctionOptions):
    # Cython-level base that builds and holds the C++ PivotWiderOptions.

    def _set_options(self, key_names, unexpected_key_behavior):
        # Convert the Python-level arguments to their C++ counterparts and
        # install a newly constructed CPivotWiderOptions in `self.wrapped`.
        cdef:
            vector[c_string] c_names
            PivotWiderUnexpectedKeyBehavior c_behavior

        # The behavior string is checked first, so an unsupported value is
        # rejected before any key name is converted.
        if unexpected_key_behavior == "ignore":
            c_behavior = PivotWiderUnexpectedKeyBehavior_Ignore
        elif unexpected_key_behavior == "raise":
            c_behavior = PivotWiderUnexpectedKeyBehavior_Raise
        else:
            raise ValueError(
                "Unsupported value for unexpected_key_behavior: "
                f"expected 'ignore' or 'raise', got {unexpected_key_behavior!r}")

        # Every key name is passed through tobytes() and appended in order.
        for name in key_names:
            c_names.push_back(tobytes(name))

        self.wrapped.reset(
            new CPivotWiderOptions(move(c_names), c_behavior)
        )


class PivotWiderOptions(_PivotWiderOptions):
    """
    Options for the `pivot_wider` function.

    Parameters
    ----------
    key_names : sequence of str
        Names of the pivot keys expected to appear in the pivot key column.
        The struct output contains one column per entry of `key_names`,
        carrying the same name as that entry.
    unexpected_key_behavior : str, default "ignore"
        What to do when a pivot key that is not listed in `key_names`
        is encountered.
        Accepted values are "ignore" and "raise".
        With "ignore", unexpected keys are silently ignored.
        With "raise", unexpected keys raise a KeyError.
    """

    def __init__(self, key_names, *, unexpected_key_behavior="ignore"):
        # Validation and conversion are delegated to the Cython base class.
        self._set_options(key_names, unexpected_key_behavior)


cdef class Expression(_Weakrefable):
"""
A logical expression to be evaluated against some input.
Expand Down
1 change: 1 addition & 0 deletions python/pyarrow/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
PadOptions,
PairwiseOptions,
PartitionNthOptions,
PivotWiderOptions,
QuantileOptions,
RandomOptions,
RankOptions,
Expand Down
10 changes: 10 additions & 0 deletions python/pyarrow/includes/libarrow.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -2823,6 +2823,16 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil:
vector[CSortKey] sort_keys
CNullPlacement null_placement

# Cython-side name for the C++ enum
# arrow::compute::PivotWiderOptions::UnexpectedKeyBehavior.
# The two members below bind to the C++ enumerators kIgnore and kRaise.
cdef enum PivotWiderUnexpectedKeyBehavior \
"arrow::compute::PivotWiderOptions::UnexpectedKeyBehavior":
PivotWiderUnexpectedKeyBehavior_Ignore "arrow::compute::PivotWiderOptions::kIgnore"
PivotWiderUnexpectedKeyBehavior_Raise "arrow::compute::PivotWiderOptions::kRaise"

# Declaration of the C++ class arrow::compute::PivotWiderOptions, exposed to
# Cython as CPivotWiderOptions and derived from CFunctionOptions.
# Only the constructor is declared: it takes the vector of key names and the
# unexpected-key behavior enum value.
cdef cppclass CPivotWiderOptions \
"arrow::compute::PivotWiderOptions"(CFunctionOptions):
CPivotWiderOptions(vector[c_string] key_names,
PivotWiderUnexpectedKeyBehavior)

cdef enum DatumType" arrow::Datum::type":
DatumType_NONE" arrow::Datum::NONE"
DatumType_SCALAR" arrow::Datum::SCALAR"
Expand Down
29 changes: 28 additions & 1 deletion python/pyarrow/tests/test_compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ def test_option_class_equality(request):
pc.ArraySortOptions(),
pc.AssumeTimezoneOptions("UTC"),
pc.CastOptions.safe(pa.int8()),
pc.CumulativeOptions(start=None, skip_nulls=False),
pc.CountOptions(),
pc.DayOfWeekOptions(count_from_zero=False, week_start=0),
pc.DictionaryEncodeOptions(),
Expand All @@ -167,7 +168,7 @@ def test_option_class_equality(request):
pc.PadOptions(5),
pc.PairwiseOptions(period=1),
pc.PartitionNthOptions(1, null_placement="at_start"),
pc.CumulativeOptions(start=None, skip_nulls=False),
pc.PivotWiderOptions(["height"], unexpected_key_behavior="raise"),
pc.QuantileOptions(),
pc.RandomOptions(),
pc.RankOptions(sort_keys="ascending",
Expand Down Expand Up @@ -3785,3 +3786,29 @@ def test_pairwise_diff():
with pytest.raises(pa.ArrowInvalid,
match="overflow"):
pa.compute.pairwise_diff_checked(arr, period=-1)


def test_pivot_wider():
    """Exercise `pc.pivot_wider` with and without explicit key names."""
    expected_keys = ["width", "height"]
    pivot_keys = ["height", "width", "depth"]
    pivot_values = [10, None, 11]

    # No key names given: the result converts to an empty dict.
    result = pc.pivot_wider(pivot_keys, pivot_values)
    assert result.as_py() == {}

    # Key names passed positionally.
    result = pc.pivot_wider(pivot_keys, pivot_values, expected_keys)
    assert result.as_py() == {"width": None, "height": 10}
    # check key order
    assert list(result.as_py()) == ["width", "height"]

    # Key names passed by keyword.
    result = pc.pivot_wider(pivot_keys, pivot_values,
                            key_names=expected_keys)
    assert result.as_py() == {"width": None, "height": 10}

    # "depth" is not among the expected keys, so "raise" must reject it.
    with pytest.raises(KeyError, match="Unexpected pivot key: depth"):
        pc.pivot_wider(pivot_keys, pivot_values,
                       key_names=expected_keys,
                       unexpected_key_behavior="raise")

    # "height" appears twice with a non-null value each time.
    with pytest.raises(ValueError,
                       match="Encountered more than one non-null value"):
        pc.pivot_wider(["height", "width", "height"], pivot_values,
                       key_names=expected_keys)

0 comments on commit e5ebef4

Please sign in to comment.