diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst
index 5423eebfbab40..0205457fec751 100644
--- a/docs/source/python/api/compute.rst
+++ b/docs/source/python/api/compute.rst
@@ -38,6 +38,7 @@ Aggregations
    min
    min_max
    mode
+   pivot_wider
    product
    quantile
    stddev
@@ -557,6 +558,7 @@ Compute Options
    PadOptions
    PairwiseOptions
    PartitionNthOptions
+   PivotWiderOptions
    QuantileOptions
    ReplaceSliceOptions
    ReplaceSubstringOptions
diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx
index d23286dcdd02e..ee61077aa80ca 100644
--- a/python/pyarrow/_compute.pyx
+++ b/python/pyarrow/_compute.pyx
@@ -2396,6 +2396,50 @@ class RankQuantileOptions(_RankQuantileOptions):
         self._set_options(sort_keys, null_placement)
 
 
+cdef class _PivotWiderOptions(FunctionOptions):
+
+    def _set_options(self, key_names, unexpected_key_behavior):
+        cdef:
+            vector[c_string] c_key_names
+            PivotWiderUnexpectedKeyBehavior c_unexpected_key_behavior
+        if unexpected_key_behavior == "ignore":
+            c_unexpected_key_behavior = PivotWiderUnexpectedKeyBehavior_Ignore
+        elif unexpected_key_behavior == "raise":
+            c_unexpected_key_behavior = PivotWiderUnexpectedKeyBehavior_Raise
+        else:
+            raise ValueError(
+                f"Unsupported value for unexpected_key_behavior: "
+                f"expected 'ignore' or 'raise', got {unexpected_key_behavior!r}")
+
+        for k in key_names:
+            c_key_names.push_back(tobytes(k))
+
+        self.wrapped.reset(
+            new CPivotWiderOptions(move(c_key_names), c_unexpected_key_behavior)
+        )
+
+
+class PivotWiderOptions(_PivotWiderOptions):
+    """
+    Options for the `pivot_wider` function.
+
+    Parameters
+    ----------
+    key_names : sequence of str
+        The pivot key names expected in the pivot key column.
+        For each entry in `key_names`, a column with the same name is emitted
+        in the struct output.
+    unexpected_key_behavior : str, default "ignore"
+        The behavior when pivot keys not in `key_names` are encountered.
+        Accepted values are "ignore", "raise".
+        If "ignore", unexpected keys are silently ignored.
+        If "raise", unexpected keys raise a KeyError.
+    """
+
+    def __init__(self, key_names, *, unexpected_key_behavior="ignore"):
+        self._set_options(key_names, unexpected_key_behavior)
+
+
 cdef class Expression(_Weakrefable):
     """
     A logical expression to be evaluated against some input.
diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py
index 5348336235118..e2c17ee61d8ab 100644
--- a/python/pyarrow/compute.py
+++ b/python/pyarrow/compute.py
@@ -53,6 +53,7 @@
     PadOptions,
     PairwiseOptions,
     PartitionNthOptions,
+    PivotWiderOptions,
     QuantileOptions,
     RandomOptions,
     RankOptions,
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 556696e3442e9..8e666b114bd89 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -2823,6 +2823,16 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil:
         vector[CSortKey] sort_keys
         CNullPlacement null_placement
 
+    cdef enum PivotWiderUnexpectedKeyBehavior \
+            "arrow::compute::PivotWiderOptions::UnexpectedKeyBehavior":
+        PivotWiderUnexpectedKeyBehavior_Ignore "arrow::compute::PivotWiderOptions::kIgnore"
+        PivotWiderUnexpectedKeyBehavior_Raise "arrow::compute::PivotWiderOptions::kRaise"
+
+    cdef cppclass CPivotWiderOptions \
+            "arrow::compute::PivotWiderOptions"(CFunctionOptions):
+        CPivotWiderOptions(vector[c_string] key_names,
+                           PivotWiderUnexpectedKeyBehavior)
+
     cdef enum DatumType" arrow::Datum::type":
         DatumType_NONE" arrow::Datum::NONE"
         DatumType_SCALAR" arrow::Datum::SCALAR"
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index ef02b476bdc9a..99b3047d66fd0 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -145,6 +145,7 @@ def test_option_class_equality(request):
         pc.ArraySortOptions(),
         pc.AssumeTimezoneOptions("UTC"),
         pc.CastOptions.safe(pa.int8()),
+        pc.CumulativeOptions(start=None, skip_nulls=False),
         pc.CountOptions(),
         pc.DayOfWeekOptions(count_from_zero=False, week_start=0),
         pc.DictionaryEncodeOptions(),
@@ -167,7 +168,7 @@ def test_option_class_equality(request):
         pc.PadOptions(5),
         pc.PairwiseOptions(period=1),
         pc.PartitionNthOptions(1, null_placement="at_start"),
-        pc.CumulativeOptions(start=None, skip_nulls=False),
+        pc.PivotWiderOptions(["height"], unexpected_key_behavior="raise"),
         pc.QuantileOptions(),
         pc.RandomOptions(),
         pc.RankOptions(sort_keys="ascending",
@@ -3785,3 +3786,29 @@ def test_pairwise_diff():
 
     with pytest.raises(pa.ArrowInvalid, match="overflow"):
         pa.compute.pairwise_diff_checked(arr, period=-1)
+
+
+def test_pivot_wider():
+    key_names = ["width", "height"]
+
+    result = pc.pivot_wider(["height", "width", "depth"], [10, None, 11])
+    assert result.as_py() == {}
+
+    result = pc.pivot_wider(["height", "width", "depth"], [10, None, 11],
+                            key_names)
+    assert result.as_py() == {"width": None, "height": 10}
+    # check key order
+    assert list(result.as_py()) == ["width", "height"]
+
+    result = pc.pivot_wider(["height", "width", "depth"], [10, None, 11],
+                            key_names=key_names)
+    assert result.as_py() == {"width": None, "height": 10}
+
+    with pytest.raises(KeyError, match="Unexpected pivot key: depth"):
+        result = pc.pivot_wider(["height", "width", "depth"], [10, None, 11],
+                                key_names=key_names,
+                                unexpected_key_behavior="raise")
+
+    with pytest.raises(ValueError, match="Encountered more than one non-null value"):
+        result = pc.pivot_wider(["height", "width", "height"], [10, None, 11],
+                                key_names=key_names)