From b3c00211494ecdcc1319473b0e88fe7379a3c183 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 31 Jan 2025 11:31:50 +0100 Subject: [PATCH 1/4] chore: update pre-commit hooks (#3389) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore: update pre-commit hooks updates: - [github.com/astral-sh/ruff-pre-commit: v0.9.2 → v0.9.3](https://github.com/astral-sh/ruff-pre-commit/compare/v0.9.2...v0.9.3) - [github.com/codespell-project/codespell: v2.3.0 → v2.4.0](https://github.com/codespell-project/codespell/compare/v2.3.0...v2.4.0) * Update .pre-commit-config.yaml --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Henry Schreiner --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3c98989297..9894d92858 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,14 +27,14 @@ repos: additional_dependencies: [pyyaml] - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.9.2 + rev: v0.9.3 hooks: - id: ruff args: ["--fix", "--show-fixes"] - id: ruff-format - repo: https://github.com/codespell-project/codespell - rev: v2.3.0 + rev: v2.4.1 hooks: - id: codespell args: ["-L", "ue,subjet,parms,fo,numer,thre,nin,nout"] From 5aa9698d457ca80bb4a0bdacab35e57818a28fa1 Mon Sep 17 00:00:00 2001 From: Peter Fackeldey Date: Fri, 31 Jan 2025 05:52:23 -0500 Subject: [PATCH 2/4] feat: add a non-touching `ak.zip`, called `'ak.zip_no_broadcast'` (#3390) * feat: add a non-touching ak.zip, called 'ak.unsafe_zip' * fix getting correct length * add tests * check same lengths (more safe) * unsafe_zip -> zip_no_broadcast; check equal offsets at runtime with actual data * fix typo in doc string --------- Co-authored-by: Ianna Osborne --- src/awkward/operations/__init__.py | 1 + 
src/awkward/operations/ak_zip_no_broadcast.py | 222 ++++++++++++++++++ tests/test_3390_ak_zip_no_broadcast.py | 58 +++++ 3 files changed, 281 insertions(+) create mode 100644 src/awkward/operations/ak_zip_no_broadcast.py create mode 100644 tests/test_3390_ak_zip_no_broadcast.py diff --git a/src/awkward/operations/__init__.py b/src/awkward/operations/__init__.py index 91ebc9c184..306a49bb48 100644 --- a/src/awkward/operations/__init__.py +++ b/src/awkward/operations/__init__.py @@ -123,3 +123,4 @@ from awkward.operations.ak_without_parameters import * from awkward.operations.ak_zeros_like import * from awkward.operations.ak_zip import * +from awkward.operations.ak_zip_no_broadcast import * diff --git a/src/awkward/operations/ak_zip_no_broadcast.py b/src/awkward/operations/ak_zip_no_broadcast.py new file mode 100644 index 0000000000..87e7ccb1b2 --- /dev/null +++ b/src/awkward/operations/ak_zip_no_broadcast.py @@ -0,0 +1,222 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE + +from __future__ import annotations + +from collections.abc import Mapping +from functools import reduce + +import awkward as ak +from awkward._dispatch import high_level_function +from awkward._layout import HighLevelContext, ensure_same_backend +from awkward._namedaxis import _get_named_axis, _unify_named_axis +from awkward._nplikes.numpy_like import NumpyMetadata + +__all__ = ("zip_no_broadcast",) + +np = NumpyMetadata.instance() + + +@high_level_function() +def zip_no_broadcast( + arrays, + *, + parameters=None, + with_name=None, + highlevel=True, + behavior=None, + attrs=None, +): + """ + Args: + arrays (mapping or sequence of arrays): Each value in this mapping or + sequence can be any array-like data that #ak.to_layout recognizes. + parameters (None or dict): Parameters for the new + #ak.contents.RecordArray node that is created by this operation. 
+ with_name (None or str): Assigns a `"__record__"` name to the new + #ak.contents.RecordArray node that is created by this operation + (overriding `parameters`, if necessary). + highlevel (bool): If True, return an #ak.Array; otherwise, return + a low-level #ak.contents.Content subclass. + behavior (None or dict): Custom #ak.behavior for the output array, if + high-level. + attrs (None or dict): Custom attributes for the output array, if + high-level. + + Combines `arrays` into a single structure as the fields of a collection + of records or the slots of a collection of tuples. + + Caution: unlike #ak.zip, this function will _not_ broadcast the arrays together. + During typetracing, it assumes that the given arrays already have the same layouts and lengths. + + This operation may be thought of as the opposite of projection in + #ak.Array.__getitem__, which extracts fields one at a time, or + #ak.unzip, which extracts them all in one call. + + Consider the following arrays, `one` and `two`. + + >>> one = ak.Array([[1.1, 2.2, 3.3], [], [4.4, 5.5], [6.6]]) + >>> two = ak.Array([["a", "b", "c"], [], ["d", "e"], ["f"]]) + + Zipping them together using a dict creates a collection of records with + the same nesting structure as `one` and `two`. + + >>> ak.zip_no_broadcast({"x": one, "y": two}).show() + [[{x: 1.1, y: 'a'}, {x: 2.2, y: 'b'}, {x: 3.3, y: 'c'}], + [], + [{x: 4.4, y: 'd'}, {x: 5.5, y: 'e'}], + [{x: 6.6, y: 'f'}]] + + Doing so with a list creates tuples, whose fields are not named. + + >>> ak.zip_no_broadcast([one, two]).show() + [[(1.1, 'a'), (2.2, 'b'), (3.3, 'c')], + [], + [(4.4, 'd'), (5.5, 'e')], + [(6.6, 'f')]] + + See also #ak.zip and #ak.unzip. 
+ """ + # Dispatch + if isinstance(arrays, Mapping): + yield arrays.values() + else: + yield arrays + + # Implementation + return _impl( + arrays, + parameters, + with_name, + highlevel, + behavior, + attrs, + ) + + +def _impl( + arrays, + parameters, + with_name, + highlevel, + behavior, + attrs, +): + with HighLevelContext(behavior=behavior, attrs=attrs) as ctx: + if isinstance(arrays, Mapping): + layouts = ensure_same_backend( + *( + ctx.unwrap( + x, + allow_record=False, + allow_unknown=False, + none_policy="pass-through", + primitive_policy="pass-through", + ) + for x in arrays.values() + ) + ) + fields = list(arrays.keys()) + + # propagate named axis from input to output, + # use strategy "unify" (see: awkward._namedaxis) + out_named_axis = reduce( + _unify_named_axis, map(_get_named_axis, arrays.values()) + ) + + else: + layouts = ensure_same_backend( + *( + ctx.unwrap( + x, + allow_record=False, + allow_unknown=False, + none_policy="pass-through", + primitive_policy="pass-through", + ) + for x in arrays + ) + ) + fields = None + + # propagate named axis from input to output, + # use strategy "unify" (see: awkward._namedaxis) + out_named_axis = reduce(_unify_named_axis, map(_get_named_axis, arrays)) + + # determine backend + backend = next((b.backend for b in layouts if hasattr(b, "backend")), "cpu") + + if with_name is not None: + if parameters is None: + parameters = {} + else: + parameters = dict(parameters) + parameters["__record__"] = with_name + + # only allow all NumpyArrays and ListOffsetArrays + if all(isinstance(layout, ak.contents.NumpyArray) for layout in layouts): + length = _check_equal_lengths(layouts) + out = ak.contents.RecordArray( + layouts, fields, length=length, parameters=parameters, backend=backend + ) + elif all(isinstance(layout, ak.contents.ListOffsetArray) for layout in layouts): + contents = [] + for layout in layouts: + # get the content of the ListOffsetArray + if not isinstance(layout.content, ak.contents.NumpyArray): + raise 
ValueError( + "can not (unsafe) zip ListOffsetArrays with non-NumpyArray contents" + ) + contents.append(layout.content) + + if backend.name == "typetracer": + # just get from the first one + # we're in typetracer mode, so we can't check the offsets (see else branch) + offsets = layouts[0].offsets + else: + # this is at 'runtime' with actual data, that means we can check the offsets, + # but only those that have actual data, i.e. no PlaceholderArrays + # so first, let's filter out any PlaceholderArrays + comparable_offsets = filter( + lambda o: not isinstance(o, ak._nplikes.placeholder.PlaceholderArray), + (layout.offsets for layout in layouts), + ) + # check that offsets are the same + first = next(comparable_offsets) + if not all( + first.nplike.all(offsets.data == first.data) + for offsets in comparable_offsets + ): + raise ValueError("all ListOffsetArrays must have the same offsets") + offsets = first + + length = _check_equal_lengths(contents) + out = ak.contents.ListOffsetArray( + offsets=offsets, + content=ak.contents.RecordArray( + contents, fields, length=length, parameters=parameters, backend=backend + ), + ) + else: + raise ValueError( + "all array layouts must be either NumpyArrays or ListOffsetArrays" + ) + + # Unify named axes propagated through the broadcast + wrapped_out = ctx.wrap(out, highlevel=highlevel) + return ak.operations.ak_with_named_axis._impl( + wrapped_out, + named_axis=out_named_axis, + highlevel=highlevel, + behavior=ctx.behavior, + attrs=ctx.attrs, + ) + + +def _check_equal_lengths( + contents: ak.contents.Content, +) -> int | ak._nplikes.shape.UnknownLength: + length = contents[0].length + for layout in contents: + if layout.length != length: + raise ValueError("all arrays must have the same length") + return length diff --git a/tests/test_3390_ak_zip_no_broadcast.py b/tests/test_3390_ak_zip_no_broadcast.py new file mode 100644 index 0000000000..fe327ce4ec --- /dev/null +++ b/tests/test_3390_ak_zip_no_broadcast.py @@ -0,0 +1,58 @@ 
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE +# ruff: noqa: E402 + +from __future__ import annotations + +import awkward as ak + + +def test_ak_zip_no_broadcast_NumpyArray_dict(): + a = ak.Array([1]) + b = ak.Array([2]) + c = ak.zip_no_broadcast({"a": a, "b": b}) + assert ak.to_list(c) == ak.to_list(ak.zip({"a": a, "b": b})) + + +def test_ak_zip_no_broadcast_ListOffsetArray_dict(): + a = ak.Array([[1], []]) + b = ak.Array([[2], []]) + c = ak.zip_no_broadcast({"a": a, "b": b}) + assert ak.to_list(c) == ak.to_list(ak.zip({"a": a, "b": b})) + + +def test_ak_zip_no_broadcast_NumpyArray_list(): + a = ak.Array([1]) + b = ak.Array([2]) + c = ak.zip_no_broadcast([a, b]) + assert ak.to_list(c) == ak.to_list(ak.zip([a, b])) + + +def test_ak_zip_no_broadcast_ListOffsetArray_list(): + a = ak.Array([[1], []]) + b = ak.Array([[2], []]) + c = ak.zip_no_broadcast([a, b]) + assert ak.to_list(c) == ak.to_list(ak.zip([a, b])) + + +def test_typetracer_NumpyArray_non_touching(): + tracer = ak.Array([1], backend="typetracer") + + tracer, report = ak.typetracer.typetracer_with_report( + tracer.layout.form_with_key(), highlevel=True + ) + + _ = ak.zip_no_broadcast({"foo": tracer, "bar": tracer}) + assert len(report.shape_touched) == 1 + assert len(report.data_touched) == 0 + + +def test_typetracer_ListOffsetArray_non_touching(): + tracer = ak.Array([[1], [], [2, 3]], backend="typetracer") + + tracer, report = ak.typetracer.typetracer_with_report( + tracer.layout.form_with_key(), highlevel=True + ) + + _ = ak.zip_no_broadcast({"foo": tracer, "bar": tracer}) + assert len(report.shape_touched) == 1 + assert len(report.data_touched) == 0 From 3320f1ed9749f6f9c52c2d780a0df440d331cdc2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 31 Jan 2025 15:01:06 +0100 Subject: [PATCH 3/4] chore(deps): bump the actions group with 2 updates (#3388) Bumps the actions group with 2 updates: 
[actions/attest-build-provenance](https://github.com/actions/attest-build-provenance) and [pypa/gh-action-pypi-publish](https://github.com/pypa/gh-action-pypi-publish). Updates `actions/attest-build-provenance` from 2.1.0 to 2.2.0 - [Release notes](https://github.com/actions/attest-build-provenance/releases) - [Changelog](https://github.com/actions/attest-build-provenance/blob/main/RELEASE.md) - [Commits](https://github.com/actions/attest-build-provenance/compare/7668571508540a607bdfd90a87a560489fe372eb...520d128f165991a6c774bcb264f323e3d70747f4) Updates `pypa/gh-action-pypi-publish` from 1.12.3 to 1.12.4 - [Release notes](https://github.com/pypa/gh-action-pypi-publish/releases) - [Commits](https://github.com/pypa/gh-action-pypi-publish/compare/v1.12.3...v1.12.4) --- updated-dependencies: - dependency-name: actions/attest-build-provenance dependency-type: direct:production update-type: version-update:semver-minor dependency-group: actions - dependency-name: pypa/gh-action-pypi-publish dependency-type: direct:production update-type: version-update:semver-patch dependency-group: actions ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/deploy-cpp.yml | 4 ++-- .github/workflows/deploy.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/deploy-cpp.yml b/.github/workflows/deploy-cpp.yml index 7fa43c7681..5e8bdd09a3 100644 --- a/.github/workflows/deploy-cpp.yml +++ b/.github/workflows/deploy-cpp.yml @@ -35,8 +35,8 @@ jobs: run: ls -l dist/ - name: Generate artifact attestation for sdist and wheel - uses: actions/attest-build-provenance@7668571508540a607bdfd90a87a560489fe372eb # v2.1.0 + uses: actions/attest-build-provenance@520d128f165991a6c774bcb264f323e3d70747f4 # v2.2.0 with: subject-path: "dist/awkward*cpp-*" - - uses: pypa/gh-action-pypi-publish@v1.12.3 + - uses: pypa/gh-action-pypi-publish@v1.12.4 diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 5a953109a4..b727c711f8 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -88,7 +88,7 @@ jobs: run: pipx run twine check dist/* - name: Generate artifact attestation for sdist and wheel - uses: actions/attest-build-provenance@7668571508540a607bdfd90a87a560489fe372eb # v2.1.0 + uses: actions/attest-build-provenance@520d128f165991a6c774bcb264f323e3d70747f4 # v2.2.0 with: subject-path: "dist/awkward-*" @@ -135,7 +135,7 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: gh attestation verify dist/awkward-*.whl --repo ${{ github.repository }} - - uses: pypa/gh-action-pypi-publish@v1.12.3 + - uses: pypa/gh-action-pypi-publish@v1.12.4 publish-headers: name: "Publish header-only libraries alongside release" From 3d14f2e74c7b6d99b09e436f90de4722f5959548 Mon Sep 17 00:00:00 2001 From: Ianna Osborne Date: Fri, 31 Jan 2025 15:23:39 +0100 Subject: [PATCH 4/4] version = "2.7.4" --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b0e2a34a35..6431a0db2e 100644 --- 
a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ build-backend = "hatchling.build" [project] name = "awkward" -version = "2.7.3" +version = "2.7.4" description = "Manipulate JSON-like data with NumPy-like idioms." license = { text = "BSD-3-Clause" } requires-python = ">=3.9"