Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-47768: Add "--chains NO-CHILDREN" to query-collections #1121

Merged
merged 2 commits into from
Dec 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/changes/DM-47768.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Added a `--chains NO-CHILDREN` mode to the `butler query-collections` CLI,
which returns results without recursing into `CHAINED` collections.
25 changes: 15 additions & 10 deletions python/lsst/daf/butler/cli/cmd/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,28 +394,33 @@ def prune_datasets(**kwargs: Any) -> None:
default="TREE",
help="""Affects how results are presented:

TABLE lists each dataset in table form, with columns for dataset name
and type, and a column that lists children of CHAINED datasets (if any
CHAINED datasets are found).
TABLE lists each collection in table form, with columns for collection
name and type, and a column that lists children of CHAINED collections
(if any CHAINED collections are found).

INVERSE-TABLE is like TABLE but instead of a column listing CHAINED
dataset children, it lists the parents of the dataset if it is contained
in any CHAINED collections.
collection children, it lists the parents of the collection if it is
contained in any CHAINED collections.

TREE recursively lists children below each CHAINED dataset in tree form.
TREE recursively lists children below each CHAINED collection in tree
form.

INVERSE-TREE recursively lists parent datasets below each dataset in
tree form.
INVERSE-TREE recursively lists parent collections below each collection
in tree form.

FLATTEN lists all datasets, including child datasets, in one list.
FLATTEN lists all collections, including children of CHAINED
collections, in one list.

NO-CHILDREN lists all collections in one list. CHAINED collections are
included, but they are not expanded to include their children.

[default: TREE]""",
# above, the default value is included, instead of using show_default, so
# that the default is printed on its own line instead of coming right after
# the text of the last option described (currently NO-CHILDREN).
callback=to_upper,
type=click.Choice(
choices=("TABLE", "INVERSE-TABLE", "TREE", "INVERSE-TREE", "FLATTEN"),
choices=("TABLE", "INVERSE-TABLE", "TREE", "INVERSE-TREE", "FLATTEN", "NO-CHILDREN"),
case_sensitive=False,
),
)
Expand Down
20 changes: 11 additions & 9 deletions python/lsst/daf/butler/script/queryCollections.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from __future__ import annotations

from collections.abc import Iterable
from typing import Literal

from astropy.table import Table

Expand Down Expand Up @@ -166,15 +167,16 @@ def addCollection(info: CollectionInfo, level: int = 0) -> None:
return table


def _getFlatten(
repo: str,
glob: Iterable[str],
collection_type: Iterable[CollectionType],
def _getList(
repo: str, glob: Iterable[str], collection_type: Iterable[CollectionType], flatten_chains: bool
) -> Table:
"""Return collection results as a table representing a flat list of
collections.
"""
butler = Butler.from_config(repo)
collections = list(
butler.collections.query_info(
glob or "*", collection_types=frozenset(collection_type), flatten_chains=True
glob or "*", collection_types=frozenset(collection_type), flatten_chains=flatten_chains
)
)
names = [c.name for c in collections]
Expand All @@ -186,7 +188,7 @@ def queryCollections(
repo: str,
glob: Iterable[str],
collection_type: Iterable[CollectionType],
chains: str,
chains: Literal["INVERSE-TABLE", "TABLE", "TREE", "INVERSE-TREE", "FLATTEN", "NO-CHILDREN"],
) -> Table:
"""Get the collections whose names match an expression.

Expand All @@ -202,7 +204,6 @@ def queryCollections(
optional
If provided, only return collections of these types.
chains : `str`
Must be one of "TABLE", "INVERSE-TABLE", "TREE", "INVERSE-TREE",
"FLATTEN", or "NO-CHILDREN" (case sensitive).
Affects contents and formatting of results, see
``cli.commands.query_collections``.

Expand All @@ -215,6 +216,7 @@ def queryCollections(
return _getTable(repo, glob, collection_type, inverse)
elif (inverse := chains == "INVERSE-TREE") or chains == "TREE":
return _getTree(repo, glob, collection_type, inverse)
elif chains == "FLATTEN":
return _getFlatten(repo, glob, collection_type)
elif chains == "FLATTEN" or chains == "NO-CHILDREN":
flatten = chains == "FLATTEN"
return _getList(repo, glob, collection_type, flatten)
raise RuntimeError(f"Value for --chains not recognized: {chains}")
18 changes: 18 additions & 0 deletions tests/test_cliCmdQueryCollections.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,24 @@ def testChained(self):
)
self.assertAstropyTablesEqual(readTable(result.output), expected, unorderedRows=True)

result = self.runner.invoke(cli, ["query-collections", "here", "--chains", "NO-CHILDREN"])
self.assertEqual(result.exit_code, 0, clickResultMsg(result))
expected = Table(
array(
(
("calibration1", "CALIBRATION"),
("chain1", "CHAINED"),
("chain2", "CHAINED"),
("imported_g", "RUN"),
("imported_r", "RUN"),
("run1", "RUN"),
("tag1", "TAGGED"),
)
),
names=("Name", "Type"),
)
self.assertAstropyTablesEqual(readTable(result.output), expected, unorderedRows=True)

# Add a couple more run collections for chain testing
registry1.registerRun("run2")
registry1.registerRun("run3")
Expand Down
Loading