Rewrite and disable Butler._query interfaces.
This includes:

- replacing the Query and *QueryResults ABCs with concrete classes
  that delegate to another QueryDriver ABC (a minimal sketch of this
  pattern follows this list);

- substantially reworking the public Query and *QueryResults
  interfaces, mostly to minimize the number of different ways to do
  various things (and hence limit complexity);

- adding a large suite of Pydantic models that can describe complex
  under-construction queries, allowing us to send them over the wire
  in RemoteButler.
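
A minimal sketch of the delegation pattern named above, with
simplified, hypothetical names (the real interfaces live in
lsst.daf.butler.queries and are considerably richer):

    # Hypothetical sketch only: the public Query class is concrete and
    # accumulates a serializable (Pydantic) description of the query,
    # which it hands to an abstract QueryDriver to execute.
    from abc import ABC, abstractmethod
    from collections.abc import Iterator
    from typing import Any

    import pydantic


    class QueryTree(pydantic.BaseModel):
        """Wire-friendly description of an under-construction query,
        so RemoteButler can ship it to a server."""

        dimensions: list[str] = []
        where: str = ""


    class QueryDriver(ABC):
        @abstractmethod
        def execute(self, tree: QueryTree) -> Iterator[Any]:
            """Run the described query and yield result rows."""


    class Query:
        """Concrete public class; builder methods return new objects."""

        def __init__(self, driver: QueryDriver, tree: QueryTree | None = None):
            self._driver = driver
            self._tree = tree if tree is not None else QueryTree()

        def where(self, expression: str) -> "Query":
            return Query(
                self._driver,
                self._tree.model_copy(update={"where": expression}),
            )

        def __iter__(self) -> Iterator[Any]:
            return self._driver.execute(self._tree)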

Because QueryDriver doesn't have any concrete implementations yet,
this change means Butler._query no longer works at all (previously it
delegated to the old registry.queries system).  A QueryDriver
implementation for DirectButler has been largely implemented on
another branch and will be added later.

For now, the only tests are those that rely on a mocked QueryDriver
(or don't require one at all); a sketch of that mocking approach
follows this list.  These are in two files:

- test_query_interfaces.py tests the public interface objects,
  including the semi-public Pydantic models;

- test_query_utilities.py tests some utility classes (ColumnSet and
  OverlapsVisitor) that are expected to be used by all driver
  implementations to establish some behavioral invariants.
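
As a purely hypothetical illustration of the mocked-driver approach
(the module path and signatures are assumptions, not guaranteed by
this commit):

    # Hypothetical test sketch; unittest.mock.create_autospec builds a
    # stub satisfying the QueryDriver ABC without any database.
    import unittest.mock

    from lsst.daf.butler.queries.driver import QueryDriver  # assumed path

    driver = unittest.mock.create_autospec(QueryDriver, instance=True)
    # Interface objects constructed over `driver` can then be exercised,
    # and the query descriptions they would send can be asserted on.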

There is already substantial duplication with code in
lsst.daf.butler.registry.queries, and that will get worse when a
direct-SQL driver class is added.  Eventually the plan is to retire
almost all of lsst.daf.butler.registry.queries (except the
string-expression parser, which we'll move later), making the public
registry query interfaces delegate to lsst.daf.butler.queries instead,
but that will require both getting the latter fully functional and
RFC'ing the removal of some features we have no intention of
supporting in the new system.
TallJimbo committed Feb 26, 2024
1 parent 8672db3 commit 97d3e88
Showing 31 changed files with 8,840 additions and 1,608 deletions.
12 changes: 10 additions & 2 deletions python/lsst/daf/butler/__init__.py
@@ -60,8 +60,6 @@
 from ._named import *
 from ._quantum import *
 from ._quantum_backed import *
-from ._query import *
-from ._query_results import *
 from ._storage_class import *
 from ._storage_class_delegate import *
 from ._timespan import *
@@ -80,6 +78,16 @@
 # Only lift 'Progress' from 'progess'; the module is imported as-is above
 from .progress import Progress
 
+# Only import the main public symbols from queries
+from .queries import (
+    ChainedDatasetQueryResults,
+    DataCoordinateQueryResults,
+    DatasetQueryResults,
+    DimensionRecordQueryResults,
+    Query,
+    SingleTypeDatasetQueryResults,
+)
+
 # Do not import or lift symbols from 'server' or 'server_models'.
 # Import the registry subpackage directly for other symbols.
 from .registry import (
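After this change, the lifted query symbols are importable from the
package root; for example (a trivial sketch):

    # The public query interface objects now come from the top-level
    # package rather than lsst.daf.butler.registry.
    from lsst.daf.butler import (
        DataCoordinateQueryResults,
        DimensionRecordQueryResults,
        Query,
    )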
109 changes: 72 additions & 37 deletions python/lsst/daf/butler/_butler.py
@@ -32,16 +32,19 @@
 from abc import abstractmethod
 from collections.abc import Collection, Iterable, Mapping, Sequence
 from contextlib import AbstractContextManager
+from types import EllipsisType
 from typing import TYPE_CHECKING, Any, TextIO
 
 from lsst.resources import ResourcePath, ResourcePathExpression
 from lsst.utils import doImportType
+from lsst.utils.iteration import ensure_iterable
 from lsst.utils.logging import getLogger
 
 from ._butler_config import ButlerConfig, ButlerType
 from ._butler_instance_options import ButlerInstanceOptions
 from ._butler_repo_index import ButlerRepoIndex
 from ._config import Config, ConfigSubset
+from ._exceptions import EmptyQueryResultError
 from ._limited_butler import LimitedButler
 from .datastore import Datastore
 from .dimensions import DimensionConfig
@@ -54,12 +57,12 @@
 from ._dataset_type import DatasetType
 from ._deferredDatasetHandle import DeferredDatasetHandle
 from ._file_dataset import FileDataset
-from ._query import Query
 from ._storage_class import StorageClass
 from ._timespan import Timespan
 from .datastore import DatasetRefURIs
 from .dimensions import DataCoordinate, DataId, DimensionGroup, DimensionRecord
-from .registry import CollectionArgType, Registry
+from .queries import Query
+from .registry import Registry
 from .transfers import RepoExportContext
 
 _LOG = getLogger(__name__)
@@ -1428,15 +1431,14 @@ def _query(self) -> AbstractContextManager[Query]:
"""
raise NotImplementedError()

@abstractmethod
def _query_data_ids(
self,
dimensions: DimensionGroup | Iterable[str] | str,
*,
data_id: DataId | None = None,
where: str = "",
bind: Mapping[str, Any] | None = None,
expanded: bool = False,
with_dimension_records: bool = False,
order_by: Iterable[str] | str | None = None,
limit: int | None = None,
offset: int = 0,
@@ -1466,7 +1468,7 @@ def _query_data_ids(
             Values of collection type can be expanded in some cases; see
             :ref:`daf_butler_dimension_expressions_identifiers` for more
             information.
-        expanded : `bool`, optional
+        with_dimension_records : `bool`, optional
             If `True` (default is `False`) then returned data IDs will have
             dimension records.
         order_by : `~collections.abc.Iterable` [`str`] or `str`, optional
@@ -1511,19 +1513,32 @@
             Raised when the arguments are incompatible, e.g. ``offset`` is
             specified, but ``limit`` is not.
         """
-        raise NotImplementedError()
+        if data_id is None:
+            data_id = DataCoordinate.make_empty(self.dimensions)
+        with self._query() as query:
+            result = (
+                query.where(data_id, where, bind=bind, **kwargs)
+                .data_ids(dimensions)
+                .order_by(*ensure_iterable(order_by))
+                .limit(limit, offset)
+            )
+            if with_dimension_records:
+                result = result.with_dimension_records()
+            data_ids = list(result)
+            if explain and not data_ids:
+                raise EmptyQueryResultError(list(result.explain_no_results()))
+            return data_ids
 
-    @abstractmethod
     def _query_datasets(
         self,
-        dataset_type: Any,
-        collections: CollectionArgType | None = None,
+        dataset_type: str | Iterable[str] | DatasetType | Iterable[DatasetType] | EllipsisType,
+        collections: str | Iterable[str] | None = None,
         *,
         find_first: bool = True,
         data_id: DataId | None = None,
         where: str = "",
         bind: Mapping[str, Any] | None = None,
-        expanded: bool = False,
+        with_dimension_records: bool = False,
         explain: bool = True,
         **kwargs: Any,
     ) -> list[DatasetRef]:
@@ -1533,17 +1548,13 @@ def _query_datasets(
         ----------
         dataset_type : dataset type expression
             An expression that fully or partially identifies the dataset types
-            to be queried. Allowed types include `DatasetType`, `str`,
-            `re.Pattern`, and iterables thereof. The special value ``...`` can
-            be used to query all dataset types. See
-            :ref:`daf_butler_dataset_type_expressions` for more information.
+            to be queried. Allowed types include `DatasetType`, `str`, and
+            iterables thereof. The special value ``...`` can be used to query
+            all dataset types. See :ref:`daf_butler_dataset_type_expressions`
+            for more information.
         collections : collection expression, optional
-            An expression that identifies the collections to search, such as a
-            `str` (for full matches or partial matches via globs), `re.Pattern`
-            (for partial matches), or iterable thereof. ``...`` can be used to
-            search all collections (actually just all `~CollectionType.RUN`
-            collections, because this will still find all datasets).
-            If not provided, the default collections are used. See
+            A collection name or iterable of collection names to search. If not
+            provided, the default collections are used. See
             :ref:`daf_butler_collection_expressions` for more information.
         find_first : `bool`, optional
             If `True` (default), for each result data ID, only yield one
@@ -1552,20 +1563,20 @@
             order of ``collections`` passed in). If `True`, ``collections``
             must not contain regular expressions and may not be ``...``.
         data_id : `dict` or `DataCoordinate`, optional
-            A data ID whose key-value pairs are used as equality constraints
-            in the query.
+            A data ID whose key-value pairs are used as equality constraints in
+            the query.
         where : `str`, optional
-            A string expression similar to a SQL WHERE clause. May involve
-            any column of a dimension table or (as a shortcut for the primary
-            key column of a dimension table) dimension name. See
+            A string expression similar to a SQL WHERE clause. May involve any
+            column of a dimension table or (as a shortcut for the primary key
+            column of a dimension table) dimension name. See
             :ref:`daf_butler_dimension_expressions` for more information.
         bind : `~collections.abc.Mapping`, optional
             Mapping containing literal values that should be injected into the
-            ``where`` expression, keyed by the identifiers they replace.
-            Values of collection type can be expanded in some cases; see
+            ``where`` expression, keyed by the identifiers they replace. Values
+            of collection type can be expanded in some cases; see
             :ref:`daf_butler_dimension_expressions_identifiers` for more
             information.
-        expanded : `bool`, optional
+        with_dimension_records : `bool`, optional
             If `True` (default is `False`) then returned data IDs will have
             dimension records.
         explain : `bool`, optional
@@ -1586,8 +1597,8 @@
             IDs are guaranteed to include values for all implied dimensions
             (i.e. `DataCoordinate.hasFull` will return `True`), but will not
             include dimension records (`DataCoordinate.hasRecords` will be
-            `False`) unless `~.queries.DatasetQueryResults.expanded` is
-            called on the result object (which returns a new one).
+            `False`) unless `~.queries.DatasetQueryResults.expanded` is called
+            on the result object (which returns a new one).
 
         Raises
         ------
@@ -1609,14 +1620,26 @@
 
         Notes
         -----
-        When multiple dataset types are queried in a single call, the
-        results of this operation are equivalent to querying for each dataset
-        type separately in turn, and no information about the relationships
-        between datasets of different types is included.
+        When multiple dataset types are queried in a single call, the results
+        of this operation are equivalent to querying for each dataset type
+        separately in turn, and no information about the relationships between
+        datasets of different types is included.
         """
-        raise NotImplementedError()
+        if data_id is None:
+            data_id = DataCoordinate.make_empty(self.dimensions)
+        with self._query() as query:
+            result = query.where(data_id, where, bind=bind, **kwargs).datasets(
+                dataset_type,
+                collections=collections,
+                find_first=find_first,
+            )
+            if with_dimension_records:
+                result = result.with_dimension_records()
+            refs = list(result)
+            if explain and not refs:
+                raise EmptyQueryResultError(list(result.explain_no_results()))
+            return refs
 
-    @abstractmethod
     def _query_dimension_records(
         self,
         element: str,
@@ -1691,7 +1714,19 @@ def _query_dimension_records(
             when ``collections`` is `None` and default butler collections are
             not defined.
         """
-        raise NotImplementedError()
+        if data_id is None:
+            data_id = DataCoordinate.make_empty(self.dimensions)
+        with self._query() as query:
+            result = (
+                query.where(data_id, where, bind=bind, **kwargs)
+                .dimension_records(element)
+                .order_by(*ensure_iterable(order_by))
+                .limit(limit, offset)
+            )
+            dimension_records = list(result)
+            if explain and not dimension_records:
+                raise EmptyQueryResultError(list(result.explain_no_results()))
+            return dimension_records
 
     @abstractmethod
     def _clone(
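
Taken together, the three shims above pin down the shape of the new
fluent interface. A usage sketch distilled from that code, usable once
a QueryDriver implementation lands (repository path, data ID values,
dataset type, and collection names are placeholders; exact signatures
may differ):

    # Sketch of the fluent API as exercised by the _query_* shims above.
    from lsst.daf.butler import Butler

    butler = Butler.from_config("repo")  # placeholder repository
    with butler._query() as query:
        constrained = query.where({"instrument": "HSC"}, "visit > 100")
        # Data IDs, ordered and limited, with dimension records attached.
        data_ids = list(
            constrained.data_ids(["visit", "detector"])
            .order_by("visit")
            .limit(10, 0)
            .with_dimension_records()
        )
        # Datasets, searched with find-first semantics over collections.
        refs = list(
            constrained.datasets(
                "calexp", collections=["HSC/runs/example"], find_first=True
            )
        )
        # Dimension records for a single dimension element.
        records = list(constrained.dimension_records("visit"))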