-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
21 changed files
with
5,961 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
# This file is part of daf_butler. | ||
# | ||
# Developed for the LSST Data Management System. | ||
# This product includes software developed by the LSST Project | ||
# (http://www.lsst.org). | ||
# See the COPYRIGHT file at the top-level directory of this distribution | ||
# for details of code ownership. | ||
# | ||
# This software is dual licensed under the GNU General Public License and also | ||
# under a 3-clause BSD license. Recipients may choose which of these licenses | ||
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, | ||
# respectively. If you choose the GPL option then the following text applies | ||
# (but note that there is still no warranty even if you opt for BSD instead): | ||
# | ||
# This program is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# This program is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
from ._base import * | ||
from ._data_coordinate_query_results import * | ||
from ._dataset_query_results import * | ||
from ._dimension_record_query_results import * | ||
from ._query import * |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,195 @@ | ||
# This file is part of daf_butler. | ||
# | ||
# Developed for the LSST Data Management System. | ||
# This product includes software developed by the LSST Project | ||
# (http://www.lsst.org). | ||
# See the COPYRIGHT file at the top-level directory of this distribution | ||
# for details of code ownership. | ||
# | ||
# This software is dual licensed under the GNU General Public License and also | ||
# under a 3-clause BSD license. Recipients may choose which of these licenses | ||
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, | ||
# respectively. If you choose the GPL option then the following text applies | ||
# (but note that there is still no warranty even if you opt for BSD instead): | ||
# | ||
# This program is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# This program is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
from __future__ import annotations | ||
|
||
__all__ = ("QueryBase", "HomogeneousQueryBase", "CountableQueryBase", "QueryResultsBase") | ||
|
||
from abc import ABC, abstractmethod | ||
from collections.abc import Iterable, Set | ||
from typing import Any, Self | ||
|
||
from ..dimensions import DimensionGroup | ||
from .convert_args import convert_order_by_args | ||
from .driver import QueryDriver | ||
from .expression_factory import ExpressionProxy | ||
from .tree import OrderExpression, QueryTree | ||
|
||
|
||
class QueryBase(ABC): | ||
@abstractmethod | ||
def any(self, *, execute: bool = True, exact: bool = True) -> bool: | ||
"""Test whether the query would return any rows. | ||
Parameters | ||
---------- | ||
execute : `bool`, optional | ||
If `True`, execute at least a ``LIMIT 1`` query if it cannot be | ||
determined prior to execution that the query would return no rows. | ||
exact : `bool`, optional | ||
If `True`, run the full query and perform post-query filtering if | ||
needed, until at least one result row is found. If `False`, the | ||
returned result does not account for post-query filtering, and | ||
hence may be `True` even when all result rows would be filtered | ||
out. | ||
Returns | ||
------- | ||
any : `bool` | ||
`True` if the query would (or might, depending on arguments) yield | ||
result rows. `False` if it definitely would not. | ||
""" | ||
raise NotImplementedError() | ||
|
||
@abstractmethod | ||
def explain_no_results(self, execute: bool = True) -> Iterable[str]: | ||
"""Return human-readable messages that may help explain why the query | ||
yields no results. | ||
Parameters | ||
---------- | ||
execute : `bool`, optional | ||
If `True` (default) execute simplified versions (e.g. ``LIMIT 1``) | ||
of aspects of the tree to more precisely determine where rows were | ||
filtered out. | ||
Returns | ||
------- | ||
messages : `~collections.abc.Iterable` [ `str` ] | ||
String messages that describe reasons the query might not yield any | ||
results. | ||
""" | ||
raise NotImplementedError() | ||
|
||
|
||
class HomogeneousQueryBase(QueryBase): | ||
def __init__(self, driver: QueryDriver, tree: QueryTree): | ||
self._driver = driver | ||
self._tree = tree | ||
|
||
@property | ||
def dimensions(self) -> DimensionGroup: | ||
"""All dimensions included in the query's columns.""" | ||
return self._tree.dimensions | ||
|
||
def any(self, *, execute: bool = True, exact: bool = True) -> bool: | ||
# Docstring inherited. | ||
return self._driver.any(self._tree, execute=execute, exact=exact) | ||
|
||
def explain_no_results(self, execute: bool = True) -> Iterable[str]: | ||
# Docstring inherited. | ||
return self._driver.explain_no_results(self._tree, execute=execute) | ||
|
||
|
||
class CountableQueryBase(QueryBase): | ||
@abstractmethod | ||
def count(self, *, exact: bool = True, discard: bool = False) -> int: | ||
"""Count the number of rows this query would return. | ||
Parameters | ||
---------- | ||
exact : `bool`, optional | ||
If `True`, run the full query and perform post-query filtering if | ||
needed to account for that filtering in the count. If `False`, the | ||
result may be an upper bound. | ||
discard : `bool`, optional | ||
If `True`, compute the exact count even if it would require running | ||
the full query and then throwing away the result rows after | ||
counting them. If `False`, this is an error, as the user would | ||
usually be better off executing the query first to fetch its rows | ||
into a new query (or passing ``exact=False``). Ignored if | ||
``exact=False``. | ||
Returns | ||
------- | ||
count : `int` | ||
The number of rows the query would return, or an upper bound if | ||
``exact=False``. | ||
""" | ||
raise NotImplementedError() | ||
|
||
|
||
class QueryResultsBase(HomogeneousQueryBase, CountableQueryBase): | ||
def order_by(self, *args: str | OrderExpression | ExpressionProxy) -> Self: | ||
"""Return a new query that yields ordered results. | ||
Parameters | ||
---------- | ||
*args : `str` | ||
Names of the columns/dimensions to use for ordering. Column name | ||
can be prefixed with minus (``-``) to use descending ordering. | ||
Returns | ||
------- | ||
result : `QueryResultsBase` | ||
An ordered version of this query results object. | ||
Notes | ||
----- | ||
If this method is called multiple times, the new sort terms replace | ||
the old ones. | ||
""" | ||
return self._copy( | ||
self._tree, order_by=convert_order_by_args(self.dimensions, self._get_datasets(), *args) | ||
) | ||
|
||
def limit(self, limit: int | None = None, offset: int = 0) -> Self: | ||
"""Return a new query that slices its result rows positionally. | ||
Parameters | ||
---------- | ||
limit : `int` or `None`, optional | ||
Upper limit on the number of returned records. | ||
offset : `int`, optional | ||
The number of records to skip before returning at most ``limit`` | ||
records. | ||
Returns | ||
------- | ||
result : `QueryResultsBase` | ||
A sliced version of this query results object. | ||
Notes | ||
----- | ||
If this method is called multiple times, the new slice parameters | ||
replace the old ones. Slicing always occurs after sorting, even if | ||
`limit` is called before `order_by`. | ||
""" | ||
return self._copy(self._tree, limit=limit, offset=offset) | ||
|
||
@abstractmethod | ||
def _get_datasets(self) -> Set[str]: | ||
"""Return all dataset types included in the query's result rows.""" | ||
raise NotImplementedError() | ||
|
||
@abstractmethod | ||
def _copy(self, tree: QueryTree, **kwargs: Any) -> Self: | ||
"""Return a modified copy of ``self``. | ||
Modifications should be validated, not assumed to be correct. | ||
""" | ||
raise NotImplementedError() |
142 changes: 142 additions & 0 deletions
142
python/lsst/daf/butler/queries/_data_coordinate_query_results.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
# This file is part of daf_butler. | ||
# | ||
# Developed for the LSST Data Management System. | ||
# This product includes software developed by the LSST Project | ||
# (http://www.lsst.org). | ||
# See the COPYRIGHT file at the top-level directory of this distribution | ||
# for details of code ownership. | ||
# | ||
# This software is dual licensed under the GNU General Public License and also | ||
# under a 3-clause BSD license. Recipients may choose which of these licenses | ||
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, | ||
# respectively. If you choose the GPL option then the following text applies | ||
# (but note that there is still no warranty even if you opt for BSD instead): | ||
# | ||
# This program is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# This program is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
from __future__ import annotations | ||
|
||
__all__ = ("DataCoordinateQueryResults",) | ||
|
||
from collections.abc import Iterable, Iterator | ||
from typing import TYPE_CHECKING, Any | ||
|
||
from ..dimensions import DataCoordinate, DimensionGroup | ||
from ._base import QueryResultsBase | ||
from .driver import QueryDriver | ||
from .tree import InvalidQueryTreeError, QueryTree | ||
|
||
if TYPE_CHECKING: | ||
from .result_specs import DataCoordinateResultSpec | ||
|
||
|
||
class DataCoordinateQueryResults(QueryResultsBase): | ||
"""A method-chaining builder for butler queries that return data IDs. | ||
Parameters | ||
---------- | ||
driver : `QueryDriver` | ||
Implementation object that knows how to actually execute queries. | ||
tree : `QueryTree` | ||
Description of the query as a tree of joins and column expressions. | ||
The instance returned directly by the `Butler._query` entry point | ||
should be constructed via `make_unit_query_tree`. | ||
spec : `DataCoordinateResultSpec` | ||
Specification of the query result rows, including output columns, | ||
ordering, and slicing. | ||
Notes | ||
----- | ||
This refines the `DataCoordinateQueryResults` ABC defined in | ||
`lsst.daf.butler._query_results`, but the intent is to replace that ABC | ||
with this concrete class, rather than inherit from it. | ||
""" | ||
|
||
def __init__(self, driver: QueryDriver, tree: QueryTree, spec: DataCoordinateResultSpec): | ||
spec.validate_tree(tree) | ||
super().__init__(driver, tree) | ||
self._spec = spec | ||
|
||
def __iter__(self) -> Iterator[DataCoordinate]: | ||
page = self._driver.execute(self._spec, self._tree) | ||
yield from page.rows | ||
while page.next_key is not None: | ||
page = self._driver.fetch_next_page(self._spec, page.next_key) | ||
yield from page.rows | ||
|
||
@property | ||
def has_dimension_records(self) -> bool: | ||
"""Whether all data IDs in this iterable contain dimension records.""" | ||
return self._spec.include_dimension_records | ||
|
||
def with_dimension_records(self) -> DataCoordinateQueryResults: | ||
"""Return a results object for which `has_dimension_records` is | ||
`True`. | ||
""" | ||
if self.has_dimension_records: | ||
return self | ||
return self._copy(tree=self._tree, include_dimension_records=True) | ||
|
||
def subset( | ||
self, | ||
dimensions: DimensionGroup | Iterable[str] | None = None, | ||
) -> DataCoordinateQueryResults: | ||
"""Return a results object containing a subset of the dimensions of | ||
this one. | ||
Parameters | ||
---------- | ||
dimensions : `DimensionGroup` or \ | ||
`~collections.abc.Iterable` [ `str`], optional | ||
Dimensions to include in the new results object. If `None`, | ||
``self.dimensions`` is used. | ||
Returns | ||
------- | ||
results : `DataCoordinateQueryResults` | ||
A results object corresponding to the given criteria. May be | ||
``self`` if it already qualifies. | ||
Raises | ||
------ | ||
InvalidQueryTreeError | ||
Raised when ``dimensions`` is not a subset of the dimensions in | ||
this result. | ||
""" | ||
if dimensions is None: | ||
dimensions = self.dimensions | ||
else: | ||
dimensions = self._driver.universe.conform(dimensions) | ||
if not dimensions <= self.dimensions: | ||
raise InvalidQueryTreeError( | ||
f"New dimensions {dimensions} are not a subset of the current " | ||
f"dimensions {self.dimensions}." | ||
) | ||
return self._copy(tree=self._tree, dimensions=dimensions) | ||
|
||
def count(self, *, exact: bool = True, discard: bool = False) -> int: | ||
# Docstring inherited. | ||
return self._driver.count( | ||
self._tree, | ||
self._spec.get_result_columns(), | ||
find_first_dataset=None, | ||
exact=exact, | ||
discard=discard, | ||
) | ||
|
||
def _copy(self, tree: QueryTree, **kwargs: Any) -> DataCoordinateQueryResults: | ||
return DataCoordinateQueryResults(self._driver, tree, spec=self._spec.model_copy(update=kwargs)) | ||
|
||
def _get_datasets(self) -> frozenset[str]: | ||
return frozenset() |
Oops, something went wrong.