Use data_id/dataset_type in find_dataset and simplify collections
Underscore-separated (snake_case) names are the preferred way to add new APIs now.

The collections parameter should not support wildcards, so explicitly
declare it as a sequence of str.
timj committed Oct 31, 2023
1 parent 2b6ba5a commit b22a5f3
Showing 6 changed files with 26 additions and 29 deletions.
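As a quick illustration of the renamed API, here is a minimal sketch of a find_dataset call after this change (the repository path, dataset type name, data ID values, and collection name are hypothetical, not taken from this commit):

from lsst.daf.butler import Butler

# Hypothetical repository; only the keyword names are the point here.
butler = Butler("/path/to/repo")
ref = butler.find_dataset(
    "calexp",
    data_id={"instrument": "HSC", "visit": 903334, "detector": 20},
    collections=["HSC/runs/RC2"],
)
if ref is not None:
    print(ref.dataId, ref.run)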
22 changes: 10 additions & 12 deletions python/lsst/daf/butler/_butler.py
@@ -51,7 +51,7 @@
from ._timespan import Timespan
from .datastore import DatasetRefURIs, Datastore
from .dimensions import DataId, DimensionConfig
- from .registry import CollectionArgType, Registry, RegistryConfig, _RegistryFactory
+ from .registry import Registry, RegistryConfig, _RegistryFactory
from .repo_relocation import BUTLER_ROOT_TAG
from .transfers import RepoExportContext

@@ -819,10 +819,10 @@ def get_dataset(self, id: DatasetId) -> DatasetRef | None:
@abstractmethod
def find_dataset(
self,
- datasetType: DatasetType | str,
- dataId: DataId | None = None,
+ dataset_type: DatasetType | str,
+ data_id: DataId | None = None,
*,
- collections: CollectionArgType | None = None,
+ collections: str | Sequence[str] | None = None,
timespan: Timespan | None = None,
datastore_records: bool = False,
**kwargs: Any,
@@ -836,18 +836,16 @@ def find_dataset(
Parameters
----------
- datasetType : `DatasetType` or `str`
+ dataset_type : `DatasetType` or `str`
A `DatasetType` or the name of one. If this is a `DatasetType`
instance, its storage class will be respected and propagated to
the output, even if it differs from the dataset type definition
in the registry, as long as the storage classes are convertible.
- dataId : `dict` or `DataCoordinate`, optional
+ data_id : `dict` or `DataCoordinate`, optional
A `dict`-like object containing the `Dimension` links that identify
the dataset within a collection.
- collections : collection expression, optional
- An expression that fully or partially identifies the collections to
- search for the dataset; see
- :ref:`daf_butler_collection_expressions` for more information.
+ collections : `str` or `list` [`str`], optional
+ An ordered list of collections to search for the dataset.
Defaults to ``self.defaults.collections``.
timespan : `Timespan`, optional
A timespan that the validity range of the dataset must overlap.
@@ -871,7 +869,7 @@ def find_dataset(
``self.collections`` is `None`.
LookupError
Raised if one or more data ID keys are missing.
- lsst.daf.butler.registry.MissingDatasetTypeError
+ lsst.daf.butler.MissingDatasetTypeError
Raised if the dataset type does not exist.
lsst.daf.butler.MissingCollectionError
Raised if any of ``collections`` does not exist in the registry.
@@ -889,7 +887,7 @@ def find_dataset(
never changes the behavior.
This method handles component dataset types automatically, though most
- other registry operations do not.
+ other query operations do not.
"""
raise NotImplementedError()

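To ground the timespan parameter documented above, here is a hedged sketch of a calibration lookup (dataset type, data ID, collection, and time are illustrative; it assumes the lsst.daf.butler.Timespan and astropy.time.Time APIs and the butler instance from the earlier sketch):

from astropy.time import Time
from lsst.daf.butler import Timespan

# Hypothetical: find the flat whose validity range overlaps the observation time
# in a CALIBRATION collection.
obs_time = Time("2023-10-31T05:00:00", scale="tai")
ref = butler.find_dataset(
    "flat",
    data_id={"instrument": "HSC", "detector": 20, "physical_filter": "HSC-R"},
    collections=["HSC/calib"],
    timespan=Timespan(obs_time, obs_time),
)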
11 changes: 5 additions & 6 deletions python/lsst/daf/butler/direct_butler.py
@@ -76,7 +76,6 @@
)
from .progress import Progress
from .registry import (
- CollectionArgType,
CollectionType,
ConflictingDefinitionError,
DataIdError,
@@ -1327,17 +1326,17 @@ def get_dataset(self, id: DatasetId) -> DatasetRef | None:

def find_dataset(
self,
- datasetType: DatasetType | str,
- dataId: DataId | None = None,
+ dataset_type: DatasetType | str,
+ data_id: DataId | None = None,
*,
- collections: CollectionArgType | None = None,
+ collections: str | Sequence[str] | None = None,
timespan: Timespan | None = None,
datastore_records: bool = False,
**kwargs: Any,
) -> DatasetRef | None:
return self._registry.findDataset(
- datasetType,
- dataId,
+ dataset_type,
+ data_id,
collections=collections,
timespan=timespan,
dataset_records=datastore_records,
16 changes: 8 additions & 8 deletions python/lsst/daf/butler/remote_butler/_remote_butler.py
@@ -49,7 +49,7 @@
from .._timespan import Timespan
from ..datastore import DatasetRefURIs
from ..dimensions import DataCoordinate, DataId, DimensionConfig, DimensionUniverse, SerializedDataCoordinate
- from ..registry import CollectionArgType, NoDefaultCollectionError, Registry, RegistryDefaults
+ from ..registry import NoDefaultCollectionError, Registry, RegistryDefaults
from ..registry.wildcards import CollectionWildcard
from ..transfers import RepoExportContext
from ._config import RemoteButlerConfigModel
Expand Down Expand Up @@ -229,10 +229,10 @@ def get_dataset(self, id: DatasetId) -> DatasetRef | None:

def find_dataset(
self,
- datasetType: DatasetType | str,
- dataId: DataId | None = None,
+ dataset_type: DatasetType | str,
+ data_id: DataId | None = None,
*,
- collections: CollectionArgType | None = None,
+ collections: str | Sequence[str] | None = None,
timespan: Timespan | None = None,
datastore_records: bool = False,
**kwargs: Any,
@@ -248,14 +248,14 @@
# cache to generate list of collection names.
wildcards = CollectionWildcard.from_expression(collections)

- if isinstance(datasetType, DatasetType):
-     datasetType = datasetType.name
+ if isinstance(dataset_type, DatasetType):
+     dataset_type = dataset_type.name

query = FindDatasetModel(
- dataId=self._simplify_dataId(dataId, **kwargs), collections=wildcards.strings
+ data_id=self._simplify_dataId(data_id, **kwargs), collections=wildcards.strings
)

path = f"find_dataset/{datasetType}"
path = f"find_dataset/{dataset_type}"
response = self._client.post(
self._get_url(path), json=query.model_dump(mode="json", exclude_unset=True)
)
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/remote_butler/server/_server.py
@@ -146,6 +146,6 @@ def find_dataset(

butler = factory.create_butler()
ref = butler.find_dataset(
- dataset_type, dataId=unpack_dataId(butler, query.dataId), collections=collection_query
+ dataset_type, data_id=unpack_dataId(butler, query.data_id), collections=collection_query
)
return ref.to_simple() if ref else None
@@ -35,5 +35,5 @@


class FindDatasetModel(_BaseModelCompat):
- dataId: SerializedDataCoordinate
+ data_id: SerializedDataCoordinate
collections: list[str]
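For orientation, a simplified, self-contained stand-in for this request model, assuming _BaseModelCompat behaves like a Pydantic v2 BaseModel (as the model_dump(mode="json", exclude_unset=True) call in _remote_butler.py suggests); data_id is reduced to a plain dict purely to show the JSON shape:

from pydantic import BaseModel

class FindDatasetQuery(BaseModel):  # hypothetical stand-in, not the real model
    data_id: dict
    collections: list[str]

query = FindDatasetQuery(
    data_id={"instrument": "HSC", "visit": 903334, "detector": 20},
    collections=["HSC/runs/RC2"],
)
# Body POSTed to find_dataset/<dataset_type>:
print(query.model_dump(mode="json", exclude_unset=True))
# {'data_id': {'instrument': 'HSC', 'visit': 903334, 'detector': 20}, 'collections': ['HSC/runs/RC2']}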
2 changes: 1 addition & 1 deletion tests/test_butler.py
@@ -928,7 +928,7 @@ def testIngest(self) -> None:
datasets[0].refs = [
cast(
DatasetRef,
- butler.find_dataset(ref.datasetType, dataId=ref.dataId, collections=ref.run),
+ butler.find_dataset(ref.datasetType, data_id=ref.dataId, collections=ref.run),
)
for ref in datasets[0].refs
]
