diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c6b825a7..33664c039 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +### Added + +- `Collection.from_items` for creating a `pystac.Collection` from an `ItemCollection` ([#1522](https://github.com/stac-utils/pystac/pull/1522)) + ### Fixed - Make sure that `VersionRange` has `VersionID`s rather than strings ([#1512](https://github.com/stac-utils/pystac/pull/1512)) diff --git a/docs/api/pystac.rst b/docs/api/pystac.rst index a325710eb..e16c4692d 100644 --- a/docs/api/pystac.rst +++ b/docs/api/pystac.rst @@ -141,6 +141,7 @@ ItemCollection .. autoclass:: pystac.ItemCollection :members: :inherited-members: + :undoc-members: Link ---- diff --git a/pystac/catalog.py b/pystac/catalog.py index c1772599c..ae47787c0 100644 --- a/pystac/catalog.py +++ b/pystac/catalog.py @@ -131,7 +131,7 @@ class Catalog(STACObject): catalog_type : Optional catalog type for this catalog. Must be one of the values in :class:`~pystac.CatalogType`. strategy : The layout strategy to use for setting the - HREFs of the catalog child objections and items. + HREFs of the catalog child objects and items. If not provided, it will default to the strategy of the root and fallback to :class:`~pystac.layout.BestPracticesLayoutStrategy`. """ diff --git a/pystac/collection.py b/pystac/collection.py index 891b3cc33..200b2f10d 100644 --- a/pystac/collection.py +++ b/pystac/collection.py @@ -474,7 +474,7 @@ class Collection(Catalog, Assets): :class:`~pystac.Asset` values in the dictionary will have their :attr:`~pystac.Asset.owner` attribute set to the created Collection. strategy : The layout strategy to use for setting the - HREFs of the catalog child objections and items. + HREFs of the catalog child objects and items. If not provided, it will default to strategy of the parent and fallback to :class:`~pystac.layout.BestPracticesLayoutStrategy`. """ @@ -710,6 +710,77 @@ def from_dict( return collection + @classmethod + def from_items( + cls: type[Collection], + items: Iterable[Item] | pystac.ItemCollection, + *, + id: str | None = None, + strategy: HrefLayoutStrategy | None = None, + ) -> Collection: + """Create a :class:`Collection` from iterable of items or an + :class:`~pystac.ItemCollection`. + + Will try to pull collection attributes from + :attr:`~pystac.ItemCollection.extra_fields` and items when possible. + + Args: + items : Iterable of :class:`~pystac.Item` instances to include in the + :class:`Collection`. This can be a :class:`~pystac.ItemCollection`. + id : Identifier for the collection. If not set, must be available on the + items and they must all match. + strategy : The layout strategy to use for setting the + HREFs of the catalog child objects and items. + If not provided, it will default to strategy of the parent and fallback + to :class:`~pystac.layout.BestPracticesLayoutStrategy`. + """ + + def extract(attr: str) -> Any: + """Extract attrs from items or item.properties as long as they all match""" + value = None + values = {getattr(item, attr, None) for item in items} + if len(values) == 1: + value = next(iter(values)) + if value is None: + values = {item.properties.get(attr, None) for item in items} + if len(values) == 1: + value = next(iter(values)) + return value + + if isinstance(items, pystac.ItemCollection): + extra_fields = deepcopy(items.extra_fields) + links = extra_fields.pop("links", {}) + providers = extra_fields.pop("providers", None) + if providers is not None: + providers = [pystac.Provider.from_dict(p) for p in providers] + else: + extra_fields = {} + links = {} + providers = [] + + id = id or extract("collection_id") + if id is None: + raise ValueError( + "Collection id must be defined. Either by specifying collection_id " + "on every item, or as a keyword argument to this function." + ) + + collection = cls( + id=id, + description=extract("description"), + extent=Extent.from_items(items), + title=extract("title"), + providers=providers, + extra_fields=extra_fields, + strategy=strategy, + ) + collection.add_items(items) + + for link in links: + collection.add_link(Link.from_dict(link)) + + return collection + def get_item(self, id: str, recursive: bool = False) -> Item | None: """Returns an item with a given ID. diff --git a/tests/conftest.py b/tests/conftest.py index 10a8b606e..86a3c3528 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,7 +9,7 @@ import pytest -from pystac import Asset, Catalog, Collection, Item, Link +from pystac import Asset, Catalog, Collection, Item, ItemCollection, Link from .utils import ARBITRARY_BBOX, ARBITRARY_EXTENT, ARBITRARY_GEOM, TestCases @@ -76,6 +76,18 @@ def sample_item() -> Item: return Item.from_file(TestCases.get_path("data-files/item/sample-item.json")) +@pytest.fixture +def sample_item_collection() -> ItemCollection: + return ItemCollection.from_file( + TestCases.get_path("data-files/item-collection/sample-item-collection.json") + ) + + +@pytest.fixture +def sample_items(sample_item_collection: ItemCollection) -> list[Item]: + return list(sample_item_collection) + + @pytest.fixture(scope="function") def tmp_asset(tmp_path: Path) -> Asset: """Copy the entirety of test-case-2 to tmp and""" diff --git a/tests/test_collection.py b/tests/test_collection.py index 5aafa1ea0..2279e21ce 100644 --- a/tests/test_collection.py +++ b/tests/test_collection.py @@ -19,6 +19,7 @@ Collection, Extent, Item, + ItemCollection, Provider, SpatialExtent, TemporalExtent, @@ -711,3 +712,111 @@ def test_permissive_temporal_extent_deserialization(collection: Collection) -> N ]["interval"][0] with pytest.warns(UserWarning): Collection.from_dict(collection_dict) + + +@pytest.mark.parametrize("fixture_name", ("sample_item_collection", "sample_items")) +def test_from_items(fixture_name: str, request: pytest.FixtureRequest) -> None: + items = request.getfixturevalue(fixture_name) + collection = Collection.from_items(items) + + for item in items: + assert collection.id == item.collection_id + assert collection.extent.spatial.bboxes[0][0] <= item.bbox[0] + assert collection.extent.spatial.bboxes[0][1] <= item.bbox[1] + assert collection.extent.spatial.bboxes[0][2] >= item.bbox[2] + assert collection.extent.spatial.bboxes[0][3] >= item.bbox[3] + + start = collection.extent.temporal.intervals[0][0] + end = collection.extent.temporal.intervals[0][1] + assert start and start <= str_to_datetime(item.properties["start_datetime"]) + assert end and end >= str_to_datetime(item.properties["end_datetime"]) + + if isinstance(items, ItemCollection): + expected = {(link["rel"], link["href"]) for link in items.extra_fields["links"]} + actual = {(link.rel, link.href) for link in collection.links} + assert expected.issubset(actual) + + +def test_from_items_pulls_from_properties() -> None: + item1 = Item( + id="test-item-1", + geometry=ARBITRARY_GEOM, + bbox=[-10, -20, 0, -10], + datetime=datetime(2000, 2, 1, 12, 0, 0, 0, tzinfo=tz.UTC), + collection="test-collection-1", + properties={"title": "Test Item", "description": "Extra words describing"}, + ) + collection = Collection.from_items([item1]) + assert collection.id == item1.collection_id + assert collection.title == item1.properties["title"] + assert collection.description == item1.properties["description"] + + +def test_from_items_without_collection_id() -> None: + item1 = Item( + id="test-item-1", + geometry=ARBITRARY_GEOM, + bbox=[-10, -20, 0, -10], + datetime=datetime(2000, 2, 1, 12, 0, 0, 0, tzinfo=tz.UTC), + properties={}, + ) + with pytest.raises(ValueError, match="Collection id must be defined."): + Collection.from_items([item1]) + + collection = Collection.from_items([item1], id="test-collection") + assert collection.id == "test-collection" + + +def test_from_items_with_collection_ids() -> None: + item1 = Item( + id="test-item-1", + geometry=ARBITRARY_GEOM, + bbox=[-10, -20, 0, -10], + datetime=datetime(2000, 2, 1, 12, 0, 0, 0, tzinfo=tz.UTC), + collection="test-collection-1", + properties={}, + ) + item2 = Item( + id="test-item-2", + geometry=ARBITRARY_GEOM, + bbox=[-15, -20, 0, -10], + datetime=datetime(2000, 2, 1, 13, 0, 0, 0, tzinfo=tz.UTC), + collection="test-collection-2", + properties={}, + ) + + with pytest.raises(ValueError, match="Collection id must be defined."): + Collection.from_items([item1, item2]) + + collection = Collection.from_items([item1, item2], id="test-collection") + assert collection.id == "test-collection" + + +def test_from_items_with_different_values() -> None: + item1 = Item( + id="test-item-1", + geometry=ARBITRARY_GEOM, + bbox=[-10, -20, 0, -10], + datetime=datetime(2000, 2, 1, 12, 0, 0, 0, tzinfo=tz.UTC), + properties={"title": "Test Item 1"}, + ) + item2 = Item( + id="test-item-2", + geometry=ARBITRARY_GEOM, + bbox=[-15, -20, 0, -10], + datetime=datetime(2000, 2, 1, 13, 0, 0, 0, tzinfo=tz.UTC), + properties={"title": "Test Item 2"}, + ) + + collection = Collection.from_items([item1, item2], id="test_collection") + assert collection.title is None + + +def test_from_items_with_providers(sample_item_collection: ItemCollection) -> None: + sample_item_collection.extra_fields["providers"] = [{"name": "pystac"}] + + collection = Collection.from_items(sample_item_collection) + assert collection.providers and len(collection.providers) == 1 + + provider = collection.providers[0] + assert provider and provider.name == "pystac"