Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Collection.from_items #1522

Merged
merged 6 commits into from
Feb 10, 2025
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## [Unreleased]

### Added

- `Collection.from_items` for creating a `pystac.Collection` from an `ItemCollection` or any iterable of `Item`s ([#1522](https://github.com/stac-utils/pystac/pull/1522))

### Fixed

- Make sure that `VersionRange` has `VersionID`s rather than strings ([#1512](https://github.com/stac-utils/pystac/pull/1512))
Expand Down
1 change: 1 addition & 0 deletions docs/api/pystac.rst
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ ItemCollection
.. autoclass:: pystac.ItemCollection
:members:
:inherited-members:
:undoc-members:

Link
----
Expand Down
71 changes: 71 additions & 0 deletions pystac/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -710,6 +710,77 @@ def from_dict(

return collection

@classmethod
def from_items(
    cls: type[Collection],
    items: Iterable[Item] | pystac.ItemCollection,
    *,
    id: str | None = None,
    strategy: HrefLayoutStrategy | None = None,
) -> Collection:
    """Create a :class:`Collection` from iterable of items or an
    :class:`~pystac.ItemCollection`.

    Will try to pull collection attributes from
    :attr:`~pystac.ItemCollection.extra_fields` and items when possible.

    Args:
        items : Iterable of :class:`~pystac.Item` instances to include in the
            :class:`Collection`. This can be a :class:`~pystac.ItemCollection`.
            One-shot iterables (e.g. generators) are supported; the items are
            read exactly once.
        id : Identifier for the collection. If not set, must be available on the
            items and they must all match.
        strategy : The layout strategy to use for setting the
            HREFs of the catalog child objects and items.
            If not provided, it will default to strategy of the parent and fallback
            to :class:`~pystac.layout.BestPracticesLayoutStrategy`.

    Returns:
        Collection: A new collection containing the items, with an extent
        computed from them.

    Raises:
        ValueError: If ``id`` is not given and the items do not all share a
            single, non-``None`` ``collection_id``.
    """
    # Collection-level metadata is only available on an ItemCollection; read it
    # before the items are materialized so the isinstance check sees the
    # original argument.
    if isinstance(items, pystac.ItemCollection):
        extra_fields = deepcopy(items.extra_fields)
        links = extra_fields.pop("links", [])
        providers = extra_fields.pop("providers", None)
        if providers is not None:
            providers = [pystac.Provider.from_dict(p) for p in providers]
    else:
        extra_fields = {}
        links = []
        providers = []

    # The items are traversed several times below (attribute extraction, extent
    # computation, adding to the collection). Materialize them once so that a
    # one-shot iterable is not exhausted after the first pass.
    item_list = list(items)

    def extract(attr: str) -> Any:
        """Extract attrs from items or item.properties as long as they all match"""
        value = None
        values = {getattr(item, attr, None) for item in item_list}
        if len(values) == 1:
            value = next(iter(values))
        if value is None:
            # Fall back to the item properties when the attribute is missing,
            # unset, or inconsistent across items.
            values = {item.properties.get(attr, None) for item in item_list}
            if len(values) == 1:
                value = next(iter(values))
        return value

    id = id or extract("collection_id")
    if id is None:
        raise ValueError(
            "Collection id must be defined. Either by specifying collection_id "
            "on every item, or as a keyword argument to this function."
        )

    collection = cls(
        id=id,
        description=extract("description"),
        extent=Extent.from_items(item_list),
        title=extract("title"),
        providers=providers,
        extra_fields=extra_fields,
        strategy=strategy,
    )
    collection.add_items(item_list)

    for link in links:
        collection.add_link(Link.from_dict(link))

    return collection

def get_item(self, id: str, recursive: bool = False) -> Item | None:
"""Returns an item with a given ID.

Expand Down
14 changes: 13 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

import pytest

from pystac import Asset, Catalog, Collection, Item, Link
from pystac import Asset, Catalog, Collection, Item, ItemCollection, Link

from .utils import ARBITRARY_BBOX, ARBITRARY_EXTENT, ARBITRARY_GEOM, TestCases

Expand Down Expand Up @@ -76,6 +76,18 @@ def sample_item() -> Item:
return Item.from_file(TestCases.get_path("data-files/item/sample-item.json"))


@pytest.fixture
def sample_item_collection() -> ItemCollection:
    """Load the sample item collection shipped with the test data files."""
    path = TestCases.get_path("data-files/item-collection/sample-item-collection.json")
    return ItemCollection.from_file(path)


@pytest.fixture
def sample_items(sample_item_collection: ItemCollection) -> list[Item]:
    """Return the items of the sample item collection as a plain list."""
    return [item for item in sample_item_collection]


@pytest.fixture(scope="function")
def tmp_asset(tmp_path: Path) -> Asset:
"""Copy the entirety of test-case-2 to tmp and"""
Expand Down
109 changes: 109 additions & 0 deletions tests/test_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
Collection,
Extent,
Item,
ItemCollection,
Provider,
SpatialExtent,
TemporalExtent,
Expand Down Expand Up @@ -711,3 +712,111 @@ def test_permissive_temporal_extent_deserialization(collection: Collection) -> N
]["interval"][0]
with pytest.warns(UserWarning):
Collection.from_dict(collection_dict)


@pytest.mark.parametrize("fixture_name", ("sample_item_collection", "sample_items"))
def test_from_items(fixture_name: str, request: pytest.FixtureRequest) -> None:
    """The built collection takes its id from the items and covers their extents.

    Runs once with an ``ItemCollection`` and once with a plain list of items.
    """
    items = request.getfixturevalue(fixture_name)
    collection = Collection.from_items(items)

    for item in items:
        assert collection.id == item.collection_id

        # The collection bbox must enclose every item bbox.
        outer = collection.extent.spatial.bboxes[0]
        assert outer[0] <= item.bbox[0]
        assert outer[1] <= item.bbox[1]
        assert outer[2] >= item.bbox[2]
        assert outer[3] >= item.bbox[3]

        # The collection interval must enclose every item's datetime range.
        interval = collection.extent.temporal.intervals[0]
        start = interval[0]
        end = interval[1]
        assert start and start <= str_to_datetime(item.properties["start_datetime"])
        assert end and end >= str_to_datetime(item.properties["end_datetime"])

    if isinstance(items, ItemCollection):
        # Links carried by the item collection must show up on the collection.
        expected = {(link["rel"], link["href"]) for link in items.extra_fields["links"]}
        actual = {(link.rel, link.href) for link in collection.links}
        assert expected <= actual


def test_from_items_pulls_from_properties() -> None:
    """Title and description are taken from the item properties."""
    properties = {"title": "Test Item", "description": "Extra words describing"}
    item = Item(
        id="test-item-1",
        geometry=ARBITRARY_GEOM,
        bbox=[-10, -20, 0, -10],
        datetime=datetime(2000, 2, 1, 12, 0, 0, 0, tzinfo=tz.UTC),
        collection="test-collection-1",
        properties=properties,
    )

    collection = Collection.from_items([item])

    assert collection.id == item.collection_id
    assert collection.title == item.properties["title"]
    assert collection.description == item.properties["description"]


def test_from_items_without_collection_id() -> None:
    """Without a collection id on the item, an explicit ``id`` is required."""
    orphan = Item(
        id="test-item-1",
        geometry=ARBITRARY_GEOM,
        bbox=[-10, -20, 0, -10],
        datetime=datetime(2000, 2, 1, 12, 0, 0, 0, tzinfo=tz.UTC),
        properties={},
    )

    # No id available anywhere: must be rejected.
    with pytest.raises(ValueError, match="Collection id must be defined."):
        Collection.from_items([orphan])

    # Supplying the id explicitly resolves it.
    built = Collection.from_items([orphan], id="test-collection")
    assert built.id == "test-collection"


def test_from_items_with_collection_ids() -> None:
    """Conflicting collection ids on the items require an explicit ``id``."""
    first = Item(
        id="test-item-1",
        geometry=ARBITRARY_GEOM,
        bbox=[-10, -20, 0, -10],
        datetime=datetime(2000, 2, 1, 12, 0, 0, 0, tzinfo=tz.UTC),
        collection="test-collection-1",
        properties={},
    )
    second = Item(
        id="test-item-2",
        geometry=ARBITRARY_GEOM,
        bbox=[-15, -20, 0, -10],
        datetime=datetime(2000, 2, 1, 13, 0, 0, 0, tzinfo=tz.UTC),
        collection="test-collection-2",
        properties={},
    )

    # The two items disagree on the collection id, so none can be inferred.
    with pytest.raises(ValueError, match="Collection id must be defined."):
        Collection.from_items([first, second])

    built = Collection.from_items([first, second], id="test-collection")
    assert built.id == "test-collection"


def test_from_items_with_different_values() -> None:
    """A property that differs between items is not copied to the collection."""
    first = Item(
        id="test-item-1",
        geometry=ARBITRARY_GEOM,
        bbox=[-10, -20, 0, -10],
        datetime=datetime(2000, 2, 1, 12, 0, 0, 0, tzinfo=tz.UTC),
        properties={"title": "Test Item 1"},
    )
    second = Item(
        id="test-item-2",
        geometry=ARBITRARY_GEOM,
        bbox=[-15, -20, 0, -10],
        datetime=datetime(2000, 2, 1, 13, 0, 0, 0, tzinfo=tz.UTC),
        properties={"title": "Test Item 2"},
    )

    built = Collection.from_items([first, second], id="test_collection")

    # The titles disagree, so no title is inferred.
    assert built.title is None


def test_from_items_with_providers(sample_item_collection: ItemCollection) -> None:
    """Providers in the item collection's extra fields become Provider objects."""
    sample_item_collection.extra_fields["providers"] = [{"name": "pystac"}]

    collection = Collection.from_items(sample_item_collection)

    assert collection.providers
    assert len(collection.providers) == 1

    provider = collection.providers[0]
    assert provider
    assert provider.name == "pystac"