From 2da2e514c7a5d624ab540fb63c813b1d33020572 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Tue, 13 Jun 2023 13:38:24 -0400 Subject: [PATCH 1/3] Add move/copy/delete methods on Asset --- pystac/asset.py | 51 +++++++++++++++++++ pystac/collection.py | 18 ++++++- pystac/item.py | 15 ++++++ tests/conftest.py | 16 +++++- tests/test_asset.py | 104 +++++++++++++++++++++++++++++++++++++++ tests/test_collection.py | 39 +++++++++++++++ tests/test_item.py | 34 +++++++++++++ 7 files changed, 275 insertions(+), 2 deletions(-) create mode 100644 tests/test_asset.py diff --git a/pystac/asset.py b/pystac/asset.py index 0bca478cc..3990da3d9 100644 --- a/pystac/asset.py +++ b/pystac/asset.py @@ -1,5 +1,7 @@ from __future__ import annotations +import os +import shutil from copy import copy, deepcopy from html import escape from typing import TYPE_CHECKING, Any, Dict, List, Optional, Type, TypeVar, Union @@ -211,3 +213,52 @@ def from_dict(cls: Type[A], d: Dict[str, Any]) -> A: roles=roles, extra_fields=properties, ) + + def move(self, href: str) -> Asset: + """Moves this asset's file to a new location on the local filesystem, + setting the asset href accordingly. + + Modifies the asset in place, and returns the same asset. + """ + src = _absolute_href(self.href, self.owner, "move") + dst = _absolute_href(href, self.owner, "move") + shutil.move(src, dst) + self.href = href + return self + + def copy(self, href: str) -> Asset: + """Copies this asset's file to a new location on the local filesystem, + setting the asset href accordingly. + + Modifies the asset in place, and returns the same asset. + """ + src = _absolute_href(self.href, self.owner, "copy") + dst = _absolute_href(href, self.owner, "copy") + shutil.copy2(src, dst) + self.href = href + return self + + def delete(self) -> None: + """Delete this asset's file. Does not delete the asset from the item + that owns it. See :func:`~pystac.Item.delete_asset` for that. + + Does not modify the asset. + """ + href = _absolute_href(self.href, self.owner, "delete") + os.remove(href) + + +def _absolute_href( + href: str, owner: Optional[Union[Item, Collection]], action: str = "access" +) -> str: + if utils.is_absolute_href(href): + return href + else: + item_self = owner.get_self_href() if owner else None + if item_self is None: + raise ValueError( + f"Cannot {action} file if asset href ('{href}') is relative " + "and owner item is not set. Hint: try using " + ":func:`~pystac.Item.make_asset_hrefs_absolute`" + ) + return utils.make_absolute_href(href, item_self) diff --git a/pystac/collection.py b/pystac/collection.py index 9817a48f4..d9268e202 100644 --- a/pystac/collection.py +++ b/pystac/collection.py @@ -731,7 +731,7 @@ def get_assets( } def add_asset(self, key: str, asset: Asset) -> None: - """Adds an Asset to this item. + """Adds an Asset to this collection. Args: key : The unique key of this asset. @@ -740,6 +740,22 @@ def add_asset(self, key: str, asset: Asset) -> None: asset.set_owner(self) self.assets[key] = asset + def delete_asset(self, key: str) -> None: + """Deletes the asset at the given key, and removes the asset's data + file from the local filesystem. + + It is an error to attempt to delete an asset's file if it is on a + remote filesystem. + + To delete the asset without removing the file, use + `del collection.assets["key"]`. + """ + asset = self.assets[key] + asset.set_owner(self) + asset.delete() + + del self.assets[key] + def update_extent_from_items(self) -> None: """ Update datetime and bbox based on all items to a single bbox and time window. diff --git a/pystac/item.py b/pystac/item.py index 77b063b97..c52489bef 100644 --- a/pystac/item.py +++ b/pystac/item.py @@ -263,6 +263,21 @@ def add_asset(self, key: str, asset: Asset) -> None: asset.set_owner(self) self.assets[key] = asset + def delete_asset(self, key: str) -> None: + """Deletes the asset at the given key, and removes the asset's data + file from the local filesystem. + + It is an error to attempt to delete an asset's file if it is on a + remote filesystem. + + To delete the asset without removing the file, use `del item.assets["key"]`. + """ + asset = self.assets[key] + asset.set_owner(self) + asset.delete() + + del self.assets[key] + def make_asset_hrefs_relative(self) -> Item: """Modify each asset's HREF to be relative to this item's self HREF. diff --git a/tests/conftest.py b/tests/conftest.py index 5f311e07c..fbe820471 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,11 +1,13 @@ # TODO move all test case code to this file +import shutil +import uuid from datetime import datetime from pathlib import Path import pytest -from pystac import Catalog, Collection, Item +from pystac import Asset, Catalog, Collection, Item from .utils import ARBITRARY_BBOX, ARBITRARY_EXTENT, ARBITRARY_GEOM, TestCases @@ -50,3 +52,15 @@ def get_data_file(rel_path: str) -> str: @pytest.fixture def sample_item() -> Item: return Item.from_file(TestCases.get_path("data-files/item/sample-item.json")) + + +@pytest.fixture(scope="function") +def tmp_asset(tmp_path: Path) -> Asset: + """Copy the entirety of test-case-2 to tmp and""" + src = get_data_file("catalogs/test-case-2") + dst = str(tmp_path / str(uuid.uuid4())) + shutil.copytree(src, dst) + + catalog = Catalog.from_file(f"{dst}/catalog.json") + item = next(catalog.get_items(recursive=True)) + return next(v for v in item.assets.values()) diff --git a/tests/test_asset.py b/tests/test_asset.py new file mode 100644 index 000000000..b41113403 --- /dev/null +++ b/tests/test_asset.py @@ -0,0 +1,104 @@ +import os +from pathlib import Path + +import pytest + +import pystac + + +@pytest.mark.parametrize("action", ["copy", "move"]) +def test_alter_asset_absolute_path( + action: str, tmp_asset: pystac.Asset, tmp_path: Path +) -> None: + asset = tmp_asset + old_href = asset.get_absolute_href() + assert old_href is not None + + new_href = str(tmp_path / "data.geojson") + getattr(asset, action)(new_href) + + assert asset.href == new_href + assert asset.get_absolute_href() == new_href + assert os.path.exists(new_href) + if action == "move": + assert not os.path.exists(old_href) + elif action == "copy": + assert os.path.exists(old_href) + + +@pytest.mark.parametrize("action", ["copy", "move"]) +def test_alter_asset_relative_path(action: str, tmp_asset: pystac.Asset) -> None: + asset = tmp_asset + old_href = asset.get_absolute_href() + assert old_href is not None + + new_href = "./different.geojson" + getattr(asset, action)(new_href) + + assert asset.href == new_href + href = asset.get_absolute_href() + assert href is not None + assert os.path.exists(href) + if action == "move": + assert not os.path.exists(old_href) + elif action == "copy": + assert os.path.exists(old_href) + + +@pytest.mark.parametrize("action", ["copy", "move"]) +def test_alter_asset_relative_src_no_owner_fails( + action: str, tmp_asset: pystac.Asset +) -> None: + asset = tmp_asset + asset.owner = None + new_href = "./different.geojson" + with pytest.raises(ValueError, match=f"Cannot {action} file") as e: + getattr(asset, action)(new_href) + + assert new_href not in str(e.value) + assert asset.href != new_href + + +@pytest.mark.parametrize("action", ["copy", "move"]) +def test_alter_asset_relative_dst_no_owner_fails( + action: str, tmp_asset: pystac.Asset +) -> None: + asset = tmp_asset + item = asset.owner + + assert isinstance(item, pystac.Item) + item.make_asset_hrefs_absolute() + + asset.owner = None + new_href = "./different.geojson" + with pytest.raises(ValueError, match=f"Cannot {action} file") as e: + getattr(asset, action)(new_href) + + assert new_href in str(e.value) + assert asset.href != new_href + + +def test_delete_asset(tmp_asset: pystac.Asset) -> None: + asset = tmp_asset + href = asset.get_absolute_href() + assert href is not None + assert os.path.exists(href) + + asset.delete() + + assert not os.path.exists(href) + + +def test_delete_asset_relative_no_owner_fails(tmp_asset: pystac.Asset) -> None: + asset = tmp_asset + href = asset.get_absolute_href() + assert href is not None + assert os.path.exists(href) + + asset.owner = None + + with pytest.raises(ValueError, match="Cannot delete file") as e: + asset.delete() + + assert asset.href in str(e.value) + assert os.path.exists(href) diff --git a/tests/test_collection.py b/tests/test_collection.py index b41db628b..bc7d4fa6b 100644 --- a/tests/test_collection.py +++ b/tests/test_collection.py @@ -13,6 +13,7 @@ import pystac from pystac import ( + Asset, Catalog, CatalogType, Collection, @@ -619,3 +620,41 @@ def test_get_item_is_not_recursive_by_default( item = col8.get_item("20170831_162740_ssc1d1", recursive=True) assert item is not None + + +def test_delete_asset(tmp_asset: Asset, collection: Collection) -> None: + asset = tmp_asset + href = asset.get_absolute_href() + item = asset.owner + name = "foo" + + assert href is not None + assert item is not None + + collection.add_asset(name, asset) + + # steal the href from the owner and use it as the collection's + collection.set_self_href(item.get_self_href()) + + collection.delete_asset(name) + assert name not in collection.assets + assert not os.path.exists(href) + + +def test_delete_asset_relative_no_self_link_fails( + tmp_asset: Asset, collection: Collection +) -> None: + asset = tmp_asset + href = asset.get_absolute_href() + name = "foo" + + assert href is not None + + collection.add_asset(name, asset) + + with pytest.raises(ValueError, match="Cannot delete file") as e: + collection.delete_asset(name) + + assert asset.href in str(e.value) + assert name in collection.assets + assert os.path.exists(href) diff --git a/tests/test_item.py b/tests/test_item.py index 8d0648e33..70d1b108e 100644 --- a/tests/test_item.py +++ b/tests/test_item.py @@ -548,3 +548,37 @@ def test_remove_derived_from(test_case_1_catalog: Catalog) -> None: for link in item_0.links: assert link.rel != pystac.RelType.DERIVED_FROM assert item_0.get_single_link(pystac.RelType.DERIVED_FROM) is None + + +def test_delete_asset(tmp_asset: Asset) -> None: + asset = tmp_asset + href = asset.get_absolute_href() + item = asset.owner + + assert href is not None + assert item is not None + + name = next(k for k in item.assets.keys() if item.assets[k] == asset) + item.delete_asset(name) + + assert name not in item.assets + assert not os.path.exists(href) + + +def test_delete_asset_relative_no_self_link_fails(tmp_asset: pystac.Asset) -> None: + asset = tmp_asset + href = asset.get_absolute_href() + item = asset.owner + + assert href is not None + assert item is not None + + item.set_self_href(None) + + name = next(k for k in item.assets.keys() if item.assets[k] == asset) + with pytest.raises(ValueError, match="Cannot delete file") as e: + item.delete_asset(name) + + assert asset.href in str(e.value) + assert name in item.assets + assert os.path.exists(href) From 0a4df88297724c1c3f02067b97e7d1e7251a62db Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Tue, 13 Jun 2023 15:16:14 -0400 Subject: [PATCH 2/3] Update pystac/asset.py Co-authored-by: Pete Gadomski --- pystac/asset.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pystac/asset.py b/pystac/asset.py index 3990da3d9..e6018436c 100644 --- a/pystac/asset.py +++ b/pystac/asset.py @@ -219,6 +219,12 @@ def move(self, href: str) -> Asset: setting the asset href accordingly. Modifies the asset in place, and returns the same asset. + + Args: + href: The new asset location + + Returns: + Asset: The asset with the updated href. """ src = _absolute_href(self.href, self.owner, "move") dst = _absolute_href(href, self.owner, "move") From 6cd47f7ebc8faef4c5c1d03a27550f4a4a766f84 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Tue, 13 Jun 2023 15:21:08 -0400 Subject: [PATCH 3/3] Improve docs and add changelog entry --- CHANGELOG.md | 1 + pystac/asset.py | 14 +++++++++++--- pystac/collection.py | 3 +++ pystac/item.py | 3 +++ 4 files changed, 18 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3def82991..17dc70f2f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ - `ItemEOExtension.get_assets` for getting assets filtered on band `name` or `common_name` ([#1140](https://github.com/stac-utils/pystac/pull/1140)) - `max_items` and `recursive` to `Catalog.validate_all` ([#1141](https://github.com/stac-utils/pystac/pull/1141)) - `KML` as a built in media type ([#1127](https://github.com/stac-utils/pystac/issues/1127)) +- `move/copy/delete` operations for local Assets ([#1158](https://github.com/stac-utils/pystac/issues/1158)) ### Changed diff --git a/pystac/asset.py b/pystac/asset.py index e6018436c..cbcf2c039 100644 --- a/pystac/asset.py +++ b/pystac/asset.py @@ -219,10 +219,11 @@ def move(self, href: str) -> Asset: setting the asset href accordingly. Modifies the asset in place, and returns the same asset. - + Args: - href: The new asset location - + href: The new asset location. Must be a local path. If relative + it must be relative to the owner object. + Returns: Asset: The asset with the updated href. """ @@ -237,6 +238,13 @@ def copy(self, href: str) -> Asset: setting the asset href accordingly. Modifies the asset in place, and returns the same asset. + + Args: + href: The new asset location. Must be a local path. If relative + it must be relative to the owner object. + + Returns: + Asset: The asset with the updated href. """ src = _absolute_href(self.href, self.owner, "copy") dst = _absolute_href(href, self.owner, "copy") diff --git a/pystac/collection.py b/pystac/collection.py index d9268e202..fbe4c550e 100644 --- a/pystac/collection.py +++ b/pystac/collection.py @@ -749,6 +749,9 @@ def delete_asset(self, key: str) -> None: To delete the asset without removing the file, use `del collection.assets["key"]`. + + Args: + key: The unique key of this asset. """ asset = self.assets[key] asset.set_owner(self) diff --git a/pystac/item.py b/pystac/item.py index c52489bef..b1007e969 100644 --- a/pystac/item.py +++ b/pystac/item.py @@ -271,6 +271,9 @@ def delete_asset(self, key: str) -> None: remote filesystem. To delete the asset without removing the file, use `del item.assets["key"]`. + + Args: + key: The unique key of this asset. """ asset = self.assets[key] asset.set_owner(self)