From cc892637e35c3056d8d5adaed85f79a499dfc802 Mon Sep 17 00:00:00 2001 From: Rob Emanuele Date: Thu, 17 Jun 2021 18:16:33 -0400 Subject: [PATCH 1/3] Add 'preserve_dict' parameter to STACObject.to_dict --- pystac/catalog.py | 4 +++- pystac/collection.py | 5 ++++- pystac/item.py | 5 ++++- pystac/stac_object.py | 6 ++++++ tests/test_catalog.py | 14 ++++++++++++++ tests/test_collection.py | 16 ++++++++++++++++ tests/test_item.py | 14 ++++++++++++-- 7 files changed, 59 insertions(+), 5 deletions(-) diff --git a/pystac/catalog.py b/pystac/catalog.py index 5fbb209b3..afd9ff455 100644 --- a/pystac/catalog.py +++ b/pystac/catalog.py @@ -906,6 +906,7 @@ def from_dict( href: Optional[str] = None, root: Optional["Catalog"] = None, migrate: bool = False, + preserve_dict: bool = True, ) -> "Catalog": if migrate: info = identify_stac_object(d) @@ -916,7 +917,8 @@ def from_dict( catalog_type = CatalogType.determine_type(d) - d = deepcopy(d) + if preserve_dict: + d = deepcopy(d) id = d.pop("id") description = d.pop("description") diff --git a/pystac/collection.py b/pystac/collection.py index 77a9a8644..27f6bac7c 100644 --- a/pystac/collection.py +++ b/pystac/collection.py @@ -587,6 +587,7 @@ def from_dict( href: Optional[str] = None, root: Optional[Catalog] = None, migrate: bool = False, + preserve_dict: bool = True, ) -> "Collection": if migrate: info = identify_stac_object(d) @@ -597,7 +598,9 @@ def from_dict( catalog_type = CatalogType.determine_type(d) - d = deepcopy(d) + if preserve_dict: + d = deepcopy(d) + id = d.pop("id") description = d.pop("description") license = d.pop("license") diff --git a/pystac/item.py b/pystac/item.py index d0896a324..2aba89b65 100644 --- a/pystac/item.py +++ b/pystac/item.py @@ -915,6 +915,7 @@ def from_dict( href: Optional[str] = None, root: Optional[Catalog] = None, migrate: bool = False, + preserve_dict: bool = True, ) -> "Item": if migrate: info = identify_stac_object(d) @@ -925,7 +926,9 @@ def from_dict( f"{d} does not represent a {cls.__name__} instance" ) - d = deepcopy(d) + if preserve_dict: + d = deepcopy(d) + id = d.pop("id") geometry = d.pop("geometry") properties = d.pop("properties") diff --git a/pystac/stac_object.py b/pystac/stac_object.py index 7564b104f..622df317a 100644 --- a/pystac/stac_object.py +++ b/pystac/stac_object.py @@ -495,6 +495,7 @@ def from_dict( href: Optional[str] = None, root: Optional["Catalog_Type"] = None, migrate: bool = False, + preserve_dict: bool = True, ) -> "STACObject": """Parses this STACObject from the passed in dictionary. @@ -507,6 +508,11 @@ def from_dict( previously resolved instances of the STAC object. migrate: Use True if this dict represents JSON from an older STAC object, so that migrations are run against it. + preserve_dict: If False, the dict parameter ``d`` may be modified + during this method call. Otherwise the dict is not mutated. + Defaults to True, which results results in a deepcopy of the + parameter. Set to False when possible to avoid the performance + hit of a deepcopy. Defaults to True. Returns: STACObject: The STACObject parsed from this dict. diff --git a/tests/test_catalog.py b/tests/test_catalog.py index 905de15e7..aed296e22 100644 --- a/tests/test_catalog.py +++ b/tests/test_catalog.py @@ -1,3 +1,4 @@ +from copy import deepcopy import os import json import tempfile @@ -85,6 +86,19 @@ def test_create_and_read(self) -> None: self.assertEqual(len(list(items)), 8) + def test_from_dict_preserves_dict(self) -> None: + catalog_dict = TestCases.test_case_1().to_dict() + param_dict = deepcopy(catalog_dict) + + # test that the parameter is preserved + _ = Catalog.from_dict(param_dict) + self.assertEqual(param_dict, catalog_dict) + + # assert that the parameter is not preserved with + # non-default parameter + _ = Catalog.from_dict(param_dict, preserve_dict=False) + self.assertNotEqual(param_dict, catalog_dict) + def test_read_remote(self) -> None: # TODO: Move this URL to the main stac-spec repo once the example JSON is fixed. catalog_url = ( diff --git a/tests/test_collection.py b/tests/test_collection.py index 69a3e2f1e..ddb67be92 100644 --- a/tests/test_collection.py +++ b/tests/test_collection.py @@ -1,3 +1,4 @@ +from copy import deepcopy import unittest import os import json @@ -182,6 +183,21 @@ def test_assets(self) -> None: collection = pystac.Collection.from_dict(data) collection.validate() + def test_to_dict_preserves_dict(self) -> None: + path = TestCases.get_path("data-files/collections/with-assets.json") + with open(path) as f: + collection_dict = json.load(f) + param_dict = deepcopy(collection_dict) + + # test that the parameter is preserved + _ = Collection.from_dict(param_dict) + self.assertEqual(param_dict, collection_dict) + + # assert that the parameter is not preserved with + # non-default parameter + _ = Collection.from_dict(param_dict, preserve_dict=False) + self.assertNotEqual(param_dict, collection_dict) + def test_schema_summary(self) -> None: collection = pystac.Collection.from_file( TestCases.get_path( diff --git a/tests/test_item.py b/tests/test_item.py index 490daad24..37ba12401 100644 --- a/tests/test_item.py +++ b/tests/test_item.py @@ -1,3 +1,4 @@ +from copy import deepcopy import os from datetime import datetime import json @@ -25,9 +26,10 @@ def test_to_from_dict(self) -> None: self.maxDiff = None item_dict = self.get_example_item_dict() + param_dict = deepcopy(item_dict) - assert_to_from_dict(self, Item, item_dict) - item = Item.from_dict(item_dict) + assert_to_from_dict(self, Item, param_dict) + item = Item.from_dict(param_dict) self.assertEqual(item.id, "CS3-20160503_132131_05") # test asset creation additional field(s) @@ -37,6 +39,14 @@ def test_to_from_dict(self) -> None: ) self.assertEqual(len(item.assets["thumbnail"].properties), 0) + # test that the parameter is preserved + self.assertEqual(param_dict, item_dict) + + # assert that the parameter is not preserved with + # non-default parameter + _ = Item.from_dict(param_dict, preserve_dict=False) + self.assertNotEqual(param_dict, item_dict) + def test_set_self_href_does_not_break_asset_hrefs(self) -> None: cat = TestCases.test_case_2() for item in cat.get_all_items(): From 5e5ffb31a46d5bbfbd8cbe7b1dda6f083332174e Mon Sep 17 00:00:00 2001 From: Rob Emanuele Date: Thu, 17 Jun 2021 18:52:48 -0400 Subject: [PATCH 2/3] Use from_dict with preserve_dict=False when reading from files. --- pystac/stac_io.py | 15 +++++++++++---- pystac/stac_object.py | 2 +- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/pystac/stac_io.py b/pystac/stac_io.py index f5bee808e..00e285d23 100644 --- a/pystac/stac_io.py +++ b/pystac/stac_io.py @@ -99,6 +99,7 @@ def stac_object_from_dict( d: Dict[str, Any], href: Optional[str] = None, root: Optional["Catalog_Type"] = None, + preserve_dict: bool = True, ) -> "STACObject_Type": if identify_stac_object_type(d) == pystac.STACObjectType.ITEM: collection_cache = None @@ -114,15 +115,21 @@ def stac_object_from_dict( d = migrate_to_latest(d, info) if info.object_type == pystac.STACObjectType.CATALOG: - result = pystac.Catalog.from_dict(d, href=href, root=root, migrate=False) + result = pystac.Catalog.from_dict( + d, href=href, root=root, migrate=False, preserve_dict=preserve_dict + ) result._stac_io = self return result if info.object_type == pystac.STACObjectType.COLLECTION: - return pystac.Collection.from_dict(d, href=href, root=root, migrate=False) + return pystac.Collection.from_dict( + d, href=href, root=root, migrate=False, preserve_dict=preserve_dict + ) if info.object_type == pystac.STACObjectType.ITEM: - return pystac.Item.from_dict(d, href=href, root=root, migrate=False) + return pystac.Item.from_dict( + d, href=href, root=root, migrate=False, preserve_dict=preserve_dict + ) raise ValueError(f"Unknown STAC object type {info.object_type}") @@ -164,7 +171,7 @@ def read_stac_object( """ d = self.read_json(source) href = source if isinstance(source, str) else source.get_absolute_href() - return self.stac_object_from_dict(d, href=href, root=root) + return self.stac_object_from_dict(d, href=href, root=root, preserve_dict=False) def save_json( self, dest: Union[str, "Link_Type"], json_dict: Dict[str, Any] diff --git a/pystac/stac_object.py b/pystac/stac_object.py index 622df317a..1e74dd10e 100644 --- a/pystac/stac_object.py +++ b/pystac/stac_object.py @@ -473,7 +473,7 @@ def from_file( href = make_absolute_href(href) d = stac_io.read_json(href) - o = cls.from_dict(d, href=href, migrate=True) + o = cls.from_dict(d, href=href, migrate=True, preserve_dict=False) # Set the self HREF, if it's not already set to something else. if o.get_self_href() is None: From 68df9bdfc4ded2ac4fceaf5f0b1a70916a7ecc4d Mon Sep 17 00:00:00 2001 From: Rob Emanuele Date: Thu, 17 Jun 2021 19:46:25 -0400 Subject: [PATCH 3/3] Avoid duplication in docstring Co-authored-by: Jon Duckworth --- pystac/stac_object.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pystac/stac_object.py b/pystac/stac_object.py index 1e74dd10e..e61adf97b 100644 --- a/pystac/stac_object.py +++ b/pystac/stac_object.py @@ -512,7 +512,7 @@ def from_dict( during this method call. Otherwise the dict is not mutated. Defaults to True, which results results in a deepcopy of the parameter. Set to False when possible to avoid the performance - hit of a deepcopy. Defaults to True. + hit of a deepcopy. Returns: STACObject: The STACObject parsed from this dict.