From 41d08e12763bab7719da0bd2f9e62111e6e71d58 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Tue, 29 Oct 2024 14:58:27 -0700 Subject: [PATCH] Support clobber/no-clobber when writing Zip archive --- python/lsst/daf/butler/_butler.py | 4 ++++ python/lsst/daf/butler/_quantum_backed.py | 6 +++++- .../butler/datastores/file_datastore/retrieve_artifacts.py | 6 ++++++ python/lsst/daf/butler/direct_butler/_direct_butler.py | 3 ++- python/lsst/daf/butler/remote_butler/_remote_butler.py | 3 ++- python/lsst/daf/butler/script/retrieveArtifacts.py | 2 +- python/lsst/daf/butler/tests/hybrid_butler.py | 3 ++- 7 files changed, 22 insertions(+), 5 deletions(-) diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/_butler.py index c45fec258a..fbfedd6bb7 100644 --- a/python/lsst/daf/butler/_butler.py +++ b/python/lsst/daf/butler/_butler.py @@ -992,6 +992,7 @@ def retrieve_artifacts_zip( self, refs: Iterable[DatasetRef], destination: ResourcePathExpression, + overwrite: bool = True, ) -> ResourcePath: """Retrieve artifacts from a Butler and place in ZIP file. @@ -1003,6 +1004,9 @@ def retrieve_artifacts_zip( Directory to write the new ZIP file. This directory will also be used as a staging area for the datasets being downloaded from the datastore. + overwrite : `bool`, optional + If `False` the output Zip will not be written if a file of the + same name is already present in ``destination``. Returns ------- diff --git a/python/lsst/daf/butler/_quantum_backed.py b/python/lsst/daf/butler/_quantum_backed.py index 731e5a6183..067fb93c45 100644 --- a/python/lsst/daf/butler/_quantum_backed.py +++ b/python/lsst/daf/butler/_quantum_backed.py @@ -503,6 +503,7 @@ def retrieve_artifacts_zip( self, refs: Iterable[DatasetRef], destination: ResourcePathExpression, + overwrite: bool = True, ) -> ResourcePath: """Retrieve artifacts from the graph and place in ZIP file. @@ -514,6 +515,9 @@ def retrieve_artifacts_zip( Directory to write the new ZIP file. This directory will also be used as a staging area for the datasets being downloaded from the datastore. + overwrite : `bool`, optional + If `False` the output Zip will not be written if a file of the + same name is already present in ``destination``. Returns ------- @@ -525,7 +529,7 @@ def retrieve_artifacts_zip( ValueError Raised if there are no refs to retrieve. """ - return retrieve_and_zip(refs, destination, self._datastore.retrieveArtifacts) + return retrieve_and_zip(refs, destination, self._datastore.retrieveArtifacts, overwrite) def extract_provenance_data(self) -> QuantumProvenanceData: """Extract provenance information and datastore records from this diff --git a/python/lsst/daf/butler/datastores/file_datastore/retrieve_artifacts.py b/python/lsst/daf/butler/datastores/file_datastore/retrieve_artifacts.py index 41af46ea6f..9f195bcdb0 100644 --- a/python/lsst/daf/butler/datastores/file_datastore/retrieve_artifacts.py +++ b/python/lsst/daf/butler/datastores/file_datastore/retrieve_artifacts.py @@ -485,6 +485,7 @@ def retrieve_and_zip( refs: Iterable[DatasetRef], destination: ResourcePathExpression, retrieval_callback: RetrievalCallable, + overwrite: bool = True, ) -> ResourcePath: """Retrieve artifacts from a Butler and place in ZIP file. @@ -501,6 +502,9 @@ def retrieve_and_zip( Bound method for a function that can retrieve the artifacts and return the metadata necessary for creating the zip index. For example `lsst.daf.butler.datastore.Datastore.retrieveArtifacts`. + overwrite : `bool`, optional + If `False` the output Zip will not be written if a file of the + same name is already present in ``destination``. Returns ------- @@ -550,6 +554,8 @@ def retrieve_and_zip( # Use unique name based on files in Zip. zip_file_name = index.calculate_zip_file_name() zip_path = outdir.join(zip_file_name, forceDirectory=False) + if not overwrite and zip_path.exists(): + raise FileExistsError(f"Output Zip at {zip_path} already exists but cannot overwrite.") with zipfile.ZipFile(zip_path.ospath, "w") as zip: zip.write(index_path.ospath, index_path.basename()) for path, name in index.calc_relative_paths(tmpdir_path, list(artifact_map)).items(): diff --git a/python/lsst/daf/butler/direct_butler/_direct_butler.py b/python/lsst/daf/butler/direct_butler/_direct_butler.py index ac75275d06..892bcaa8f5 100644 --- a/python/lsst/daf/butler/direct_butler/_direct_butler.py +++ b/python/lsst/daf/butler/direct_butler/_direct_butler.py @@ -1296,8 +1296,9 @@ def retrieve_artifacts_zip( self, refs: Iterable[DatasetRef], destination: ResourcePathExpression, + overwrite: bool = True, ) -> ResourcePath: - return retrieve_and_zip(refs, destination, self._datastore.retrieveArtifacts) + return retrieve_and_zip(refs, destination, self._datastore.retrieveArtifacts, overwrite) def retrieveArtifacts( self, diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler.py b/python/lsst/daf/butler/remote_butler/_remote_butler.py index 43ab1697c7..63d8b7c9be 100644 --- a/python/lsst/daf/butler/remote_butler/_remote_butler.py +++ b/python/lsst/daf/butler/remote_butler/_remote_butler.py @@ -479,8 +479,9 @@ def retrieve_artifacts_zip( self, refs: Iterable[DatasetRef], destination: ResourcePathExpression, + overwrite: bool = True, ) -> ResourcePath: - return retrieve_and_zip(refs, destination, self._retrieve_artifacts) + return retrieve_and_zip(refs, destination, self._retrieve_artifacts, overwrite) def retrieveArtifacts( self, diff --git a/python/lsst/daf/butler/script/retrieveArtifacts.py b/python/lsst/daf/butler/script/retrieveArtifacts.py index 45b1c3e4b0..842e0fae00 100644 --- a/python/lsst/daf/butler/script/retrieveArtifacts.py +++ b/python/lsst/daf/butler/script/retrieveArtifacts.py @@ -126,7 +126,7 @@ def retrieveArtifacts( refs, destination=destination, transfer=transfer, preserve_path=preserve_path, overwrite=clobber ) else: - zip_file = butler.retrieve_artifacts_zip(refs, destination=destination) + zip_file = butler.retrieve_artifacts_zip(refs, destination=destination, overwrite=clobber) transferred = [zip_file] return transferred diff --git a/python/lsst/daf/butler/tests/hybrid_butler.py b/python/lsst/daf/butler/tests/hybrid_butler.py index 5fef275fbf..41181de15c 100644 --- a/python/lsst/daf/butler/tests/hybrid_butler.py +++ b/python/lsst/daf/butler/tests/hybrid_butler.py @@ -210,8 +210,9 @@ def retrieve_artifacts_zip( self, refs: Iterable[DatasetRef], destination: ResourcePathExpression, + overwrite: bool = True, ) -> ResourcePath: - return self._remote_butler.retrieve_artifacts_zip(refs, destination) + return self._remote_butler.retrieve_artifacts_zip(refs, destination, overwrite) def retrieveArtifacts( self,