Skip to content

Commit

Permalink
Support clobber/no-clobber when writing Zip archive
Browse files Browse the repository at this point in the history
  • Loading branch information
timj committed Oct 29, 2024
1 parent 2b2eee2 commit 41d08e1
Show file tree
Hide file tree
Showing 7 changed files with 22 additions and 5 deletions.
4 changes: 4 additions & 0 deletions python/lsst/daf/butler/_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -992,6 +992,7 @@ def retrieve_artifacts_zip(
self,
refs: Iterable[DatasetRef],
destination: ResourcePathExpression,
overwrite: bool = True,
) -> ResourcePath:
"""Retrieve artifacts from a Butler and place in ZIP file.
Expand All @@ -1003,6 +1004,9 @@ def retrieve_artifacts_zip(
Directory to write the new ZIP file. This directory will
also be used as a staging area for the datasets being downloaded
from the datastore.
overwrite : `bool`, optional
If `False`, the output Zip will not be written if a file of the
same name is already present in ``destination``; a `FileExistsError`
is raised instead.
Returns
-------
Expand Down
6 changes: 5 additions & 1 deletion python/lsst/daf/butler/_quantum_backed.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,7 @@ def retrieve_artifacts_zip(
self,
refs: Iterable[DatasetRef],
destination: ResourcePathExpression,
overwrite: bool = True,
) -> ResourcePath:
"""Retrieve artifacts from the graph and place in ZIP file.
Expand All @@ -514,6 +515,9 @@ def retrieve_artifacts_zip(
Directory to write the new ZIP file. This directory will
also be used as a staging area for the datasets being downloaded
from the datastore.
overwrite : `bool`, optional
If `False`, the output Zip will not be written if a file of the
same name is already present in ``destination``; a `FileExistsError`
is raised instead.
Returns
-------
Expand All @@ -525,7 +529,7 @@ def retrieve_artifacts_zip(
ValueError
Raised if there are no refs to retrieve.
"""
return retrieve_and_zip(refs, destination, self._datastore.retrieveArtifacts)
return retrieve_and_zip(refs, destination, self._datastore.retrieveArtifacts, overwrite)

def extract_provenance_data(self) -> QuantumProvenanceData:
"""Extract provenance information and datastore records from this
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,7 @@ def retrieve_and_zip(
refs: Iterable[DatasetRef],
destination: ResourcePathExpression,
retrieval_callback: RetrievalCallable,
overwrite: bool = True,
) -> ResourcePath:
"""Retrieve artifacts from a Butler and place in ZIP file.
Expand All @@ -501,6 +502,9 @@ def retrieve_and_zip(
Bound method for a function that can retrieve the artifacts and
return the metadata necessary for creating the zip index. For example
`lsst.daf.butler.datastore.Datastore.retrieveArtifacts`.
overwrite : `bool`, optional
If `False`, the output Zip will not be written if a file of the
same name is already present in ``destination``; a `FileExistsError`
is raised instead.
Returns
-------
Expand Down Expand Up @@ -550,6 +554,8 @@ def retrieve_and_zip(
# Use unique name based on files in Zip.
zip_file_name = index.calculate_zip_file_name()
zip_path = outdir.join(zip_file_name, forceDirectory=False)
if not overwrite and zip_path.exists():
raise FileExistsError(f"Output Zip at {zip_path} already exists but cannot overwrite.")

Check warning on line 558 in python/lsst/daf/butler/datastores/file_datastore/retrieve_artifacts.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/datastores/file_datastore/retrieve_artifacts.py#L558

Added line #L558 was not covered by tests
with zipfile.ZipFile(zip_path.ospath, "w") as zip:
zip.write(index_path.ospath, index_path.basename())
for path, name in index.calc_relative_paths(tmpdir_path, list(artifact_map)).items():
Expand Down
3 changes: 2 additions & 1 deletion python/lsst/daf/butler/direct_butler/_direct_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1296,8 +1296,9 @@ def retrieve_artifacts_zip(
self,
refs: Iterable[DatasetRef],
destination: ResourcePathExpression,
overwrite: bool = True,
) -> ResourcePath:
return retrieve_and_zip(refs, destination, self._datastore.retrieveArtifacts)
return retrieve_and_zip(refs, destination, self._datastore.retrieveArtifacts, overwrite)

def retrieveArtifacts(
self,
Expand Down
3 changes: 2 additions & 1 deletion python/lsst/daf/butler/remote_butler/_remote_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,8 +479,9 @@ def retrieve_artifacts_zip(
self,
refs: Iterable[DatasetRef],
destination: ResourcePathExpression,
overwrite: bool = True,
) -> ResourcePath:
return retrieve_and_zip(refs, destination, self._retrieve_artifacts)
return retrieve_and_zip(refs, destination, self._retrieve_artifacts, overwrite)

def retrieveArtifacts(
self,
Expand Down
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/script/retrieveArtifacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def retrieveArtifacts(
refs, destination=destination, transfer=transfer, preserve_path=preserve_path, overwrite=clobber
)
else:
zip_file = butler.retrieve_artifacts_zip(refs, destination=destination)
zip_file = butler.retrieve_artifacts_zip(refs, destination=destination, overwrite=clobber)
transferred = [zip_file]

return transferred
3 changes: 2 additions & 1 deletion python/lsst/daf/butler/tests/hybrid_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,8 +210,9 @@ def retrieve_artifacts_zip(
self,
refs: Iterable[DatasetRef],
destination: ResourcePathExpression,
overwrite: bool = True,
) -> ResourcePath:
return self._remote_butler.retrieve_artifacts_zip(refs, destination)
return self._remote_butler.retrieve_artifacts_zip(refs, destination, overwrite)

def retrieveArtifacts(
self,
Expand Down

0 comments on commit 41d08e1

Please sign in to comment.