From 85ecf0f387de17546e6c6be6cad8a39bf3de35be Mon Sep 17 00:00:00 2001
From: Nicola Soranzo
Date: Fri, 17 Jan 2025 12:10:38 +0000
Subject: [PATCH] Make `CompressedFile` a context manager to ensure that the
 archive is closed.

---
 lib/galaxy/datatypes/isa.py                    |  3 +-
 lib/galaxy/model/store/__init__.py             |  3 +-
 lib/galaxy/tools/data_fetch.py                 |  4 +--
 lib/galaxy/util/compression_utils.py           | 30 +++++++++++++++++--
 .../test/functional/test_shed_repositories.py  |  3 +-
 test/integration/test_workflow_tasks.py        |  6 ++--
 test/unit/data/model/test_model_store.py       |  9 +++---
 test/unit/util/test_compression_util.py        |  7 +++--
 tools/data_source/upload.py                    |  3 +-
 9 files changed, 51 insertions(+), 17 deletions(-)

diff --git a/lib/galaxy/datatypes/isa.py b/lib/galaxy/datatypes/isa.py
index 68cc2c38752f..a9ce835b5fd5 100644
--- a/lib/galaxy/datatypes/isa.py
+++ b/lib/galaxy/datatypes/isa.py
@@ -247,7 +247,8 @@ def groom_dataset_content(self, file_name: str) -> None:
         # perform extraction
         # For some ZIP files CompressedFile::extract() extract the file inside / instead of outputing it inside . So we first create a temporary folder, extract inside it, and move content to final destination.
         temp_folder = tempfile.mkdtemp()
-        CompressedFile(file_name).extract(temp_folder)
+        with CompressedFile(file_name) as cf:
+            cf.extract(temp_folder)
         shutil.rmtree(output_path)
         extracted_files = os.listdir(temp_folder)
         logger.debug(" ".join(extracted_files))
diff --git a/lib/galaxy/model/store/__init__.py b/lib/galaxy/model/store/__init__.py
index d758ac39507f..97ba0bd10697 100644
--- a/lib/galaxy/model/store/__init__.py
+++ b/lib/galaxy/model/store/__init__.py
@@ -3075,7 +3075,8 @@ def source_to_import_store(
     if ModelStoreFormat.is_compressed(model_store_format):
         try:
             temp_dir = mkdtemp()
-            target_dir = CompressedFile(target_path).extract(temp_dir)
+            with CompressedFile(target_path) as cf:
+                target_dir = cf.extract(temp_dir)
         finally:
             if delete:
                 os.remove(target_path)
diff --git a/lib/galaxy/tools/data_fetch.py b/lib/galaxy/tools/data_fetch.py
index 8c25990ea73b..a6c1a5eb9021 100644
--- a/lib/galaxy/tools/data_fetch.py
+++ b/lib/galaxy/tools/data_fetch.py
@@ -442,8 +442,8 @@ def _decompress_target(upload_config: "UploadConfig", target: Dict[str, Any]):
     # fuzzy_root to False to literally interpret the target.
     fuzzy_root = target.get("fuzzy_root", True)
     temp_directory = os.path.abspath(tempfile.mkdtemp(prefix=elements_from_name, dir=upload_config.working_directory))
-    cf = CompressedFile(elements_from_path)
-    result = cf.extract(temp_directory)
+    with CompressedFile(elements_from_path) as cf:
+        result = cf.extract(temp_directory)
     return result if fuzzy_root else temp_directory
 
 
diff --git a/lib/galaxy/util/compression_utils.py b/lib/galaxy/util/compression_utils.py
index c1585b61238e..da09f0c08eb0 100644
--- a/lib/galaxy/util/compression_utils.py
+++ b/lib/galaxy/util/compression_utils.py
@@ -8,6 +8,7 @@
 import tarfile
 import tempfile
 import zipfile
+from types import TracebackType
 from typing import (
     Any,
     cast,
@@ -18,10 +19,14 @@
     Optional,
     overload,
     Tuple,
+    Type,
     Union,
 )
 
-from typing_extensions import Literal
+from typing_extensions import (
+    Literal,
+    Self,
+)
 
 from galaxy.util.path import (
     safe_relpath,
@@ -167,12 +172,16 @@ def decompress_bytes_to_directory(content: bytes) -> str:
     with tempfile.NamedTemporaryFile(delete=False) as fp:
         fp.write(content)
         fp.close()
-        return CompressedFile(fp.name).extract(temp_directory)
+        with CompressedFile(fp.name) as cf:
+            outdir = cf.extract(temp_directory)
+        return outdir
 
 
 def decompress_path_to_directory(path: str) -> str:
     temp_directory = tempfile.mkdtemp()
-    return CompressedFile(path).extract(temp_directory)
+    with CompressedFile(path) as cf:
+        outdir = cf.extract(temp_directory)
+    return outdir
 
 
 class CompressedFile:
@@ -361,6 +370,21 @@ def zipfile_ok(self, path_to_archive: StrPath) -> bool:
                 return False
         return True
 
+    def __enter__(self) -> Self:
+        return self
+
+    def __exit__(
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc_value: Optional[BaseException],
+        traceback: Optional[TracebackType],
+    ) -> bool:
+        try:
+            self.archive.close()
+            return exc_type is None
+        except Exception:
+            return False
+
 
 class FastZipFile(zipfile.ZipFile):
     """
diff --git a/lib/tool_shed/test/functional/test_shed_repositories.py b/lib/tool_shed/test/functional/test_shed_repositories.py
index 6d7cd61262c6..3af6b4c8e201 100644
--- a/lib/tool_shed/test/functional/test_shed_repositories.py
+++ b/lib/tool_shed/test/functional/test_shed_repositories.py
@@ -228,7 +228,8 @@ def test_repository_search(self):
 
     def test_repo_tars(self):
         for index, repo_path in enumerate(repo_tars("column_maker")):
-            path = CompressedFile(repo_path).extract(tempfile.mkdtemp())
+            with CompressedFile(repo_path) as cf:
+                path = cf.extract(tempfile.mkdtemp())
             tool_xml_path = os.path.join(path, "column_maker.xml")
             tool_source = get_tool_source(config_file=tool_xml_path)
             tool_version = tool_source.parse_version()
diff --git a/test/integration/test_workflow_tasks.py b/test/integration/test_workflow_tasks.py
index 3f8869dc9109..f502b40b0030 100644
--- a/test/integration/test_workflow_tasks.py
+++ b/test/integration/test_workflow_tasks.py
@@ -60,11 +60,13 @@ def test_export_import_invocation_with_input_as_output_sts(self):
 
     def test_export_ro_crate_basic(self):
         ro_crate_path = self._export_invocation_to_format(extension="rocrate.zip", to_uri=False)
-        assert CompressedFile(ro_crate_path).file_type == "zip"
+        with CompressedFile(ro_crate_path) as cf:
+            assert cf.file_type == "zip"
 
     def test_export_ro_crate_to_uri(self):
         ro_crate_path = self._export_invocation_to_format(extension="rocrate.zip", to_uri=True)
-        assert CompressedFile(ro_crate_path).file_type == "zip"
+        with CompressedFile(ro_crate_path) as cf:
+            assert cf.file_type == "zip"
 
     def test_export_bco_basic(self):
         bco_path = self._export_invocation_to_format(extension="bco.json", to_uri=False)
diff --git a/test/unit/data/model/test_model_store.py b/test/unit/data/model/test_model_store.py
index 99d43c556251..aa13b658dfa6 100644
--- a/test/unit/data/model/test_model_store.py
+++ b/test/unit/data/model/test_model_store.py
@@ -646,9 +646,9 @@ def test_export_invocation_to_ro_crate_archive(tmp_path):
     crate_zip = tmp_path / "crate.zip"
     with store.ROCrateArchiveModelExportStore(crate_zip, app=app, export_files="symlink") as export_store:
         export_store.export_workflow_invocation(workflow_invocation)
-    compressed_file = CompressedFile(crate_zip)
-    assert compressed_file.file_type == "zip"
-    compressed_file.extract(tmp_path)
+    with CompressedFile(crate_zip) as compressed_file:
+        assert compressed_file.file_type == "zip"
+        compressed_file.extract(tmp_path)
     crate_directory = tmp_path / "crate"
     validate_invocation_crate_directory(crate_directory)
 
@@ -1249,7 +1249,8 @@ class Options:
 
 def import_archive(archive_path, app, user, import_options=None):
     dest_parent = mkdtemp()
-    dest_dir = CompressedFile(archive_path).extract(dest_parent)
+    with CompressedFile(archive_path) as cf:
+        dest_dir = cf.extract(dest_parent)
 
     import_options = import_options or store.ImportOptions()
     model_store = store.get_import_model_store_for_directory(
diff --git a/test/unit/util/test_compression_util.py b/test/unit/util/test_compression_util.py
index 53d59520fd5c..cf0f28baca61 100644
--- a/test/unit/util/test_compression_util.py
+++ b/test/unit/util/test_compression_util.py
@@ -15,6 +15,7 @@ def test_compression_safety(self):
         self.assert_safety("test-data/unsafe.zip", False)
         self.assert_safety("test-data/4.bed.zip", True)
         self.assert_safety("test-data/testdir.tar", True)
+        self.assert_safety("test-data/testdir1.tar.gz", True)
         self.assert_safety("test-data/safetar_with_symlink.tar", True)
         self.assert_safety("test-data/safe_relative_symlink.tar", True)
 
@@ -30,10 +31,12 @@ def assert_safety(self, path, expected_to_be_safe):
         temp_dir = tempfile.mkdtemp()
         try:
             if expected_to_be_safe:
-                CompressedFile(path).extract(temp_dir)
+                with CompressedFile(path) as cf:
+                    cf.extract(temp_dir)
             else:
                 with self.assertRaisesRegex(Exception, "is blocked"):
-                    CompressedFile(path).extract(temp_dir)
+                    with CompressedFile(path) as cf:
+                        cf.extract(temp_dir)
         finally:
             shutil.rmtree(temp_dir, ignore_errors=True)
 
diff --git a/tools/data_source/upload.py b/tools/data_source/upload.py
index d6c6b108650e..a3b5c8089d30 100644
--- a/tools/data_source/upload.py
+++ b/tools/data_source/upload.py
@@ -226,7 +226,8 @@ def stage_file(name, composite_file_path, is_binary=False):
             # a little more explicitly so we didn't need to dispatch on the datatype and so we
            # could attach arbitrary extra composite data to an existing composite datatype if
            # if need be? Perhaps that would be a mistake though.
-            CompressedFile(dp).extract(files_path)
+            with CompressedFile(dp) as cf:
+                cf.extract(files_path)
         else:
             tmpdir = output_adjacent_tmpdir(output_path)
             tmp_prefix = "data_id_%s_convert_" % dataset.dataset_id
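
Usage sketch of the new context-manager API (not part of the patch itself; the archive path and temporary directory below are placeholders): `file_type` and `extract()` are used exactly as in the call sites updated above, and the `__exit__` added to `CompressedFile` closes the underlying archive handle once the block exits.

    import tempfile

    from galaxy.util.compression_utils import CompressedFile

    # The archive path is a placeholder; any tar/zip archive supported by
    # CompressedFile would do.  __exit__ closes the underlying archive
    # handle even if extract() raises.
    with CompressedFile("example_archive.tar.gz") as cf:
        print(cf.file_type)                      # e.g. "tar" or "zip"
        extracted_dir = cf.extract(tempfile.mkdtemp())
    print(extracted_dir)                         # root directory of the extracted content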