Skip to content

Commit

Permalink
Use CompressedFile.open_tar() to open tar files
Browse files (browse the repository at this point in the history)
  • Loading branch information
nsoranzo committed Jan 17, 2025
1 parent 85ecf0f commit c71f410
Show file tree
Hide file tree
Showing 6 changed files with 35 additions and 51 deletions.
16 changes: 10 additions & 6 deletions lib/galaxy/tool_util/verify/interactor.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import re
import shutil
import sys
import tarfile
import tempfile
import time
import urllib.parse
Expand Down Expand Up @@ -45,6 +44,7 @@
)
from galaxy.util import requests
from galaxy.util.bunch import Bunch
from galaxy.util.compression_utils import CompressedFile
from galaxy.util.hash_util import (
memory_bound_hexdigest,
parse_checksum_hash,
Expand Down Expand Up @@ -434,7 +434,14 @@ def test_data_path(self, tool_id, filename, tool_version=None):
return result
raise Exception(result["err_msg"])

def test_data_download(self, tool_id, filename, mode="file", is_output=True, tool_version=None):
def test_data_download(
self,
tool_id: str,
filename: str,
mode: Literal["directory", "file"] = "file",
is_output: bool = True,
tool_version: Optional[str] = None,
):
result = None
local_path = None

Expand All @@ -453,10 +460,7 @@ def test_data_download(self, tool_id, filename, mode="file", is_output=True, too
contents.extractall(path=path)
else:
# Galaxy < 21.01
with tarfile.open(fileobj=fileobj) as tar_contents:
tar_contents.extraction_filter = getattr(
tarfile, "data_filter", (lambda member, path: member)
)
with CompressedFile.open_tar(fileobj) as tar_contents:
tar_contents.extractall(path=path)
result = path
else:
Expand Down
17 changes: 3 additions & 14 deletions lib/galaxy/tools/imp_exp/unpack_tar_gz_archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

from galaxy.files import ConfiguredFileSources
from galaxy.files.uris import stream_url_to_file
from galaxy.util.compression_utils import CompressedFile

# Set max size of archive/file that will be handled to be 100 GB. This is
# arbitrary and should be adjusted as needed.
Expand Down Expand Up @@ -52,19 +53,6 @@ def check_archive(archive_file, dest_dir):
return True


def unpack_archive(archive_file, dest_dir):
"""
Unpack a tar and/or gzipped archive into a destination directory.
"""
if zipfile.is_zipfile(archive_file):
with zipfile.ZipFile(archive_file, "r") as zip_archive:
zip_archive.extractall(path=dest_dir)
else:
with tarfile.open(archive_file, mode="r") as archive_fp:
archive_fp.extraction_filter = getattr(tarfile, "data_filter", (lambda member, path: member))
archive_fp.extractall(path=dest_dir)


def main(options, args):
is_url = bool(options.is_url)
is_file = bool(options.is_file)
Expand All @@ -84,7 +72,8 @@ def main(options, args):

# Unpack archive.
check_archive(archive_file, dest_dir)
unpack_archive(archive_file, dest_dir)
with CompressedFile(archive_file) as cf:
cf.extract(dest_dir)


if __name__ == "__main__":
Expand Down
16 changes: 11 additions & 5 deletions lib/galaxy/util/compression_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,18 +345,24 @@ def isfile(self, member: ArchiveMemberType) -> bool:
return True
return False

def open_tar(self, filepath: StrPath, mode: Literal["a", "r", "w", "x"]) -> tarfile.TarFile:
tf = tarfile.open(filepath, mode, errorlevel=0)
@staticmethod
def open_tar(file: Union[StrPath, IO[bytes]], mode: Literal["a", "r", "w", "x"] = "r") -> tarfile.TarFile:
if isinstance(file, (str, os.PathLike)):
tf = tarfile.open(file, mode=mode, errorlevel=0)
else:
tf = tarfile.open(mode=mode, fileobj=file, errorlevel=0)
# Set a safe default ("data_filter") for the extraction filter if
# available, reverting to Python 3.11 behavior otherwise, see
# https://docs.python.org/3/library/tarfile.html#supporting-older-python-versions
tf.extraction_filter = getattr(tarfile, "data_filter", (lambda member, path: member))
return tf

def open_zip(self, filepath: StrPath, mode: Literal["a", "r", "w", "x"]) -> zipfile.ZipFile:
return zipfile.ZipFile(filepath, mode)
@staticmethod
def open_zip(file: Union[StrPath, IO[bytes]], mode: Literal["a", "r", "w", "x"] = "r") -> zipfile.ZipFile:
return zipfile.ZipFile(file, mode)

def zipfile_ok(self, path_to_archive: StrPath) -> bool:
@staticmethod
def zipfile_ok(path_to_archive: StrPath) -> bool:
"""
This function is a bit pedantic and not functionally necessary. It checks whether there is
no file pointing outside of the extraction, because ZipFile.extractall() has some potential
Expand Down
12 changes: 7 additions & 5 deletions lib/tool_shed/test/base/twilltestcase.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@
DEFAULT_SOCKET_TIMEOUT,
smart_str,
)
from galaxy.util.compression_utils import CompressedFile
from galaxy.util.resources import as_file
from galaxy_test.base.api_asserts import assert_status_code_is_ok
from galaxy_test.base.api_util import get_admin_api_key
from galaxy_test.base.populators import wait_on_assertion
Expand All @@ -64,7 +66,6 @@
hgweb_config,
xml_util,
)
from tool_shed.util.repository_content_util import tar_open
from tool_shed.webapp.model import Repository as DbRepository
from tool_shed_client.schema import (
Category,
Expand Down Expand Up @@ -1146,7 +1147,8 @@ def add_file_to_repository(
target = os.path.basename(source)
full_target = os.path.join(temp_directory, target)
full_source = TEST_DATA_REPO_FILES.joinpath(source)
shutil.copyfile(str(full_source), full_target)
with as_file(full_source) as full_source_path:
shutil.copyfile(full_source_path, full_target)
commit_message = commit_message or "Uploaded revision with added file."
self._upload_dir_to_repository(
repository, temp_directory, commit_message=commit_message, strings_displayed=strings_displayed
Expand All @@ -1155,9 +1157,9 @@ def add_file_to_repository(
def add_tar_to_repository(self, repository: Repository, source: str, strings_displayed=None):
with self.cloned_repo(repository) as temp_directory:
full_source = TEST_DATA_REPO_FILES.joinpath(source)
tar = tar_open(full_source)
tar.extractall(path=temp_directory)
tar.close()
with full_source.open("rb") as full_source_fileobj:
with CompressedFile.open_tar(full_source_fileobj) as tar:
tar.extractall(path=temp_directory)
commit_message = "Uploaded revision with added files from tar."
self._upload_dir_to_repository(
repository, temp_directory, commit_message=commit_message, strings_displayed=strings_displayed
Expand Down
20 changes: 2 additions & 18 deletions lib/tool_shed/util/repository_content_util.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import os
import shutil
import tarfile
import tempfile
from typing import (
Optional,
Expand All @@ -9,7 +8,7 @@

import tool_shed.repository_types.util as rt_util
from galaxy.tool_shed.util.hg_util import clone_repository
from galaxy.util import checkers
from galaxy.util.compression_utils import CompressedFile
from tool_shed.dependencies.attribute_handlers import (
RepositoryDependencyAttributeHandler,
ToolDependencyAttributeHandler,
Expand All @@ -26,21 +25,6 @@
from tool_shed.webapp.model import Repository


def tar_open(uploaded_file):
isgzip = False
isbz2 = False
isgzip = checkers.is_gzip(uploaded_file)
if not isgzip:
isbz2 = checkers.is_bz2(uploaded_file)
if isgzip or isbz2:
# Open for reading with transparent compression.
tar = tarfile.open(uploaded_file, "r:*")
else:
tar = tarfile.open(uploaded_file)
tar.extraction_filter = getattr(tarfile, "data_filter", (lambda member, path: member))
return tar


def upload_tar(
trans: "ProvidesRepositoriesContext",
username: str,
Expand All @@ -55,7 +39,7 @@ def upload_tar(
) -> ChangeResponseT:
host = trans.repositories_hostname
app = trans.app
tar = tar_open(uploaded_file)
tar = CompressedFile.open_tar(uploaded_file)
rdah = rdah or RepositoryDependencyAttributeHandler(trans, unpopulate=False)
tdah = tdah or ToolDependencyAttributeHandler(trans, unpopulate=False)
# Upload a tar archive of files.
Expand Down
5 changes: 2 additions & 3 deletions test/unit/tool_shed/test_shed_index.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import os
import shutil
import tarfile
import tempfile
from collections import namedtuple
from io import BytesIO
Expand All @@ -9,6 +8,7 @@
import requests
from whoosh import index

from galaxy.util.compression_utils import CompressedFile
from tool_shed.util.shed_index import build_index

URL = "https://github.com/mvdbeek/toolshed-test-data/blob/master/toolshed_community_files.tgz?raw=true"
Expand All @@ -29,8 +29,7 @@ def community_file_dir():
response = requests.get(URL)
response.raise_for_status()
b = BytesIO(response.content)
with tarfile.open(fileobj=b, mode="r:gz") as tar:
tar.extraction_filter = getattr(tarfile, "data_filter", (lambda member, path: member))
with CompressedFile.open_tar(b) as tar:
tar.extractall(extracted_archive_dir)
try:
yield extracted_archive_dir
Expand Down

0 comments on commit c71f410

Please sign in to comment.