Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow working with remote files in CWL and WDL workflows #4690

Merged
merged 32 commits into from
Dec 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
b7195f7
Start implementing real ToilFsAccess URL operations
adamnovak Oct 5, 2023
c5d5167
Implement URL opening for CWL
adamnovak Oct 6, 2023
2fe1d65
Implement other ToilFsAccess operations without local copies
adamnovak Oct 6, 2023
364dffa
Remove getSize spelling and pass mypy
adamnovak Oct 6, 2023
c94912c
Add missing import
adamnovak Oct 6, 2023
0fa54f7
Remove check for extremely old setuptools
adamnovak Oct 6, 2023
389e5e9
Add --reference-inputs option to toil-cwl-runner
adamnovak Oct 12, 2023
51bd888
Allow files to be gotten by URI on the nodes
adamnovak Oct 12, 2023
5a4d36d
Add some tests to exercise URL references
adamnovak Oct 12, 2023
535e945
Implement URI access and import logic in WDL interpreter
adamnovak Oct 12, 2023
adc0751
Merge remote-tracking branch 'upstream/master' into issues/4147-leave…
adamnovak Nov 16, 2023
694b29a
Remove duplicated test
adamnovak Nov 16, 2023
be191fe
Fixc some merge problems
adamnovak Nov 16, 2023
ffc129e
Satisfy MyPy
adamnovak Nov 16, 2023
804ac25
Spell default correctly
adamnovak Nov 16, 2023
3438daa
Actually hook up import bypass flag
adamnovak Nov 16, 2023
e6f92b8
Actually pass self test when using URLs
adamnovak Nov 16, 2023
e7b0d06
Merge remote-tracking branch 'upstream/master' into issues/4147-leave…
adamnovak Nov 21, 2023
3d1e8f6
Make file job store volunteer for non-schemed URIs
adamnovak Nov 21, 2023
ee3ef61
Revert "Make file job store volunteer for non-schemed URIs"
adamnovak Nov 21, 2023
1d62c59
Handle size requests for bare filenames
adamnovak Nov 21, 2023
387cd0f
Handle polling for URL existence
adamnovak Nov 21, 2023
eb98426
Merge branch 'master' into issues/4147-leave-in-s3
mr-c Nov 30, 2023
739e1dd
Add a make test_debug target for getting test logs
adamnovak Dec 2, 2023
dbb9a8d
Add more logging to CWL streaming tests
adamnovak Dec 2, 2023
602a45c
Contemplate multi-threaded access to the CachingFileStore from user code
adamnovak Dec 2, 2023
c6d9682
Merge remote-tracking branch 'upstream/master' into issues/4147-leave…
adamnovak Dec 2, 2023
2d4ecf8
Merge remote-tracking branch 'upstream/master' into issues/4147-leave…
adamnovak Dec 5, 2023
e78e192
Allow downloading URLs in structures, and poll AWS directory existenc…
adamnovak Dec 5, 2023
b871ee1
Update tests to a Debian with ARM Docker images
adamnovak Dec 5, 2023
f087a44
Undo permission changes
adamnovak Dec 5, 2023
bb62e26
Add missing import
adamnovak Dec 5, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,10 @@ test: check_venv check_build_reqs
TOIL_OWNER_TAG="shared" \
python -m pytest --durations=0 --strict-markers --log-level DEBUG --log-cli-level INFO -r s $(cov) -n $(threads) --dist loadscope $(tests) -m "$(marker)"

# Run the selected tests with DEBUG-level logs streamed live to the console
# (-s, log_cli=true), native tracebacks, and a stop at the first failure.
# Unlike the `test` target, no -n/--dist options are passed.
# NOTE: `$$` is required so that make hands a literal `$(whoami)` to the
# shell; a single `$(whoami)` would be expanded by make as an (undefined)
# variable and leave TOIL_OWNER_TAG empty.
test_debug: check_venv check_build_reqs
	TOIL_OWNER_TAG="$$(whoami)" \
	python -m pytest --durations=0 --strict-markers --log-level DEBUG -s -o log_cli=true --log-cli-level DEBUG -r s $(tests) -m "$(marker)" --tb=native --maxfail=1


# This target will skip building docker and all docker based tests
# these are our travis tests; rename?
Expand Down
380 changes: 272 additions & 108 deletions src/toil/cwl/cwltoil.py

Large diffs are not rendered by default.

59 changes: 49 additions & 10 deletions src/toil/cwl/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@

import logging
import os
from pathlib import PurePosixPath
import posixpath
import stat
from typing import (
Any,
Callable,
Expand All @@ -31,6 +34,7 @@

from toil.fileStores import FileID
from toil.fileStores.abstractFileStore import AbstractFileStore
from toil.jobStores.abstractJobStore import AbstractJobStore

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -128,6 +132,32 @@ def visit_cwl_class_and_reduce(

DirectoryStructure = Dict[str, Union[str, "DirectoryStructure"]]


def get_from_structure(dir_dict: DirectoryStructure, path: str) -> Union[str, DirectoryStructure, None]:
    """
    Given a relative path, follow it in the given directory structure.

    The path is normalized lexically first, so ``.`` and ``..`` components
    are collapsed before the structure is consulted.

    :param dir_dict: Nested dict mapping names to either a string (a file's
        URI) or another dict of the same shape (a subdirectory).

    :param path: POSIX-style relative path to look up. An empty path or
        ``.`` refers to ``dir_dict`` itself.

    :returns: The string URI for files, the directory dict for
        subdirectories, or None for nonexistent things.

    :raises RuntimeError: If the path is absolute or climbs above the root
        of the structure.
    """

    # Resolve .. and split into path components
    parts = PurePosixPath(posixpath.normpath(path)).parts
    if not parts:
        # The path normalized to ".", which is the structure's own root.
        return dir_dict
    # After normalization, any remaining ".." can only be leading, which
    # means the path escapes the structure. A root component means the path
    # is absolute; POSIX keeps exactly two leading slashes distinct, so the
    # root may be "//" as well as "/".
    if parts[0] == '..' or parts[0].startswith('/'):
        raise RuntimeError(f"Path {path} not resolvable in virtual directory")
    found: Union[str, DirectoryStructure] = dir_dict
    for part in parts:
        # Go down by each path component in turn
        if isinstance(found, str):
            # Looking for a subdirectory of a file, which doesn't exist
            return None
        if part not in found:
            return None
        found = found[part]
    # Now we're at the place we want to be.
    return found


def download_structure(
file_store: AbstractFileStore,
Expand All @@ -140,11 +170,12 @@ def download_structure(
Download nested dictionary from the Toil file store to a local path.

Guaranteed to fill the structure with real files, and not symlinks out of
it to elsewhere.
it to elsewhere. File URIs may be toilfile: URIs or any other URI that
Toil's job store system can read.

:param file_store: The Toil file store to download from.

:param index: Maps from downloaded file path back to input Toil URI.
:param index: Maps from downloaded file path back to input URI.

:param existing: Maps from file_store_id URI to downloaded file path.

Expand All @@ -171,16 +202,24 @@ def download_structure(
# This must be a file path uploaded to Toil.
if not isinstance(value, str):
raise RuntimeError(f"Did not find a file at {value}.")
if not value.startswith("toilfile:"):
raise RuntimeError(f"Did not find a filestore file at {value}")

logger.debug("Downloading contained file '%s'", name)
dest_path = os.path.join(into_dir, name)
# So download the file into place.
# Make sure to get a real copy of the file because we may need to
# mount the directory into a container as a whole.
file_store.readGlobalFile(
FileID.unpack(value[len("toilfile:") :]), dest_path, symlink=False
)

if value.startswith("toilfile:"):
# So download the file into place.
# Make sure to get a real copy of the file because we may need to
# mount the directory into a container as a whole.
file_store.readGlobalFile(
FileID.unpack(value[len("toilfile:") :]), dest_path, symlink=False
)
else:
# We need to download from some other kind of URL.
size, executable = AbstractJobStore.read_from_url(value, open(dest_path, 'wb'))
if executable:
# Make the written file executable
os.chmod(dest_path, os.stat(dest_path).st_mode | stat.S_IXUSR)

# Update the index dicts
# TODO: why?
index[dest_path] = value
Expand Down
19 changes: 18 additions & 1 deletion src/toil/fileStores/abstractFileStore.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,14 @@
Generator,
Iterator,
List,
Literal,
Optional,
Set,
Tuple,
Type,
Union,
cast)
cast,
overload)

import dill

Expand Down Expand Up @@ -413,6 +415,21 @@ def readGlobalFile(
"""
raise NotImplementedError()

@overload
def readGlobalFileStream(
self,
fileStoreID: str,
encoding: Literal[None] = None,
errors: Optional[str] = None,
) -> ContextManager[IO[bytes]]:
...

@overload
def readGlobalFileStream(
self, fileStoreID: str, encoding: str, errors: Optional[str] = None
) -> ContextManager[IO[str]]:
...

@abstractmethod
def readGlobalFileStream(
self,
Expand Down
Loading