Skip to content

Commit

Permalink
Start implementing real ToilFsAccess URL operations
Browse files Browse the repository at this point in the history
  • Loading branch information
adamnovak committed Oct 5, 2023
1 parent b73b9ef commit b7195f7
Showing 1 changed file with 43 additions and 5 deletions.
48 changes: 43 additions & 5 deletions src/toil/cwl/cwltoil.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import datetime
import errno
import functools
import glob
import json
import logging
import os
Expand Down Expand Up @@ -1273,13 +1274,50 @@ def download_to(url: str, dest: str) -> None:
return destination

def glob(self, pattern: str) -> List[str]:
# We know this falls back on _abs
return super().glob(pattern)
parse = urlparse(path)

This comment has been minimized.

Copy link
@mr-c

mr-c Oct 12, 2023

Contributor

Hey @adamnovak
I didn't know you had started implementing this, so I'll leave some notes from what I did.
It isn't necessary to optimize this method.

+        """Return a list of URIs that match the pattern."""
+        # This is only ever called by cwltool.command_line_tool, post execution.
+        # At that time, all files are local, so it is okay that this falls back
+        # on ToilFSAccess_abs().
if parse.scheme == "file":
pattern = os.path.abspath(unquote(parse.path))
elif parse.scheme == "":
pattern = os.path.abspath(pattern)
else:
raise RuntimeError(f"Cannot efficiently support globbing on {parse.scheme} URIs")

# Actually do the glob
return [schema_salad.ref_resolver.file_uri(f) for f in glob.glob(pattern)]

def open(self, fn: str, mode: str) -> IO[Any]:
# TODO: Also implement JobStore-supported URLs through JobStore methods.
# We know this falls back on _abs
return super().open(fn, mode)
if "w" in mode or "x" in mode or "+" in mode or "a" in mode:
raise RuntimeError(f"Mode {mode} for opening {fn} involves writing")

This comment has been minimized.

Copy link
@mr-c

mr-c Oct 12, 2023

Contributor
+            # as of 2023-10-12, cwltool never calls fs_access.open()
+            # with a writable mode, only "rb" and "r".

On the cwltool side, I can add a new StdFsAccess method named read() (or similar) and deprecate the vague open().


parse = urlparse(fn)
if parse.scheme in ["", "file"]:
# Handle local files
return open(self._abs(path), mode)
elif parse.scheme in ["toilfile", "toildir"]:
if self.file_store is None:
raise RuntimeError("URL requires a file store: " + fn)

encoding=None if "b" in mode else "utf-8"

if parse.scheme == "toildir":
contents, subpath, cache_key = decode_directory(path)
if cache_key in self.dir_to_download:
# This is already available locally
return open(self._abs(path), mode)
else:
# We need to take each path component from subpath and look
# them up in contents, until we find a thing that ought to
# be a string. Then that we need to decode as a FileID and
# stream.
raise NotImplementedError()
elif parse.scheme == "toilfile":
file_id = FileID.unpack(fn[len("toilfile:") :])

return self.file_store.readGlobalFileStream(, )
else:
# This should be supported by a job store.
# We need to make a pipe, send off a thread to AbstractJobStore.read_from_url into the pipe as bytes, and read out of the pipe in the appropriate encoding.

This comment has been minimized.

Copy link
@mr-c

mr-c Oct 12, 2023

Contributor

Or add a new method that accepts a URL and performs similarly to jobStore.read_file_stream(), yes.

raise NotImplementedError()

def exists(self, path: str) -> bool:
"""Test for file existence."""
Expand Down

0 comments on commit b7195f7

Please sign in to comment.