From 2b7081fe6af8b9f692d293afb7c1055e925db684 Mon Sep 17 00:00:00 2001 From: Jordan Matelsky Date: Mon, 8 Nov 2021 20:29:56 -0500 Subject: [PATCH 01/14] Begin to add support for cloud protocols in open_file --- elf/io/files.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/elf/io/files.py b/elf/io/files.py index 24974f2..a37a38d 100644 --- a/elf/io/files.py +++ b/elf/io/files.py @@ -26,6 +26,11 @@ def open_file(path, mode='a', ext=None): ext [str] - file extension. This can be used to force an extension if it cannot be inferred from the filename. (default: None) """ + # Before checking the extension suffix, check for "protocol-style" + # cloud provider prefixes. + if "://" in path: + ext = path.split("://")[0] + "://" + ext = os.path.splitext(path)[1] if ext is None else ext try: constructor = FILE_CONSTRUCTORS[ext.lower()] From 590b8a9e40a55fabb14269cab7a7833e2c52b225 Mon Sep 17 00:00:00 2001 From: Jordan Matelsky Date: Mon, 8 Nov 2021 20:54:42 -0500 Subject: [PATCH 02/14] Add intern shims to extensions.py --- elf/io/extensions.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/elf/io/extensions.py b/elf/io/extensions.py index 30081a4..b957985 100644 --- a/elf/io/extensions.py +++ b/elf/io/extensions.py @@ -72,6 +72,44 @@ def register_filetype(constructor, extensions=(), groups=(), datasets=()): except ImportError: mrcfile = None +# add bossdb extensions if we have intern +try: + from intern import array as _InternDataset + + # Create a new class to be the intern analog of the h5 File class + + class _InternGroup: + + def __init__(self, filename, mode='r', **kwargs): + self.filename = filename + self.mode = mode + self.array = _InternDataset(self.filename) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + return + + def __getitem__(self, key): + return self.array + + def __setitem__(self, key, value): + return None + + def __delitem__(self, key): + return None + + def keys(self): + return [self.filename] + class _InternFile(_InternGroup): + pass + + + register_filetype(_InternFile, [".intern"], _InternGroup, _InternDataset) + +except ImportError: + pass def identity(arg): return arg From b81aa5c4d5a0c9321b4efd7f9f222b068ef5fff0 Mon Sep 17 00:00:00 2001 From: Jordan Matelsky Date: Mon, 8 Nov 2021 21:05:26 -0500 Subject: [PATCH 03/14] Update intern_wrapper to better match MRC --- elf/io/extensions.py | 37 ++----------------- elf/io/intern_wrapper.py | 76 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 34 deletions(-) create mode 100644 elf/io/intern_wrapper.py diff --git a/elf/io/extensions.py b/elf/io/extensions.py index b957985..ce4ddd5 100644 --- a/elf/io/extensions.py +++ b/elf/io/extensions.py @@ -5,6 +5,7 @@ from .image_stack_wrapper import ImageStackFile, ImageStackDataset from .knossos_wrapper import KnossosFile, KnossosDataset from .mrc_wrapper import MRCFile, MRCDataset +from .intern_wrapper import InternFile, InternDataset __all__ = [ @@ -74,40 +75,8 @@ def register_filetype(constructor, extensions=(), groups=(), datasets=()): # add bossdb extensions if we have intern try: - from intern import array as _InternDataset - - # Create a new class to be the intern analog of the h5 File class - - class _InternGroup: - - def __init__(self, filename, mode='r', **kwargs): - self.filename = filename - self.mode = mode - self.array = _InternDataset(self.filename) - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - return - - def __getitem__(self, key): - return self.array - - def __setitem__(self, key, value): - return None - - def __delitem__(self, key): - return None - - def keys(self): - return [self.filename] - class _InternFile(_InternGroup): - pass - - - register_filetype(_InternFile, [".intern"], _InternGroup, _InternDataset) - + import intern + register_filetype(InternFile, [".intern"], InternFile, InternDataset) except ImportError: pass diff --git a/elf/io/intern_wrapper.py b/elf/io/intern_wrapper.py new file mode 100644 index 0000000..28669ed --- /dev/null +++ b/elf/io/intern_wrapper.py @@ -0,0 +1,76 @@ +from collections.abc import Mapping + +try: + from intern import array +except ImportError: + pass + + +class InternDataset: + def __init__(self, cloud_path): + self._data = array(cloud_path) + + @property + def dtype(self): + return self._data.dtype + + @property + def ndim(self): + return 3 # todo: this COULD be 4 etc... + + # TODO chunks are arbitrary, how do we handle this? + @property + def chunks(self): + return None + + @property + def shape(self): + return self._data.shape + + def __getitem__(self, key): + return self._data[key] + + def __setitem__(self, key, value): + self._data[key] = value + + @property + def size(self): + shape = self._data.shape + return shape[0] * shape[1] * shape[2] + + # dummy attrs to be compatible with h5py/z5py/zarr API + @property + def attrs(self): + return {} + + +class InternFile(Mapping): + """ Wrapper for an intern dataset + """ + + def __init__(self, path, mode="r"): + self.path = path + self.mode = mode + + def __getitem__(self, key): + return InternDataset(self.path) + + def __iter__(self): + yield "data" + + def __len__(self): + return 1 + + def __contains__(self, name): + return name == "data" + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self._f.close() + + # dummy attrs to be compatible with h5py/z5py/zarr API + @property + def attrs(self): + return {} From 25a0180b157641343664c52bf44295985c93cbc0 Mon Sep 17 00:00:00 2001 From: Jordan Matelsky Date: Mon, 8 Nov 2021 21:06:41 -0500 Subject: [PATCH 04/14] Add is_intern check --- elf/io/files.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/elf/io/files.py b/elf/io/files.py index a37a38d..81d16fc 100644 --- a/elf/io/files.py +++ b/elf/io/files.py @@ -5,6 +5,7 @@ ) from .knossos_wrapper import KnossosFile, KnossosDataset from .mrc_wrapper import MRCFile, MRCDataset +from .intern_wrapper import InternFile, InternDataset def supported_extensions(): @@ -86,3 +87,8 @@ def is_mrc(node): """ Check if this is a MRCWrapper object. """ return isinstance(node, (MRCFile, MRCDataset)) + +def is_intern(node): + """ Check if this is a Intern wrapper object. + """ + return isinstance(node, (InternFile, InternDataset)) \ No newline at end of file From 509d2c5208fc0f69d00f7fec0db221895286ff94 Mon Sep 17 00:00:00 2001 From: Jordan Matelsky Date: Mon, 8 Nov 2021 21:09:29 -0500 Subject: [PATCH 05/14] Use protocol style notation for intern ext --- elf/io/extensions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elf/io/extensions.py b/elf/io/extensions.py index ce4ddd5..eba23b4 100644 --- a/elf/io/extensions.py +++ b/elf/io/extensions.py @@ -76,7 +76,7 @@ def register_filetype(constructor, extensions=(), groups=(), datasets=()): # add bossdb extensions if we have intern try: import intern - register_filetype(InternFile, [".intern"], InternFile, InternDataset) + register_filetype(InternFile, ["bossdb://"], InternFile, InternDataset) except ImportError: pass From e0254ef8ec4cc819147dbd96ec8894c494320007 Mon Sep 17 00:00:00 2001 From: Jordan Matelsky Date: Mon, 8 Nov 2021 21:18:51 -0500 Subject: [PATCH 06/14] Update setup.py --- setup.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 56671f3..2ce5216 100644 --- a/setup.py +++ b/setup.py @@ -16,12 +16,13 @@ extras = { "hdf5": "h5py", "zarr": "zarr", - "n5": "pyn5" + "n5": "pyn5", + "cloud": "intern" } # dependencies only available via conda, # we still collect them here, because the conda recipe -# gets it's requirements from setuptools. +# gets its requirements from setuptools. conda_only = ["vigra", "nifty", "z5py"] # collect all dependencies for conda From 067ccc746dda517c1ae1febd11744b0158246995 Mon Sep 17 00:00:00 2001 From: Jordan Matelsky Date: Wed, 10 Nov 2021 19:12:28 -0500 Subject: [PATCH 07/14] Add a check for intern import failure --- elf/io/intern_wrapper.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/elf/io/intern_wrapper.py b/elf/io/intern_wrapper.py index 28669ed..50afbec 100644 --- a/elf/io/intern_wrapper.py +++ b/elf/io/intern_wrapper.py @@ -3,11 +3,23 @@ try: from intern import array except ImportError: - pass + intern = None + + +def _check_intern_importable(): + if intern is None: + raise ImportError( + "Could not import the `intern` library. This means you cannot " + "download or upload cloud datasets. To fix this, you can install " + "intern with: \n\n\t" + "pip install intern" + ) + return True class InternDataset: def __init__(self, cloud_path): + _check_intern_importable() self._data = array(cloud_path) @property @@ -49,6 +61,7 @@ class InternFile(Mapping): """ def __init__(self, path, mode="r"): + _check_intern_importable() self.path = path self.mode = mode From 945d3e6ac6f6dcbace15c97439f6bf3e1db6ccb1 Mon Sep 17 00:00:00 2001 From: Jordan Matelsky Date: Wed, 10 Nov 2021 19:33:42 -0500 Subject: [PATCH 08/14] Add intern wrapper unit tests --- test/io_tests/test_intern_wrapper.py | 41 ++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 test/io_tests/test_intern_wrapper.py diff --git a/test/io_tests/test_intern_wrapper.py b/test/io_tests/test_intern_wrapper.py new file mode 100644 index 0000000..3833f80 --- /dev/null +++ b/test/io_tests/test_intern_wrapper.py @@ -0,0 +1,41 @@ +import os +import unittest +from shutil import rmtree + +import numpy as np + +try: + import intern +except ImportError: + intern = None + + +@unittest.skipIf(intern is None, "Needs intern (pip install intern)") +class TestInternWrapper(unittest.TestCase): + def test_can_access_dataset(self): + from elf.io.intern_wrapper import InternDataset + + # Choosing a dataset at random to make sure we can access shape and dtype + ds = InternDataset("bossdb://witvliet2020/Dataset_1/em") + self.assertEqual(ds.shape, (300, 36000, 22000)) + self.assertEqual(ds.dtype, np.uint8) + self.assertEqual(ds.size, 300 * 36000 * 22000) + self.assertEqual(ds.ndim, 3) + + def test_can_download_dataset(self): + from elf.io.intern_wrapper import InternDataset + + ds = InternDataset("bossdb://witvliet2020/Dataset_1/em") + cutout = ds[210:211, 7000:7064, 7000:7064] + self.assertEqual(cutout.shape, (1, 64, 64)) + + def test_file(self): + from elf.io.intern_wrapper import InternFile, InternDataset + + f = InternFile("bossdb://witvliet2020/Dataset_1/em") + ds = f["data"] + self.assertIsInstance(ds, InternDataset) + + +if __name__ == "__main__": + unittest.main() From d4280222dfc634283405eeafd286881ddb90a6d1 Mon Sep 17 00:00:00 2001 From: Jordan Matelsky Date: Thu, 11 Nov 2021 09:06:23 -0500 Subject: [PATCH 09/14] Update intern tests to verify data can be accessed --- elf/io/intern_wrapper.py | 6 ++++-- test/io_tests/test_intern_wrapper.py | 11 +++++++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/elf/io/intern_wrapper.py b/elf/io/intern_wrapper.py index 50afbec..b1e037a 100644 --- a/elf/io/intern_wrapper.py +++ b/elf/io/intern_wrapper.py @@ -2,12 +2,14 @@ try: from intern import array + + intern_imported = True except ImportError: - intern = None + intern_imported = False def _check_intern_importable(): - if intern is None: + if not intern_imported: raise ImportError( "Could not import the `intern` library. This means you cannot " "download or upload cloud datasets. To fix this, you can install " diff --git a/test/io_tests/test_intern_wrapper.py b/test/io_tests/test_intern_wrapper.py index 3833f80..5535df8 100644 --- a/test/io_tests/test_intern_wrapper.py +++ b/test/io_tests/test_intern_wrapper.py @@ -26,8 +26,15 @@ def test_can_download_dataset(self): from elf.io.intern_wrapper import InternDataset ds = InternDataset("bossdb://witvliet2020/Dataset_1/em") - cutout = ds[210:211, 7000:7064, 7000:7064] - self.assertEqual(cutout.shape, (1, 64, 64)) + cutout = ds[210:212, 7000:7064, 7000:7064] + self.assertEqual(cutout.shape, (2, 64, 64)) + # Pick a few random points to verify. (This is a static dataset so + # this won't fail unless the internet connection is broken.) + # These are known "magic numbers" from a known-working intern install. + self.assertEqual(cutout[0, 0, 0], 127) + self.assertEqual(cutout[0, 0, 42], 142) + self.assertEqual(cutout[0, 42, 1], 122) + self.assertEqual(cutout[1, 4, 7], 134) def test_file(self): from elf.io.intern_wrapper import InternFile, InternDataset From d5d3d2b176e118cc4931a38536a856f4b119cc3c Mon Sep 17 00:00:00 2001 From: Jordan Matelsky Date: Thu, 11 Nov 2021 11:02:49 -0500 Subject: [PATCH 10/14] Add intern to intsallation environment --- .github/workflows/environment.yaml | 5 +++-- environment.yaml | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/environment.yaml b/.github/workflows/environment.yaml index 2759ccd..c7a9846 100644 --- a/.github/workflows/environment.yaml +++ b/.github/workflows/environment.yaml @@ -4,14 +4,15 @@ name: elf-dev dependencies: - affogato - - imageio - h5py + - imageio + - intern - mrcfile - nifty >=1.1 - numba - pandas - - python - pip + - python - scikit-image - skan - tqdm diff --git a/environment.yaml b/environment.yaml index 3ae84c9..f72c3ab 100644 --- a/environment.yaml +++ b/environment.yaml @@ -4,8 +4,9 @@ name: elf-dev dependencies: - affogato - - imageio - h5py + - imageio + - intern - mrcfile - nifty - numba From 0d1a2b6e5233fc64471f197a9977d89eee97edf1 Mon Sep 17 00:00:00 2001 From: Jordan Matelsky Date: Thu, 11 Nov 2021 11:33:40 -0500 Subject: [PATCH 11/14] Fix test typo --- test/io_tests/test_intern_wrapper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/io_tests/test_intern_wrapper.py b/test/io_tests/test_intern_wrapper.py index 5535df8..07e8cb3 100644 --- a/test/io_tests/test_intern_wrapper.py +++ b/test/io_tests/test_intern_wrapper.py @@ -17,9 +17,9 @@ def test_can_access_dataset(self): # Choosing a dataset at random to make sure we can access shape and dtype ds = InternDataset("bossdb://witvliet2020/Dataset_1/em") - self.assertEqual(ds.shape, (300, 36000, 22000)) + self.assertEqual(ds.shape, (300, 26000, 22000)) self.assertEqual(ds.dtype, np.uint8) - self.assertEqual(ds.size, 300 * 36000 * 22000) + self.assertEqual(ds.size, 300 * 26000 * 22000) self.assertEqual(ds.ndim, 3) def test_can_download_dataset(self): From 9932ccc4a66d278497e4713e6d3dcfa0890a7c71 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Thu, 11 Nov 2021 17:54:03 +0100 Subject: [PATCH 12/14] Update elf/io/intern_wrapper.py --- elf/io/intern_wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elf/io/intern_wrapper.py b/elf/io/intern_wrapper.py index b1e037a..066a99d 100644 --- a/elf/io/intern_wrapper.py +++ b/elf/io/intern_wrapper.py @@ -26,7 +26,7 @@ def __init__(self, cloud_path): @property def dtype(self): - return self._data.dtype + return np.dtype(self._data.dtype) @property def ndim(self): From b87f36e8afa50fccb4928da7646736fae5e84b25 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Thu, 11 Nov 2021 17:54:10 +0100 Subject: [PATCH 13/14] Update elf/io/intern_wrapper.py --- elf/io/intern_wrapper.py | 1 + 1 file changed, 1 insertion(+) diff --git a/elf/io/intern_wrapper.py b/elf/io/intern_wrapper.py index 066a99d..e8da2ce 100644 --- a/elf/io/intern_wrapper.py +++ b/elf/io/intern_wrapper.py @@ -1,4 +1,5 @@ from collections.abc import Mapping +import numpy as np try: from intern import array From 8f9edfd5a1500fc3c3c20d14f9b3101b7eb3be8c Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Thu, 11 Nov 2021 22:06:13 +0100 Subject: [PATCH 14/14] Update test_intern_wrapper.py --- test/io_tests/test_intern_wrapper.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/io_tests/test_intern_wrapper.py b/test/io_tests/test_intern_wrapper.py index 07e8cb3..3db40fb 100644 --- a/test/io_tests/test_intern_wrapper.py +++ b/test/io_tests/test_intern_wrapper.py @@ -5,12 +5,12 @@ import numpy as np try: - import intern + from intern import array except ImportError: - intern = None + array = None -@unittest.skipIf(intern is None, "Needs intern (pip install intern)") +@unittest.skipIf(array is None, "Needs intern (pip install intern)") class TestInternWrapper(unittest.TestCase): def test_can_access_dataset(self): from elf.io.intern_wrapper import InternDataset