diff --git a/.github/workflows/environment.yaml b/.github/workflows/environment.yaml
index 2759ccd..c7a9846 100644
--- a/.github/workflows/environment.yaml
+++ b/.github/workflows/environment.yaml
@@ -4,14 +4,15 @@ name: elf-dev
 
 dependencies:
   - affogato
-  - imageio
   - h5py
+  - imageio
+  - intern
   - mrcfile
   - nifty >=1.1
   - numba
   - pandas
-  - python
   - pip
+  - python
   - scikit-image
   - skan
   - tqdm
diff --git a/elf/io/extensions.py b/elf/io/extensions.py
index 30081a4..eba23b4 100644
--- a/elf/io/extensions.py
+++ b/elf/io/extensions.py
@@ -5,6 +5,7 @@
 from .image_stack_wrapper import ImageStackFile, ImageStackDataset
 from .knossos_wrapper import KnossosFile, KnossosDataset
 from .mrc_wrapper import MRCFile, MRCDataset
+from .intern_wrapper import InternFile, InternDataset
 
 
 __all__ = [
@@ -72,6 +73,13 @@ def register_filetype(constructor, extensions=(), groups=(), datasets=()):
 except ImportError:
     mrcfile = None
 
+# add bossdb extensions if we have intern
+try:
+    import intern
+    register_filetype(InternFile, ["bossdb://"], InternFile, InternDataset)
+except ImportError:
+    pass
+
 
 def identity(arg):
     return arg
diff --git a/elf/io/files.py b/elf/io/files.py
index 24974f2..81d16fc 100644
--- a/elf/io/files.py
+++ b/elf/io/files.py
@@ -5,6 +5,7 @@
 )
 from .knossos_wrapper import KnossosFile, KnossosDataset
 from .mrc_wrapper import MRCFile, MRCDataset
+from .intern_wrapper import InternFile, InternDataset
 
 
 def supported_extensions():
@@ -26,6 +27,11 @@ def open_file(path, mode='a', ext=None):
         ext [str] - file extension. This can be used to force an extension
             if it cannot be inferred from the filename. (default: None)
     """
+    # Before checking the extension suffix, check for "protocol-style"
+    # cloud provider prefixes, e.g. "bossdb://". An explicitly passed
+    # ext still takes precedence, as documented above.
+    if ext is None and "://" in path:
+        ext = path.split("://")[0] + "://"
     ext = os.path.splitext(path)[1] if ext is None else ext
     try:
         constructor = FILE_CONSTRUCTORS[ext.lower()]
@@ -81,3 +87,9 @@ def is_mrc(node):
     """ Check if this is a MRCWrapper object.
     """
     return isinstance(node, (MRCFile, MRCDataset))
+
+
+def is_intern(node):
+    """ Check if this is an Intern wrapper object.
+    """
+    return isinstance(node, (InternFile, InternDataset))
diff --git a/elf/io/intern_wrapper.py b/elf/io/intern_wrapper.py
new file mode 100644
index 0000000..e8da2ce
--- /dev/null
+++ b/elf/io/intern_wrapper.py
@@ -0,0 +1,92 @@
+from collections.abc import Mapping
+import numpy as np
+
+try:
+    from intern import array
+
+    intern_imported = True
+except ImportError:
+    intern_imported = False
+
+
+def _check_intern_importable():
+    if not intern_imported:
+        raise ImportError(
+            "Could not import the `intern` library. This means you cannot "
+            "download or upload cloud datasets. To fix this, you can install "
+            "intern with: \n\n\t"
+            "pip install intern"
+        )
+    return True
+
+
+class InternDataset:
+    def __init__(self, cloud_path):
+        _check_intern_importable()
+        self._data = array(cloud_path)
+
+    @property
+    def dtype(self):
+        return np.dtype(self._data.dtype)
+
+    @property
+    def ndim(self):
+        return 3  # TODO: bossdb data could also be 4d (with a channel axis)
+
+    # TODO: chunks are arbitrary in bossdb, how do we handle this?
+    @property
+    def chunks(self):
+        return None
+
+    @property
+    def shape(self):
+        return self._data.shape
+
+    def __getitem__(self, key):
+        return self._data[key]
+
+    def __setitem__(self, key, value):
+        self._data[key] = value
+
+    @property
+    def size(self):
+        shape = self._data.shape
+        return shape[0] * shape[1] * shape[2]
+
+    # dummy attrs to be compatible with h5py/z5py/zarr API
+    @property
+    def attrs(self):
+        return {}
+
+
+class InternFile(Mapping):
+    """ Wrapper for an intern dataset.
+    """
+
+    def __init__(self, path, mode="r"):
+        _check_intern_importable()
+        self.path = path
+        self.mode = mode
+
+    def __getitem__(self, key):
+        return InternDataset(self.path)
+
+    def __iter__(self):
+        yield "data"
+
+    def __len__(self):
+        return 1
+
+    def __contains__(self, name):
+        return name == "data"
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        pass  # nothing to close: intern does not keep an open file handle
+
+    # dummy attrs to be compatible with h5py/z5py/zarr API
+    @property
+    def attrs(self):
+        return {}
diff --git a/environment.yaml b/environment.yaml
index 3ae84c9..f72c3ab 100644
--- a/environment.yaml
+++ b/environment.yaml
@@ -4,8 +4,9 @@ name: elf-dev
 
 dependencies:
   - affogato
-  - imageio
   - h5py
+  - imageio
+  - intern
   - mrcfile
   - nifty
   - numba
diff --git a/setup.py b/setup.py
index 56671f3..2ce5216 100644
--- a/setup.py
+++ b/setup.py
@@ -16,12 +16,13 @@
 extras = {
     "hdf5": "h5py",
     "zarr": "zarr",
-    "n5": "pyn5"
+    "n5": "pyn5",
+    "cloud": "intern"
 }
 
 # dependencies only available via conda,
 # we still collect them here, because the conda recipe
-# gets it's requirements from setuptools.
+# gets its requirements from setuptools.
 conda_only = ["vigra", "nifty", "z5py"]
 
 # collect all dependencies for conda
diff --git a/test/io_tests/test_intern_wrapper.py b/test/io_tests/test_intern_wrapper.py
new file mode 100644
index 0000000..3db40fb
--- /dev/null
+++ b/test/io_tests/test_intern_wrapper.py
@@ -0,0 +1,46 @@
+import unittest
+
+import numpy as np
+
+try:
+    from intern import array
+except ImportError:
+    array = None
+
+
+@unittest.skipIf(array is None, "Needs intern (pip install intern)")
+class TestInternWrapper(unittest.TestCase):
+    def test_can_access_dataset(self):
+        from elf.io.intern_wrapper import InternDataset
+
+        # An arbitrarily chosen public dataset, used to check shape and dtype access
+        ds = InternDataset("bossdb://witvliet2020/Dataset_1/em")
+        self.assertEqual(ds.shape, (300, 26000, 22000))
+        self.assertEqual(ds.dtype, np.uint8)
+        self.assertEqual(ds.size, 300 * 26000 * 22000)
+        self.assertEqual(ds.ndim, 3)
+
+    def test_can_download_dataset(self):
+        from elf.io.intern_wrapper import InternDataset
+
+        ds = InternDataset("bossdb://witvliet2020/Dataset_1/em")
+        cutout = ds[210:212, 7000:7064, 7000:7064]
+        self.assertEqual(cutout.shape, (2, 64, 64))
+        # Check a few fixed points of the cutout. (This is a static dataset,
+        # so this should only fail if the connection is broken.) The values
+        # are known "magic numbers" from a known-working intern install.
+        self.assertEqual(cutout[0, 0, 0], 127)
+        self.assertEqual(cutout[0, 0, 42], 142)
+        self.assertEqual(cutout[0, 42, 1], 122)
+        self.assertEqual(cutout[1, 4, 7], 134)
+
+    def test_file(self):
+        from elf.io.intern_wrapper import InternFile, InternDataset
+
+        f = InternFile("bossdb://witvliet2020/Dataset_1/em")
+        ds = f["data"]
+        self.assertIsInstance(ds, InternDataset)
+
+
+if __name__ == "__main__":
+    unittest.main()
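
For reference, a minimal usage sketch of the new "bossdb://" support (an illustration, not part of the patch; it assumes elf and intern are installed and that the public witvliet2020 dataset used in the tests above is reachable):

    from elf.io import open_file

    # The "bossdb://" prefix is caught by the protocol-style check in
    # open_file, so the path is routed to InternFile instead of the
    # extension-based constructors.
    with open_file("bossdb://witvliet2020/Dataset_1/em", mode="r") as f:
        ds = f["data"]  # InternFile exposes a single dataset named "data"
        print(ds.shape, ds.dtype)  # (300, 26000, 22000) uint8
        # Slicing goes through intern's array wrapper, which downloads
        # only the requested cutout.
        cutout = ds[210:212, 7000:7064, 7000:7064]
        print(cutout.shape)  # (2, 64, 64)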