Merge pull request #21 from pattonw/n5-volume
N5Volume
aschampion authored Mar 14, 2019
2 parents 0533b79 + f88f7ea commit 3df1e06
Showing 5 changed files with 288 additions and 7 deletions.
11 changes: 5 additions & 6 deletions .travis.yml
@@ -1,11 +1,10 @@
sudo: required
dist: trusty
language: python
python:
- 3.6
- 3.5
- 3.4
- 2.7
jobs:
include:
- python: 3.6
- python: 3.5
notifications:
email: false
install:
@@ -15,7 +14,7 @@ script: tox
deploy:
on:
repo: aschampion/diluvian
python: 2.7
python: 3.5
tags: true
distributions: sdist bdist_wheel
password:
214 changes: 214 additions & 0 deletions diluvian/volumes.py
@@ -19,6 +19,7 @@
from scipy import ndimage
import six
from six.moves import range as xrange
import pyn5

from .config import CONFIG
from .octrees import OctreeVolume
@@ -1354,3 +1355,216 @@ def __next__(self):
ctr = np.array([self.random.randint(self.ctr_min[n], self.ctr_max[n])
for n in range(3)]).astype(np.int64)
return SubvolumeBounds(seed=ctr)


class N5Volume(Volume):
"""A Volume for using an N5 filesystem for image retrieval
Parameters
----------
root_path : string
/absolute/path/to/data.n5
dataset : dict of dicts (dataset name to dataset config)
possible keys: ("mask","labels","image")
values: {"path": path, "dtype": dtype, "read_only": read_only}
resolution : iterable of float
Resolution of the pixels at zoom level 0 in nm.
translation : iterable of float
Translational offset in nm s.t. for given coordinate
a in pixel space, a*resolution+translation = b where
b is in the desired nm coordinates
bounds: iterable of int, optional
Shape of the stack at zoom level 0 in pixels.
necessary if the volume is missing an attributes file
tile_width, tile_height : int, optional
Size of tiles in pixels
necessary if the volume is missing an attributes file
"""

    @staticmethod
    def from_toml(filename):
volumes = {}
with open(filename, "rb") as fin:
volume_configs = toml.load(fin).get("N5Volume", [])
for volume_config in volume_configs:
root_path = volume_config["root_path"]
datasets = volume_config["datasets"]
resolution = volume_config.get("resolution", None)
                translation = volume_config.get("translation", None)
bounds = volume_config.get("bounds", None)
volume = N5Volume(
root_path,
datasets,
bounds,
resolution,
translation,
)
volumes[volume_config["title"]] = volume

return volumes

def __init__(
self,
root_path,
datasets,
bounds=None,
resolution=None,
translation=None,
):

self._dtype_map = {
"UINT8": np.uint8,
"UINT16": np.uint16,
"UINT32": np.uint32,
"UINT64": np.uint64,
"INT8": np.int8,
"INT16": np.int16,
"INT32": np.int32,
"INT64": np.int64,
"FLOAT32": np.float32,
"FLOAT64": np.float64,
}
self.bounds = bounds
self.resolution = resolution
self.translation = translation

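        # Only zoom level 0 is exposed, so the scale factor (2 ** zoom) is (1, 1, 1).
        # data_shape spans the origin to bounds / scale and is used as the bounds of
        # the OctreeVolume caches created by the dataset config setters below.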
self.scale = np.exp2(np.array([0, 0, 0])).astype(np.int64)
self.data_shape = (np.array([0, 0, 0]), self.bounds / self.scale)

# Initialization of data sources done in setter methods
self.root_path = root_path
self.image_config = datasets.get("image", None)
self.mask_config = datasets.get("mask", None)
self.label_config = datasets.get("label", None)

@property
def dtype_map(self):
return self._dtype_map

def local_coord_to_world(self, a):
return np.multiply(a, self.scale)

def world_coord_to_local(self, a):
return np.floor_divide(a, self.scale)

    def real_coord_to_world(self, a):
        return np.floor_divide(a - self.translation, self.resolution)

    def world_coord_to_real(self, a):
        return np.multiply(a, self.resolution) + self.translation

@property
def octree_leaf_shape(self):
return np.array([10, 10, 10])

@property
def image_config(self):
return self._image_config

@image_config.setter
def image_config(self, dataset):
self._image_config = dataset
if dataset is not None:
self._image_data = OctreeVolume(
self.octree_leaf_shape,
self.data_shape,
self.dtype_map[dataset.get("dtype", "FLOAT32")],
populator=self.image_populator,
)
else:
self._image_data = None

@property
def image_data(self):
return self._image_data

@property
def mask_config(self):
return self._mask_config

@mask_config.setter
def mask_config(self, dataset):
self._mask_config = dataset
if dataset is not None:
self._mask_data = OctreeVolume(
self.octree_leaf_shape,
self.data_shape,
self.dtype_map[dataset.get("dtype", "FLOAT32")],
populator=self.mask_populator,
)
else:
self._mask_data = None

@property
def mask_data(self):
return self._mask_data

@property
def label_config(self):
return self._label_config

@label_config.setter
def label_config(self, dataset):
self._label_config = dataset
if dataset is not None:
self._label_data = OctreeVolume(
self.octree_leaf_shape,
self.data_shape,
self.dtype_map[dataset.get("dtype", "FLOAT32")],
populator=self.label_populator,
)
else:
self._label_data = None

@property
def label_data(self):
return self._label_data

@property
def image_n5(self):
"""
Create a new pyn5.Dataset every time you ask for image_n5.
This is necessary to accomadate parrallel reads since multiple
threads can't use the same reader.
"""
if self.image_config is not None:
return pyn5.open(
self.root_path,
self.image_config.get("path"),
self.image_config.get("dtype", "UINT8"),
self.image_config.get("read_only", True),
)
else:
return None

def image_populator(self, bounds):
return pyn5.read(self.image_n5, (bounds[0], bounds[1]))

@property
def mask_n5(self):
if self.mask_config is not None:
return pyn5.open(
self.root_path,
self.mask_config.get("path"),
self.mask_config.get("dtype", "UINT8"),
self.mask_config.get("read_only", True),
)
else:
return None

def mask_populator(self, bounds):
return pyn5.read(self.mask_n5, (bounds[0], bounds[1]))

@property
def label_n5(self):
if self.label_config is not None:
return pyn5.open(
self.root_path,
self.label_config.get("path"),
self.label_config.get("dtype", "UINT8"),
self.label_config.get("read_only", True),
)
else:
return None

def label_populator(self, bounds):
return pyn5.read(self.label_n5, bounds)
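
A minimal usage sketch of the new class, assuming a hypothetical N5 container at /data/example.n5 with an image dataset named volumes/raw (the path, dataset name, dimensions, and resolution are illustrative only; the access pattern mirrors the test added below). The same settings can also be loaded from a TOML file via N5Volume.from_toml, which reads [[N5Volume]] tables carrying title, root_path, datasets, and the optional resolution, translation, and bounds keys:

import numpy as np
from diluvian import volumes

# Hypothetical N5 container and dataset; only the "image" dataset is configured here.
v = volumes.N5Volume(
    "/data/example.n5",
    {"image": {"path": "volumes/raw", "dtype": "UINT8", "read_only": True}},
    bounds=[512, 512, 64],
    resolution=[4.0, 4.0, 40.0],
)

# Voxels are fetched lazily: the OctreeVolume populators call pyn5.read on first access.
svb = volumes.SubvolumeBounds(
    np.array((0, 0, 0), dtype=np.int64),
    np.array((32, 32, 32), dtype=np.int64),
)
sv = v.get_subvolume(svb)
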
1 change: 1 addition & 0 deletions requirements/prod.txt
@@ -12,3 +12,4 @@ scipy==0.19.1
six==1.11.0
tensorflow==1.8.0
tqdm==4.19.1
pyn5==0.1.0
67 changes: 67 additions & 0 deletions tests/test_diluvian.py
@@ -12,6 +12,9 @@
from __future__ import division

import numpy as np
from pathlib import Path
import shutil
import pyn5

from diluvian import octrees
from diluvian import regions
@@ -181,6 +184,70 @@ def test_volume_transforms_image_stacks():
)


def test_volume_transforms_n5_volume():
# Create test n5 dataset
test_dataset_path = Path("test.n5")
if test_dataset_path.is_dir():
shutil.rmtree(str(test_dataset_path.absolute()))
pyn5.create_dataset("test.n5", "test", [10, 10, 10], [2, 2, 2], "UINT8")
test_dataset = pyn5.open("test.n5", "test")

test_data = np.zeros([10, 10, 10]).astype(int)
x = np.linspace(0, 9, 10).reshape([10, 1, 1]).astype(int)
test_data = test_data + x + x.transpose([1, 2, 0]) + x.transpose([2, 0, 1])

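    # The 10**3 test volume is stored in 2**3 N5 blocks, so there are 5**3 block
    # positions to fill; each block is written as a flattened array.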
block_starts = [(i % 5, i // 5 % 5, i // 25 % 5) for i in range(5 ** 3)]
for block_start in block_starts:
current_bound = list(
map(slice, [2 * x for x in block_start], [2 * x + 2 for x in block_start])
)
flattened = test_data[current_bound].reshape(-1)
try:
test_dataset.write_block(block_start, flattened)
except Exception as e:
raise AssertionError("Writing to n5 failed! Could not create test dataset.\nError: {}".format(e))

v = volumes.N5Volume("test.n5",
{"image": {"path": "test", "dtype": "UINT8"}},
bounds=[10, 10, 10],
resolution=[1, 1, 1])
pv = v.partition(
[2, 1, 1], [1, 0, 0]
) # Note axes are flipped after volume initialization
dpv = pv.downsample((2, 2, 2))

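    # With partitioning [2, 1, 1] and index [1, 0, 0], the partition starts at
    # world z = 5; after 2x downsampling, local (2, 2, 2) maps to world
    # (5 + 2 * 2, 2 * 2, 2 * 2) = (9, 4, 4), matching the assertions below.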
np.testing.assert_array_equal(
dpv.local_coord_to_world(np.array([2, 2, 2])), np.array([9, 4, 4])
)
np.testing.assert_array_equal(
dpv.world_coord_to_local(np.array([9, 4, 4])), np.array([2, 2, 2])
)

svb = volumes.SubvolumeBounds(
np.array((5, 0, 0), dtype=np.int64), np.array((7, 2, 2), dtype=np.int64)
)
sv = v.get_subvolume(svb)

dpsvb = volumes.SubvolumeBounds(
np.array((0, 0, 0), dtype=np.int64), np.array((1, 1, 1), dtype=np.int64)
)
dpsv = dpv.get_subvolume(dpsvb)

np.testing.assert_array_equal(
dpsv.image, sv.image.reshape((1, 2, 1, 2, 1, 2)).mean(5).mean(3).mean(1)
)

# sanity check that test.n5 contains varying data
svb2 = volumes.SubvolumeBounds(
np.array((5, 0, 1), dtype=np.int64), np.array((7, 2, 3), dtype=np.int64)
)
sv2 = v.get_subvolume(svb2)
assert not all(sv.image.flatten() == sv2.image.flatten())

if test_dataset_path.is_dir():
shutil.rmtree(str(test_dataset_path.absolute()))


def test_volume_identity_downsample_returns_self():
resolution = (27, 185, 90)
v = volumes.Volume(resolution, image_data=np.zeros((1, 1, 1)), label_data=np.zeros((1, 1, 1)))
2 changes: 1 addition & 1 deletion tox.ini
@@ -1,5 +1,5 @@
[tox]
envlist = py27, py34, py35, py36
envlist = py35, py36

[testenv]
setenv =
