Skip to content

Commit

Permalink
Update the ome.zarr example creation
Browse files Browse the repository at this point in the history
  • Loading branch information
constantinpape committed Jun 11, 2021
1 parent 158c941 commit 301482b
Show file tree
Hide file tree
Showing 3 changed files with 197 additions and 27 deletions.
53 changes: 39 additions & 14 deletions elf/io/ngff.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import numpy as np
import skimage.transform
# we use zarr here because z5py does not support nested directory for the zarr format
import zarr
Expand All @@ -21,41 +22,65 @@ def _validate_axes_names(ndim, axes_names):
assert len(axes_names) == ndim
val_axes = tuple(axes_names)
if ndim == 2:
assert val_axes == ('y', 'x')
assert val_axes == ('y', 'x'), str(val_axes)
elif ndim == 3:
assert val_axes in [('z', 'y', 'x'), ('c', 'y', 'x'), ('t', 'y', 'x')]
assert val_axes in [('z', 'y', 'x'), ('c', 'y', 'x'), ('t', 'y', 'x')], str(val_axes)
elif ndim == 4:
assert val_axes in [('t', 'z', 'y', 'x'), ('c', 'z' 'y', 'x'), ('t', 'c', 'y', 'x')]
assert val_axes in [('t', 'z', 'y', 'x'), ('c', 'z', 'y', 'x'), ('t', 'c', 'y', 'x')], str(val_axes)
else:
assert val_axes == ('t', 'c', 'z', 'y', 'x')
assert val_axes == ('t', 'c', 'z', 'y', 'x'), str(val_axes)


# TODO downscale only in spatial dimensions
def _downscale(data, downscaler, kwargs):
data = downscaler(data, **kwargs).astype(data.dtype)
def _downscale(data, axes_names, downscaler, kwargs):
is_spatial = [ax in ('z', 'y', 'x') for ax in axes_names]
# downscaling is easy if we only have spatial axes
if all(is_spatial):
data = downscaler(data, **kwargs).astype(data.dtype)
else:
spatial_start = [i for i, spatial in enumerate(is_spatial) if spatial][0]
assert spatial_start in (1, 2), str(spatial_start)
if spatial_start == 1:
downscaled_data = []
for d in data:
ds = downscaler(d, **kwargs).astype(data.dtype)
downscaled_data.append(ds[None])
data = np.concatenate(downscaled_data, axis=0)
else:
downscaled_data = []
for time_slice in data:
downscaled_channel = []
for channel_slice in time_slice:
ds = downscaler(channel_slice, **kwargs).astype(data.dtype)
downscaled_channel.append(ds[None])
downscaled_channel = np.concatenate(downscaled_channel, axis=0)
downscaled_data.append(downscaled_channel[None])
data = np.concatenate(downscaled_data, axis=0)
return data


# TODO expose dimension separator as param
def write_ome_zarr(data, path, axes_names, name, n_scales,
key=None, chunks=None,
downscaler=skimage.transform.rescale,
kwargs={"scale": (0.5, 0.5, 0.5), "order": 0, "preserve_range": True}):
kwargs={"scale": (0.5, 0.5, 0.5), "order": 0, "preserve_range": True},
dimension_separator="/"):
"""Write numpy data to ome.zarr format.
"""

assert dimension_separator in (".", "/")
assert 2 <= data.ndim <= 5
_validate_axes_names(data.ndim, axes_names)

chunks = _get_chunks(axes_names) if chunks is None else chunks
store = zarr.NestedDirectoryStore(path, dimension_separator="/")
if dimension_separator == "/":
store = zarr.NestedDirectoryStore(path, dimension_separator=dimension_separator)
else:
store = zarr.DirectoryStore(path, dimension_separator=dimension_separator)

with zarr.open(store, mode='a') as f:
g = f if key is None else f.require_group(key)
g.create_dataset('s0', data=data, chunks=chunks, dimension_separator="/")
g.create_dataset('s0', data=data, chunks=chunks, dimension_separator=dimension_separator)
for ii in range(1, n_scales):
data = _downscale(data, downscaler, kwargs)
g.create_dataset(f's{ii}', data=data, chunks=chunks, dimension_separator="/")
data = _downscale(data, axes_names, downscaler, kwargs)
g.create_dataset(f's{ii}', data=data, chunks=chunks, dimension_separator=dimension_separator)
function_name = f'{downscaler.__module__}.{downscaler.__name__}'
create_ngff_metadata(g, name, axes_names,
type_=function_name, metadata=kwargs)
Expand Down
37 changes: 37 additions & 0 deletions example/io/check_ngff_examples.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import argparse
import os
from glob import glob

import napari
import zarr


def check_example(ff):
fname = os.path.split(ff)[1]
print("Checking", fname)
if fname.startswith('flat'):
store = zarr.storage.DirectoryStore(ff)
else:
store = zarr.storage.NestedDirectoryStore(ff)

with zarr.open(store, mode='r') as f:
data = f['s0'][:]
print(data.shape)

with napari.gui_qt():
v = napari.Viewer()
v.title = fname
v.add_image(data)


def check_examples(folder):
files = glob(os.path.join(folder, '*.ome.zarr'))
for ff in files:
check_example(ff)


if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('folder')
args = parser.parse_args()
check_examples(args.folder)
134 changes: 121 additions & 13 deletions example/io/ngff_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,22 +17,29 @@ def _kwargs_3d():

def _load_data(path, key, bb):
if key is None:
data = imageio.imread(path)
data = imageio.volread(path)
data = data[bb]
else:
with open_file(path, 'r') as f:
data = f[key][bb]
return data


def _create_example(in_path, folder, axes, key=None, bb=np.s_[:]):
def _create_example(in_path, folder, axes, key=None, bb=np.s_[:], dimension_separator="/"):
ax_name = ''.join(axes)
if dimension_separator == "/":
out_path = os.path.join(folder, f"{ax_name}.ome.zarr")
else:
out_path = os.path.join(folder, f"flat_{ax_name}.ome.zarr")
if os.path.exists(out_path):
print("Example data at", out_path, "is already present")
return

data = _load_data(in_path, key, bb)
assert data.ndim == len(axes)
ax_name = ''.join(axes)
out_path = os.path.join(folder, f"{ax_name}.ome.zr")
kwargs = _kwargs_3d() if axes[-3:] == ('z', 'y', 'x') else _kwargs_2d()
write_ome_zarr(data, out_path, axes, ax_name,
n_scales=3, kwargs=kwargs)
n_scales=3, kwargs=kwargs, dimension_separator=dimension_separator)

#
# create ngff ome.zarr example data
Expand All @@ -42,30 +49,131 @@ def _create_example(in_path, folder, axes, key=None, bb=np.s_[:]):

# axes: yx
def create_2d_example(folder):
# yx: covid htm data with only nucleus channel
in_path = os.path.join("/g/kreshuk/data/covid/covid-data-vibor/20200405_test_images",
"WellC01_PointC01_0000_ChannelDAPI,WF_GFP,TRITC,WF_Cy5_Seq0216.tiff")
_create_example(in_path, folder, axes=("y", "x"), bb=np.s_[0, :, :])


# add linked labels for the zyx example
def _make_t_volume(path, timepoints, out_path, scale=2):
if os.path.exists(out_path):
return
data = []
for tp in timepoints:
key = f'setup0/timepoint{tp}/s{scale}'
with open_file(path, 'r') as f:
d = f[key][:]
data.append(d[None])
data = np.concatenate(data, axis=0)
with open_file(out_path, 'w') as f:
f.create_dataset('data', data=data, chunks=(1, 64, 64, 64))


# TODO add linked labels for the zyx example
# axes: zyx, cyx, tyx
def create_3d_examples(folder):
pass


# axes: tcyx, tzyx, czyx
# zyx: covid em data + labels
raw_path = os.path.join('/g/kreshuk/pape/Work/mobie/covid-em-datasets/data',
'Covid19-S4-Area2/images/local/sbem-6dpf-1-whole-raw.n5')
raw_key = 'setup0/timepoint0/s3'
_create_example(raw_path, folder, axes=("z", "y", "x"), key=raw_key)
# for linked labels
# seg_path = os.path.join('/g/kreshuk/pape/Work/mobie/covid-em-datasets/data',
# 'Covid19-S4-Area2/images/local/s4_area2_segmentation.n5')
# seg_key = 'setup0/timepoint0/s3'

# cyx: covid htm data with more channels
in_path = os.path.join("/g/kreshuk/data/covid/covid-data-vibor/20200405_test_images",
"WellC01_PointC01_0000_ChannelDAPI,WF_GFP,TRITC,WF_Cy5_Seq0216.tiff")
_create_example(in_path, folder, axes=("c", "y", "x"), bb=np.s_[:3, :, :])

# tyx: middle slice from arabidopsis dataset
timepoints = [32, 33, 34]
scale = 2
raw_path = os.path.join('/g/kreshuk/pape/Work/mobie/arabidopsis-root-lm-datasets/data',
'arabidopsis-root/images/local/lm-membranes.n5')
tmp_path = './tp_data.h5'
_make_t_volume(raw_path, timepoints, tmp_path, scale=scale)
_create_example(tmp_path, folder, axes=("t", "y", "x"), bb=np.s_[:, 200, :, :],
key='data')


def _make_tc_volume(path1, path2, timepoints, out_path, scale=2):
if os.path.exists(out_path):
return
data = []
for tp in timepoints:
key = f'setup0/timepoint{tp}/s{scale}'
with open_file(path1, 'r') as f:
d1 = f[key][:]
with open_file(path2, 'r') as f:
d2 = f[key][:]
d = np.concatenate([d1[None], d2[None]], axis=0)
data.append(d[None])
data = np.concatenate(data, axis=0)
with open_file(out_path, 'w') as f:
f.create_dataset('data', data=data, chunks=(1, 1, 64, 64, 64))


# axes: tzyx, tcyx, czyx
def create_4d_examples(folder):
pass

# tzyx: arabidopsis dataset (boundaries)
tmp_path = './tp_data.h5'
_create_example(tmp_path, folder, key="data", axes=("t", "z", "y", "x"), bb=np.s_[:])

# tcyx: arabidopsis boundaries and nuclei, middle slice
timepoints = [32, 33, 34]
scale = 2
path1 = '/g/kreshuk/pape/Work/mobie/arabidopsis-root-lm-datasets/data/arabidopsis-root/images/local/lm-membranes.n5'
path2 = '/g/kreshuk/pape/Work/mobie/arabidopsis-root-lm-datasets/data/arabidopsis-root/images/local/lm-nuclei.n5'
tmp_path = './tp_channel_data.h5'
_make_tc_volume(path1, path2, timepoints, tmp_path, scale=scale)
_create_example(tmp_path, folder, key="data", axes=("t", "c", "y", "x"), bb=np.s_[:, :, 200, :, :])

# czyx: arabidopsis dataset (boundaries + nuclei), single timepoint
_create_example(tmp_path, folder, key="data", axes=("c", "z", "y", "x"), bb=np.s_[0, :, :, :, :])


# axes: tczyx
def create_5d_example(folder):
pass
# tczyx: full arabidopsis dataset
tmp_path = './tp_channel_data.h5'
_create_example(tmp_path, folder, key="data", axes=("t", "c", "z", "y", "x"), bb=np.s_[:, :, :, :, :])


# using '.' dimension separator
def create_flat_example(folder):
pass
# yx: covid htm data with only nucleus channel
in_path = os.path.join("/g/kreshuk/data/covid/covid-data-vibor/20200405_test_images",
"WellC01_PointC01_0000_ChannelDAPI,WF_GFP,TRITC,WF_Cy5_Seq0216.tiff")
_create_example(in_path, folder, axes=("y", "x"), bb=np.s_[0, :, :], dimension_separator=".")


def copy_readme(output_folder, version):
readme = f"""
# Example data for OME-ZARR NGFF v{version}
This folder contains the following example ome.zarr files
- yx: 2d image, data is the nucleus channel of an image from [1]
- zyx: 3d volume, data is an EM volume from [2]
- cyx: 2d image with channels, image with 3 channels from [1]
- tyx: timeseries of 2d images, timeseries of central slice of membrane channel from [3]
- tzyx: timeseries of 3d images, timeseries of membrane channel volume from [3]
- tcyx: timeseries of images with channel, timeseries of central slice of membrane + nucleus channel from [3]
- czyx: 3d volume with channel, single timepoint of membrane and nucleus channel from [3]
- tczyx: timeseries of 3d volumes with channel, full data from [3]
- flat_yx: same as yx, but using flat chunk storage (dimension_separator=".") instead of nested storage
Publications:
[1] https://onlinelibrary.wiley.com/doi/full/10.1002/bies.202000257
[2] https://www.sciencedirect.com/science/article/pii/S193131282030620X
[3] https://elifesciences.org/articles/57613
"""
out_path = os.path.join(output_folder, 'Readme.md')
with open(out_path, 'w') as f:
f.write(readme)


if __name__ == '__main__':
Expand All @@ -82,4 +190,4 @@ def create_flat_example(folder):
create_4d_examples(output_folder)
create_5d_example(output_folder)
create_flat_example(output_folder)
# TODO copy README
copy_readme(output_folder, args.version)

0 comments on commit 301482b

Please sign in to comment.