From f123096f69330d4d8cd507e4c17bfd393aa29104 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Wed, 30 Aug 2017 15:28:48 -0400 Subject: [PATCH 1/8] Compute projection instead of dictionary --- nanshe_ipython.ipynb | 107 ++++++------------------------------------- 1 file changed, 15 insertions(+), 92 deletions(-) diff --git a/nanshe_ipython.ipynb b/nanshe_ipython.ipynb index cfc79e5..fe63083 100644 --- a/nanshe_ipython.ipynb +++ b/nanshe_ipython.ipynb @@ -1136,10 +1136,10 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Normalize Data\n", + "### Project\n", "\n", "* `block_frames` (`int`): number of frames to work with in each full frame block (run in parallel).\n", - "* `norm_frames` (`int`): number of frames for use during normalization of each full frame block (run in parallel)." + "* `proj_type` (`str`): type of projection to take." ] }, { @@ -1152,12 +1152,12 @@ "\n", "\n", "block_frames = 40\n", - "norm_frames = 100\n", + "proj_type = \"max\"\n", "\n", "\n", "# Somehow we can't overwrite the file in the container so this is needed.\n", - "io_remove(data_basename + postfix_norm + zarr_ext)\n", - "io_remove(data_basename + postfix_norm + h5_ext)\n", + "io_remove(data_basename + postfix_dict + zarr_ext)\n", + "io_remove(data_basename + postfix_dict + h5_ext)\n", "\n", "\n", "with open_zarr(data_basename + postfix_wt + zarr_ext, \"r\") as f:\n", @@ -1173,10 +1173,14 @@ " da_imgs_flt.dtype.itemsize >= 4):\n", " da_imgs_flt = da_imgs_flt.astype(np.float32)\n", "\n", - " da_result = normalize_data(da_imgs)\n", + " da_result = da_imgs\n", + " if proj_type == \"max\":\n", + " da_result = da_result.max(axis=0, keepdims=True)\n", + " elif proj_type == \"std\":\n", + " da_result = da_result.std(axis=0, keepdims=True)\n", "\n", " # Store denoised data\n", - " with open_zarr(data_basename + postfix_norm + zarr_ext, \"w\") as f2:\n", + " with open_zarr(data_basename + postfix_dict + zarr_ext, \"w\") as f2:\n", " result = f2.create_dataset(\n", " \"images\",\n", " shape=da_result.shape,\n", @@ -1188,86 +1192,6 @@ " dask.distributed.progress(status, notebook=False)\n", "\n", "\n", - "zip_zarr(data_basename + postfix_norm + zarr_ext)\n", - "\n", - "with h5py.File(data_basename + postfix_norm + h5_ext, \"w\") as f2:\n", - " with open_zarr(data_basename + postfix_norm + zarr_ext, \"r\") as f1:\n", - " zarr_to_hdf5(f1, f2)\n", - "\n", - "\n", - "if __IPYTHON__:\n", - " result_image_stack = LazyZarrDataset(data_basename + postfix_norm + zarr_ext, \"images\")\n", - "\n", - " mplsv = plt.figure(FigureClass=MPLViewer)\n", - " mplsv.set_images(\n", - " result_image_stack,\n", - " vmin=par_compute_min_projection(num_frames=norm_frames)(result_image_stack).min(),\n", - " vmax=par_compute_max_projection(num_frames=norm_frames)(result_image_stack).max()\n", - " )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Dictionary Learning\n", - "\n", - "* `n_components` (`int`): number of basis images in the dictionary.\n", - "* `batchsize` (`int`): minibatch size to use.\n", - "* `iters` (`int`): number of iterations to run before getting dictionary.\n", - "* `lambda1` (`float`): weight for L1 sparisty enforcement on sparse code.\n", - "* `lambda2` (`float`): weight for L2 sparisty enforcement on sparse code.\n", - "\n", - "
\n", - "* `block_frames` (`int`): number of frames to work with in each full frame block (run in parallel).\n", - "* `norm_frames` (`int`): number of frames for use during normalization of each full frame block (run in parallel)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%time\n", - "\n", - "\n", - "n_components = 50\n", - "batchsize = 256\n", - "iters = 100\n", - "lambda1 = 0.2\n", - "lambda2 = 0.0\n", - "\n", - "block_frames = 51\n", - "norm_frames = 100\n", - "\n", - "\n", - "# Somehow we can't overwrite the file in the container so this is needed.\n", - "io_remove(data_basename + postfix_dict + zarr_ext)\n", - "io_remove(data_basename + postfix_dict + h5_ext)\n", - "\n", - "result = LazyZarrDataset(data_basename + postfix_norm + zarr_ext, \"images\")\n", - "block_shape = (block_frames,) + result.shape[1:]\n", - "with open_zarr(data_basename + postfix_dict + zarr_ext, \"w\") as f2:\n", - " new_result = f2.create_dataset(\"images\", shape=(n_components,) + result.shape[1:], dtype=result.dtype, chunks=True)\n", - "\n", - " result = par_generate_dictionary(block_shape)(\n", - " result,\n", - " n_components=n_components,\n", - " out=new_result,\n", - " **{\"sklearn.decomposition.dict_learning_online\" : {\n", - " \"n_jobs\" : 1,\n", - " \"n_iter\" : iters,\n", - " \"batch_size\" : batchsize,\n", - " \"alpha\" : lambda1\n", - " }\n", - " }\n", - " )\n", - "\n", - " result_j = f2.create_dataset(\"images_j\", shape=new_result.shape, dtype=numpy.uint16, chunks=True)\n", - " par_norm_layer(num_frames=norm_frames)(result, out=result_j)\n", - "\n", - "\n", "zip_zarr(data_basename + postfix_dict + zarr_ext)\n", "\n", "with h5py.File(data_basename + postfix_dict + h5_ext, \"w\") as f2:\n", @@ -1276,15 +1200,14 @@ "\n", "\n", "if __IPYTHON__:\n", - " result_image_stack = LazyZarrDataset(data_basename + postfix_dict + zarr_ext, \"images\")\n", + " result_image_stack = LazyZarrDataset(data_basename + postfix_dict + zarr_ext, \"images\")[...][...]\n", "\n", " mplsv = plt.figure(FigureClass=MPLViewer)\n", " mplsv.set_images(\n", " result_image_stack,\n", - " vmin=par_compute_min_projection(num_frames=norm_frames)(result_image_stack).min(),\n", - " vmax=par_compute_max_projection(num_frames=norm_frames)(result_image_stack).max()\n", - " )\n", - " mplsv.time_nav.stime.label.set_text(\"Basis Image\")" + " vmin=result_image_stack.min(),\n", + " vmax=result_image_stack.max()\n", + " )" ] }, { From 94e036b6a77c30a13cb0778b03bc8c10f9ea88db Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Fri, 25 Aug 2017 10:35:34 -0400 Subject: [PATCH 2/8] Require dask-ndmeasure Needed to perform connected components and to constrain the resulting label image. --- nanshe_workflow.recipe/meta.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/nanshe_workflow.recipe/meta.yaml b/nanshe_workflow.recipe/meta.yaml index d414e37..45aa83f 100644 --- a/nanshe_workflow.recipe/meta.yaml +++ b/nanshe_workflow.recipe/meta.yaml @@ -31,6 +31,7 @@ requirements: - dask-imread - dask-ndfilters - dask-ndfourier + - dask-ndmeasure - runipy - cloudpickle - tifffile From 71bba632f5a98f6eaafd8ca9a4487fb0f25e67c5 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Fri, 25 Aug 2017 15:32:25 -0400 Subject: [PATCH 3/8] Rewrite post-processing Simply use a threshold with no other constraints. 
--- nanshe_ipython.ipynb | 121 ++++++++++++++++++------------------------- 1 file changed, 50 insertions(+), 71 deletions(-) diff --git a/nanshe_ipython.ipynb b/nanshe_ipython.ipynb index fe63083..9a416cb 100644 --- a/nanshe_ipython.ipynb +++ b/nanshe_ipython.ipynb @@ -184,6 +184,7 @@ " import dask_imread\n", " import dask_ndfilters\n", " import dask_ndfourier\n", + " import dask_ndmeasure\n", "\n", " from toolz import sliding_window\n", "\n", @@ -1138,8 +1139,12 @@ "source": [ "### Project\n", "\n", + "* `proj_type` (`str`): type of projection to take.\n", + "\n", + "
\n", + "\n", "* `block_frames` (`int`): number of frames to work with in each full frame block (run in parallel).\n", - "* `proj_type` (`str`): type of projection to take." + "* `block_space` (`int`): extent of each spatial dimension for each block (run in parallel)." ] }, { @@ -1151,9 +1156,11 @@ "%%time\n", "\n", "\n", - "block_frames = 40\n", "proj_type = \"max\"\n", "\n", + "block_frames = 40\n", + "block_space = 300\n", + "\n", "\n", "# Somehow we can't overwrite the file in the container so this is needed.\n", "io_remove(data_basename + postfix_dict + zarr_ext)\n", @@ -1217,15 +1224,7 @@ "### Postprocessing\n", "\n", "* `significance_threshold` (`float`): number of standard deviations below which to include in \"noise\" estimate\n", - "* `wavelet_scale` (`int`): scale of wavelet transform to apply (should be the same as the one used above)\n", - "* `noise_threshold` (`float`): number of units of \"noise\" above which something needs to be to be significant\n", - "* `accepted_region_shape_constraints` (`dict`): if ROIs don't match this, reduce the `wavelet_scale` once.\n", - "* `percentage_pixels_below_max` (`float`): upper bound on ratio of ROI pixels not at max intensity vs. all ROI pixels\n", - "* `min_local_max_distance` (`float`): minimum allowable euclidean distance between two ROIs maximum intensities\n", - "* `accepted_neuron_shape_constraints` (`dict`): shape constraints for ROI to be kept.\n", - "\n", - "* `alignment_min_threshold` (`float`): similarity measure of the intensity of two ROIs images used for merging.\n", - "* `overlap_min_threshold` (`float`): similarity measure of the masks of two ROIs used for merging." + "* `noise_threshold` (`float`): number of units of \"noise\" above which something needs to be to be significant" ] }, { @@ -1238,80 +1237,60 @@ "\n", "\n", "significance_threshold = 3.0\n", - "wavelet_scale = 3\n", - "noise_threshold = 3.0\n", - "percentage_pixels_below_max = 0.8\n", - "min_local_max_distance = 16.0\n", - "\n", - "alignment_min_threshold = 0.6\n", - "overlap_min_threshold = 0.6\n", + "noise_threshold = 1.0\n", "\n", "\n", "# Somehow we can't overwrite the file in the container so this is needed.\n", "io_remove(data_basename + postfix_post + zarr_ext)\n", "io_remove(data_basename + postfix_post + h5_ext)\n", "\n", - "result = LazyZarrDataset(data_basename + postfix_dict + zarr_ext, \"images\")\n", - "with open_zarr(data_basename + postfix_post + zarr_ext, \"w\") as f2:\n", - " result = par_postprocess_data(result,\n", - " **{\n", - " \"wavelet_denoising\" : {\n", - " \"estimate_noise\" : {\n", - " \"significance_threshold\" : significance_threshold\n", - " },\n", - " \"wavelet.transform\" : {\n", - " \"scale\" : wavelet_scale\n", - " },\n", - " \"significant_mask\" : {\n", - " \"noise_threshold\" : noise_threshold\n", - " },\n", - " \"accepted_region_shape_constraints\" : {\n", - " \"major_axis_length\" : {\n", - " \"min\" : 0.0,\n", - " \"max\" : 25.0\n", - " }\n", - " },\n", - " \"remove_low_intensity_local_maxima\" : {\n", - " \"percentage_pixels_below_max\" : percentage_pixels_below_max\n", - " },\n", - " \"remove_too_close_local_maxima\" : {\n", - " \"min_local_max_distance\" : min_local_max_distance\n", - " },\n", - " \"accepted_neuron_shape_constraints\" : {\n", - " \"area\" : {\n", - " \"min\" : 25,\n", - " \"max\" : 600\n", - " },\n", - " \"eccentricity\" : {\n", - " \"min\" : 0.0,\n", - " \"max\" : 0.9\n", - " }\n", - " }\n", - " },\n", - " \"merge_neuron_sets\" : {\n", - " \"alignment_min_threshold\" : alignment_min_threshold,\n", 
- " \"overlap_min_threshold\" : overlap_min_threshold,\n", - " \"fuse_neurons\" : {\n", - " \"fraction_mean_neuron_max_threshold\" : 0.01\n", - " }\n", - " }\n", - " }\n", - " )\n", "\n", - " f2.create_group(\"rois\")\n", - " for each_name in result.dtype.names:\n", - " f2.require_group(\"rois\").create_dataset(\n", - " each_name,\n", - " data=result[each_name],\n", - " chunks=True\n", + "with open_zarr(data_basename + postfix_dict + zarr_ext, \"r\") as f:\n", + " with get_executor(client) as executor:\n", + " imgs = f[\"images\"]\n", + " da_imgs = da.from_array(\n", + " imgs, chunks=imgs.shape\n", " )\n", + " da_imgs = da_imgs[0]\n", + "\n", + " da_imgs_thrd = (da_imgs - noise_threshold * (da_imgs - significance_threshold * da_imgs.std()).std()) > 0\n", + "\n", + " da_lbl_img, da_num_lbls = dask_ndmeasure.label(da_imgs_thrd)\n", + " da_lbl_img, da_num_lbls = executor.persist([da_lbl_img, da_num_lbls])\n", + "\n", + " da_result = []\n", + " for i in irange(1, 1 + int(da_num_lbls)):\n", + " da_result.append(da_lbl_img == i)\n", + " da_result = da.stack(da_result)\n", + "\n", + " with open_zarr(data_basename + postfix_post + zarr_ext, \"w\") as f2:\n", + " result = f2.create_group(\"rois\").create_dataset(\n", + " \"mask\",\n", + " shape=da_result.shape,\n", + " dtype=da_result.dtype,\n", + " chunks=True\n", + " )\n", + " da_result = da_result.rechunk(result.chunks)\n", + " status = executor.compute(da.store(da_result, result, lock=False, compute=False))\n", + " dask.distributed.progress(status, notebook=False)\n", "\n", "\n", "zip_zarr(data_basename + postfix_post + zarr_ext)\n", "\n", "with h5py.File(data_basename + postfix_post + h5_ext, \"w\") as f2:\n", " with open_zarr(data_basename + postfix_post + zarr_ext, \"r\") as f1:\n", - " zarr_to_hdf5(f1, f2)" + " zarr_to_hdf5(f1, f2)\n", + "\n", + "\n", + "if __IPYTHON__:\n", + " result_image_stack = LazyZarrDataset(data_basename + postfix_post + zarr_ext, \"rois/mask\")[...][...].astype(np.uint8)\n", + "\n", + " mplsv = plt.figure(FigureClass=MPLViewer)\n", + " mplsv.set_images(\n", + " result_image_stack,\n", + " vmin=result_image_stack.min(),\n", + " vmax=result_image_stack.max()\n", + " )" ] }, { From 3c2a7add1f85e54793d53c0e49c0ba98f71c02c5 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Wed, 30 Aug 2017 13:03:08 -0400 Subject: [PATCH 4/8] Constrain area --- nanshe_ipython.ipynb | 100 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 94 insertions(+), 6 deletions(-) diff --git a/nanshe_ipython.ipynb b/nanshe_ipython.ipynb index 9a416cb..722503d 100644 --- a/nanshe_ipython.ipynb +++ b/nanshe_ipython.ipynb @@ -53,6 +53,7 @@ "postfix_wt = \"_wt\"\n", "postfix_norm = \"_norm\"\n", "postfix_dict = \"_dict\"\n", + "postfix_cc = \"_cc\"\n", "postfix_post = \"_post\"\n", "postfix_rois = \"_rois\"\n", "postfix_traces = \"_traces\"\n", @@ -560,7 +561,7 @@ "%%time\n", "\n", "\n", - "num_reps = 5\n", + "num_reps = 0\n", "tmpl_hist_wght = 0.25\n", "thld_rel_dist = 0.0\n", "\n", @@ -1221,7 +1222,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Postprocessing\n", + "### Connected components\n", "\n", "* `significance_threshold` (`float`): number of standard deviations below which to include in \"noise\" estimate\n", "* `noise_threshold` (`float`): number of units of \"noise\" above which something needs to be to be significant" @@ -1241,8 +1242,8 @@ "\n", "\n", "# Somehow we can't overwrite the file in the container so this is needed.\n", - "io_remove(data_basename + postfix_post + zarr_ext)\n", - "io_remove(data_basename + 
postfix_post + h5_ext)\n", + "io_remove(data_basename + postfix_cc + zarr_ext)\n", + "io_remove(data_basename + postfix_cc + h5_ext)\n", "\n", "\n", "with open_zarr(data_basename + postfix_dict + zarr_ext, \"r\") as f:\n", @@ -1260,8 +1261,95 @@ "\n", " da_result = []\n", " for i in irange(1, 1 + int(da_num_lbls)):\n", - " da_result.append(da_lbl_img == i)\n", - " da_result = da.stack(da_result)\n", + " da_result.append((da_lbl_img == i)[None])\n", + " da_result = da.concatenate(da_result)\n", + "\n", + " with open_zarr(data_basename + postfix_cc + zarr_ext, \"w\") as f2:\n", + " result = f2.create_group(\"rois\").create_dataset(\n", + " \"mask\",\n", + " shape=da_result.shape,\n", + " dtype=da_result.dtype,\n", + " chunks=True\n", + " )\n", + " da_result = da_result.rechunk(result.chunks)\n", + " status = executor.compute(da.store(da_result, result, lock=False, compute=False))\n", + " dask.distributed.progress(status, notebook=False)\n", + "\n", + "\n", + "zip_zarr(data_basename + postfix_cc + zarr_ext)\n", + "\n", + "with h5py.File(data_basename + postfix_cc + h5_ext, \"w\") as f2:\n", + " with open_zarr(data_basename + postfix_cc + zarr_ext, \"r\") as f1:\n", + " zarr_to_hdf5(f1, f2)\n", + "\n", + "\n", + "if __IPYTHON__:\n", + " result_image_stack = LazyZarrDataset(data_basename + postfix_cc + zarr_ext, \"rois/mask\")[...][...].astype(np.uint8)\n", + "\n", + " mplsv = plt.figure(FigureClass=MPLViewer)\n", + " mplsv.set_images(\n", + " result_image_stack,\n", + " vmin=result_image_stack.min(),\n", + " vmax=result_image_stack.max()\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### ROI Refinement\n", + "\n", + "* `area_min_threshold` (`float`): minimum area required for all ROIs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "\n", + "\n", + "area_min_threshold = 20.0\n", + "\n", + "\n", + "# Somehow we can't overwrite the file in the container so this is needed.\n", + "io_remove(data_basename + postfix_post + zarr_ext)\n", + "io_remove(data_basename + postfix_post + h5_ext)\n", + "\n", + "\n", + "with open_zarr(data_basename + postfix_cc + zarr_ext, \"r\") as f:\n", + " with get_executor(client) as executor:\n", + " imgs = f[\"rois/mask\"]\n", + " da_imgs = da.from_array(\n", + " imgs, chunks=imgs.shape\n", + " )\n", + "\n", + " da_num_lbls = len(da_imgs)\n", + " da_lbl_img = (\n", + " np.arange(1, 1 + da_num_lbls)[(slice(None),) + da_lbl_img.ndim * (None,)] * da_imgs\n", + " ).sum(axis=0)\n", + "\n", + " da_area_lbls = dask_ndmeasure.sum(\n", + " da.ones(da_lbl_img.shape, dtype=int, chunks=da_lbl_img.chunks),\n", + " da_lbl_img,\n", + " list(irange(1, 1 + int(da_num_lbls)))\n", + " )\n", + "\n", + " da_lbl_img, da_num_lbls = dask_ndmeasure.label(\n", + " (\n", + " ((da_area_lbls >= area_min_threshold)[(slice(None),) + da_lbl_img.ndim * (None,)] * da_lbl_img) > 0\n", + " ).sum(axis=0)\n", + " )\n", + "\n", + " da_lbl_img, da_num_lbls = executor.persist([da_lbl_img, da_num_lbls])\n", + "\n", + " da_result = []\n", + " for i in irange(1, 1 + int(da_num_lbls)):\n", + " da_result.append((da_lbl_img == i)[None])\n", + " da_result = da.concatenate(da_result)\n", "\n", " with open_zarr(data_basename + postfix_post + zarr_ext, \"w\") as f2:\n", " result = f2.create_group(\"rois\").create_dataset(\n", From fe3381d4f013645ee0e3daa12dd42ce2ca4a1ed5 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Wed, 6 Sep 2017 10:43:18 -0400 Subject: [PATCH 5/8] Threshold processed data with 
mask --- nanshe_ipython.ipynb | 74 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/nanshe_ipython.ipynb b/nanshe_ipython.ipynb index 722503d..6151cd0 100644 --- a/nanshe_ipython.ipynb +++ b/nanshe_ipython.ipynb @@ -55,6 +55,7 @@ "postfix_dict = \"_dict\"\n", "postfix_cc = \"_cc\"\n", "postfix_post = \"_post\"\n", + "postfix_thrd = \"_thrd\"\n", "postfix_rois = \"_rois\"\n", "postfix_traces = \"_traces\"\n", "postfix_proj = \"_proj\"\n", @@ -1381,6 +1382,79 @@ " )" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Threshold data\n", + "\n", + "* `block_frames` (`int`): number of frames to work with in each full frame block (run in parallel).\n", + "* `block_space` (`int`): extent of each spatial dimension for each block (run in parallel)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "\n", + "\n", + "block_frames = 40\n", + "block_space = 300\n", + "\n", + "\n", + "# Somehow we can't overwrite the file in the container so this is needed.\n", + "io_remove(data_basename + postfix_thrd + zarr_ext)\n", + "io_remove(data_basename + postfix_thrd + h5_ext)\n", + "\n", + "with open_zarr(data_basename + postfix_wt + zarr_ext, \"r\") as f:\n", + " with open_zarr(data_basename + postfix_post + zarr_ext, \"r\") as f2:\n", + " with get_executor(client) as executor:\n", + " # Load and prep data for computation.\n", + " imgs = f[\"images\"]\n", + " da_imgs = da.from_array(\n", + " imgs, chunks=(block_frames,) + (imgs.ndim - 1) * (block_space,)\n", + " )\n", + " msks = f2[\"rois/mask\"]\n", + " da_msks = da.from_array(\n", + " msks, chunks=(block_frames,) + (imgs.ndim - 1) * (block_space,)\n", + " )\n", + "\n", + " da_result = da_imgs * da_msks.max(axis=0, keepdims=True).astype(da_imgs.dtype)\n", + "\n", + " # Store data\n", + " with open_zarr(data_basename + postfix_thrd + zarr_ext, \"w\") as f2:\n", + " result = f2.create_dataset(\n", + " \"images\",\n", + " shape=da_result.shape,\n", + " dtype=da_result.dtype,\n", + " chunks=True\n", + " )\n", + " da_result = da_result.rechunk(result.chunks)\n", + " status = executor.compute(da.store(da_result, result, lock=False, compute=False))\n", + " dask.distributed.progress(status, notebook=False)\n", + "\n", + "\n", + "zip_zarr(data_basename + postfix_thrd + zarr_ext)\n", + "\n", + "with h5py.File(data_basename + postfix_thrd + h5_ext, \"w\") as f2:\n", + " with open_zarr(data_basename + postfix_thrd + zarr_ext, \"r\") as f1:\n", + " zarr_to_hdf5(f1, f2)\n", + "\n", + "\n", + "if __IPYTHON__:\n", + " result_image_stack = LazyZarrDataset(data_basename + postfix_thrd + zarr_ext, \"images\")[...][...]\n", + "\n", + " mplsv = plt.figure(FigureClass=MPLViewer)\n", + " mplsv.set_images(\n", + " result_image_stack,\n", + " vmin=result_image_stack.min(),\n", + " vmax=result_image_stack.max()\n", + " )" + ] + }, { "cell_type": "markdown", "metadata": {}, From 6cdc32abf17c39c33469f4fee363f2743047abc5 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Wed, 6 Sep 2017 11:15:14 -0400 Subject: [PATCH 6/8] Find dictionary on thresholded data --- nanshe_ipython.ipynb | 100 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 91 insertions(+), 9 deletions(-) diff --git a/nanshe_ipython.ipynb b/nanshe_ipython.ipynb index 6151cd0..c888f02 100644 --- a/nanshe_ipython.ipynb +++ b/nanshe_ipython.ipynb @@ -52,10 +52,11 @@ "postfix_f_f0 = \"_f_f0\"\n", "postfix_wt = \"_wt\"\n", "postfix_norm = \"_norm\"\n", - "postfix_dict = 
\"_dict\"\n", + "postfix_flat = \"_flat\"\n", "postfix_cc = \"_cc\"\n", "postfix_post = \"_post\"\n", "postfix_thrd = \"_thrd\"\n", + "postfix_dict = \"_dict\"\n", "postfix_rois = \"_rois\"\n", "postfix_traces = \"_traces\"\n", "postfix_proj = \"_proj\"\n", @@ -1165,8 +1166,8 @@ "\n", "\n", "# Somehow we can't overwrite the file in the container so this is needed.\n", - "io_remove(data_basename + postfix_dict + zarr_ext)\n", - "io_remove(data_basename + postfix_dict + h5_ext)\n", + "io_remove(data_basename + postfix_flat + zarr_ext)\n", + "io_remove(data_basename + postfix_flat + h5_ext)\n", "\n", "\n", "with open_zarr(data_basename + postfix_wt + zarr_ext, \"r\") as f:\n", @@ -1189,7 +1190,7 @@ " da_result = da_result.std(axis=0, keepdims=True)\n", "\n", " # Store denoised data\n", - " with open_zarr(data_basename + postfix_dict + zarr_ext, \"w\") as f2:\n", + " with open_zarr(data_basename + postfix_flat + zarr_ext, \"w\") as f2:\n", " result = f2.create_dataset(\n", " \"images\",\n", " shape=da_result.shape,\n", @@ -1201,15 +1202,15 @@ " dask.distributed.progress(status, notebook=False)\n", "\n", "\n", - "zip_zarr(data_basename + postfix_dict + zarr_ext)\n", + "zip_zarr(data_basename + postfix_flat + zarr_ext)\n", "\n", - "with h5py.File(data_basename + postfix_dict + h5_ext, \"w\") as f2:\n", - " with open_zarr(data_basename + postfix_dict + zarr_ext, \"r\") as f1:\n", + "with h5py.File(data_basename + postfix_flat + h5_ext, \"w\") as f2:\n", + " with open_zarr(data_basename + postfix_flat + zarr_ext, \"r\") as f1:\n", " zarr_to_hdf5(f1, f2)\n", "\n", "\n", "if __IPYTHON__:\n", - " result_image_stack = LazyZarrDataset(data_basename + postfix_dict + zarr_ext, \"images\")[...][...]\n", + " result_image_stack = LazyZarrDataset(data_basename + postfix_flat + zarr_ext, \"images\")[...][...]\n", "\n", " mplsv = plt.figure(FigureClass=MPLViewer)\n", " mplsv.set_images(\n", @@ -1247,7 +1248,7 @@ "io_remove(data_basename + postfix_cc + h5_ext)\n", "\n", "\n", - "with open_zarr(data_basename + postfix_dict + zarr_ext, \"r\") as f:\n", + "with open_zarr(data_basename + postfix_flat + zarr_ext, \"r\") as f:\n", " with get_executor(client) as executor:\n", " imgs = f[\"images\"]\n", " da_imgs = da.from_array(\n", @@ -1455,6 +1456,87 @@ " )" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Dictionary Learning\n", + "\n", + "* `n_components` (`int`): number of basis images in the dictionary.\n", + "* `batchsize` (`int`): minibatch size to use.\n", + "* `iters` (`int`): number of iterations to run before getting dictionary.\n", + "* `lambda1` (`float`): weight for L1 sparisty enforcement on sparse code.\n", + "* `lambda2` (`float`): weight for L2 sparisty enforcement on sparse code.\n", + "\n", + "
\n", + "* `block_frames` (`int`): number of frames to work with in each full frame block (run in parallel).\n", + "* `norm_frames` (`int`): number of frames for use during normalization of each full frame block (run in parallel)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "\n", + "\n", + "n_components = 50\n", + "batchsize = 256\n", + "iters = 100\n", + "lambda1 = 0.2\n", + "lambda2 = 0.0\n", + "\n", + "block_frames = 51\n", + "norm_frames = 100\n", + "\n", + "\n", + "# Somehow we can't overwrite the file in the container so this is needed.\n", + "io_remove(data_basename + postfix_dict + zarr_ext)\n", + "io_remove(data_basename + postfix_dict + h5_ext)\n", + "\n", + "result = LazyZarrDataset(data_basename + postfix_thrd + zarr_ext, \"images\")\n", + "block_shape = (block_frames,) + result.shape[1:]\n", + "with open_zarr(data_basename + postfix_dict + zarr_ext, \"w\") as f2:\n", + " new_result = f2.create_dataset(\"images\", shape=(n_components,) + result.shape[1:], dtype=result.dtype, chunks=True)\n", + "\n", + " result = par_generate_dictionary(block_shape)(\n", + " result,\n", + " n_components=n_components,\n", + " out=new_result,\n", + " **{\"sklearn.decomposition.dict_learning_online\" : {\n", + " \"n_jobs\" : 1,\n", + " \"n_iter\" : iters,\n", + " \"batch_size\" : batchsize,\n", + " \"alpha\" : lambda1\n", + " }\n", + " }\n", + " )\n", + "\n", + " result_j = f2.create_dataset(\"images_j\", shape=new_result.shape, dtype=numpy.uint16, chunks=True)\n", + " par_norm_layer(num_frames=norm_frames)(result, out=result_j)\n", + "\n", + "\n", + "zip_zarr(data_basename + postfix_dict + zarr_ext)\n", + "\n", + "with h5py.File(data_basename + postfix_dict + h5_ext, \"w\") as f2:\n", + " with open_zarr(data_basename + postfix_dict + zarr_ext, \"r\") as f1:\n", + " zarr_to_hdf5(f1, f2)\n", + "\n", + "\n", + "if __IPYTHON__:\n", + " result_image_stack = LazyZarrDataset(data_basename + postfix_dict + zarr_ext, \"images\")\n", + "\n", + " mplsv = plt.figure(FigureClass=MPLViewer)\n", + " mplsv.set_images(\n", + " result_image_stack,\n", + " vmin=par_compute_min_projection(num_frames=norm_frames)(result_image_stack).min(),\n", + " vmax=par_compute_max_projection(num_frames=norm_frames)(result_image_stack).max()\n", + " )\n", + " mplsv.time_nav.stime.label.set_text(\"Basis Image\")" + ] + }, { "cell_type": "markdown", "metadata": {}, From 5313bc3ccfd2a042b4b3ed05582a39beac1a6ac0 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Wed, 6 Sep 2017 16:55:45 -0400 Subject: [PATCH 7/8] WIP: Extract ROIs from dictionary --- nanshe_ipython.ipynb | 229 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 211 insertions(+), 18 deletions(-) diff --git a/nanshe_ipython.ipynb b/nanshe_ipython.ipynb index c888f02..203e4b5 100644 --- a/nanshe_ipython.ipynb +++ b/nanshe_ipython.ipynb @@ -212,6 +212,18 @@ "logging.getLogger(\"nanshe\").setLevel(logging.INFO)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from builtins import (\n", + " map as imap,\n", + " range as irange\n", + ")" + ] + }, { "cell_type": "code", "execution_count": null, @@ -1300,9 +1312,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### ROI Refinement\n", + "### Component Refinement\n", "\n", - "* `area_min_threshold` (`float`): minimum area required for all ROIs" + "* `area_min_threshold` (`float`): minimum area required for all connected components" ] }, { @@ -1456,6 +1468,80 @@ " )" 
] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Normalize Data\n", + "\n", + "* `block_frames` (`int`): number of frames to work with in each full frame block (run in parallel).\n", + "* `norm_frames` (`int`): number of frames for use during normalization of each full frame block (run in parallel)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "\n", + "\n", + "block_frames = 40\n", + "norm_frames = 100\n", + "\n", + "\n", + "# Somehow we can't overwrite the file in the container so this is needed.\n", + "io_remove(data_basename + postfix_norm + zarr_ext)\n", + "io_remove(data_basename + postfix_norm + h5_ext)\n", + "\n", + "\n", + "with open_zarr(data_basename + postfix_thrd + zarr_ext, \"r\") as f:\n", + " with get_executor(client) as executor:\n", + " # Load and prep data for computation.\n", + " imgs = f[\"images\"]\n", + " da_imgs = da.from_array(\n", + " imgs, chunks=(block_frames,) + (imgs.ndim - 1) * (block_space,)\n", + " )\n", + "\n", + " da_imgs_flt = da_imgs\n", + " if not (issubclass(da_imgs_flt.dtype.type, np.floating) and \n", + " da_imgs_flt.dtype.itemsize >= 4):\n", + " da_imgs_flt = da_imgs_flt.astype(np.float32)\n", + "\n", + " da_result = normalize_data(da_imgs)\n", + "\n", + " # Store denoised data\n", + " with open_zarr(data_basename + postfix_norm + zarr_ext, \"w\") as f2:\n", + " result = f2.create_dataset(\n", + " \"images\",\n", + " shape=da_result.shape,\n", + " dtype=da_result.dtype,\n", + " chunks=True\n", + " )\n", + " da_result = da_result.rechunk(result.chunks)\n", + " status = executor.compute(da.store(da_result, result, lock=False, compute=False))\n", + " dask.distributed.progress(status, notebook=False)\n", + "\n", + "\n", + "zip_zarr(data_basename + postfix_norm + zarr_ext)\n", + "\n", + "with h5py.File(data_basename + postfix_norm + h5_ext, \"w\") as f2:\n", + " with open_zarr(data_basename + postfix_norm + zarr_ext, \"r\") as f1:\n", + " zarr_to_hdf5(f1, f2)\n", + "\n", + "\n", + "if __IPYTHON__:\n", + " result_image_stack = LazyZarrDataset(data_basename + postfix_norm + zarr_ext, \"images\")\n", + "\n", + " mplsv = plt.figure(FigureClass=MPLViewer)\n", + " mplsv.set_images(\n", + " result_image_stack,\n", + " vmin=par_compute_min_projection(num_frames=norm_frames)(result_image_stack).min(),\n", + " vmax=par_compute_max_projection(num_frames=norm_frames)(result_image_stack).max()\n", + " )" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -1485,7 +1571,7 @@ "n_components = 50\n", "batchsize = 256\n", "iters = 100\n", - "lambda1 = 0.2\n", + "lambda1 = 0.5\n", "lambda2 = 0.0\n", "\n", "block_frames = 51\n", @@ -1496,7 +1582,7 @@ "io_remove(data_basename + postfix_dict + zarr_ext)\n", "io_remove(data_basename + postfix_dict + h5_ext)\n", "\n", - "result = LazyZarrDataset(data_basename + postfix_thrd + zarr_ext, \"images\")\n", + "result = LazyZarrDataset(data_basename + postfix_norm + zarr_ext, \"images\")\n", "block_shape = (block_frames,) + result.shape[1:]\n", "with open_zarr(data_basename + postfix_dict + zarr_ext, \"w\") as f2:\n", " new_result = f2.create_dataset(\"images\", shape=(n_components,) + result.shape[1:], dtype=result.dtype, chunks=True)\n", @@ -1541,9 +1627,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### ROI and trace extraction\n", + "### ROI Extraction\n", "\n", - "* `block_frames` (`int`): number of frames to work with in each block (run in parallel)." 
+ "* `normed_max_threshold` (`float`): minimum required value of maximum value of the normalized basis images.\n", + "* `basis_threshold` (`float`): threshold used on each basis image.\n", + "\n", + "
\n", + "\n", + "* `block_frames` (`int`): number of frames to work with in each full frame block (run in parallel)." ] }, { @@ -1552,28 +1643,97 @@ "metadata": {}, "outputs": [], "source": [ - "%%time\n", + "#%%time\n", "\n", "\n", - "block_frames = 100\n", + "normed_max_threshold = 0.1\n", + "basis_threshold = 0.01\n", + "\n", + "block_frames = 40\n", "\n", "\n", "# Somehow we can't overwrite the file in the container so this is needed.\n", "io_remove(data_basename + postfix_rois + zarr_ext)\n", "io_remove(data_basename + postfix_rois + h5_ext)\n", "\n", - "with open_zarr(data_basename + postfix_rois + zarr_ext, \"w\") as f2:\n", - " with open_zarr(data_basename + postfix_post + zarr_ext, \"r\") as f1:\n", - " f2[\"masks\"] = f1[\"rois/mask\"]\n", + "with open_zarr(data_basename + postfix_dict + zarr_ext, \"r\") as f:\n", + " with get_executor(client) as executor:\n", + " # Load and prep data for computation.\n", + " imgs = f[\"images\"]\n", + " da_imgs = da.from_array(\n", + " imgs, chunks=(block_frames,) + imgs.shape[1:]\n", + " )\n", + "\n", + " da_imgs_pos = da.where(da_imgs > 0, da_imgs, 0)\n", + " da_imgs_norm = normalize_data(da_imgs_pos)\n", "\n", - " mskimg = f2[\"masks\"]\n", - " mskimg_j = f2.create_dataset(\"masks_j\", shape=mskimg.shape, dtype=numpy.uint8, chunks=True)\n", - " par_norm_layer(num_frames=block_frames)(mskimg, out=mskimg_j)\n", + " da_imgs_thrd = (\n", + " da_imgs_norm *\n", + " (\n", + " da_imgs_norm.max(axis=tuple(irange(1, da_imgs_norm.ndim)), keepdims=True) > normed_max_threshold\n", + " ).astype(da_imgs.dtype)\n", + " )\n", + " da_imgs_thrd = (da_imgs_thrd > basis_threshold)\n", + "\n", + " da_lbl_imgs = []\n", + " da_num_lbls = []\n", + " for i in irange(len(da_imgs_thrd)):\n", + " da_lbl_imgs_i, da_num_lbls_i = dask_ndmeasure.label(da_imgs_thrd[i])\n", + "\n", + " da_lbl_imgs.append(da_lbl_imgs_i)\n", + " da_num_lbls.append(da.asarray([da_num_lbls_i]))\n", + "\n", + " da_lbl_imgs = da.stack(da_lbl_imgs)\n", + " da_num_lbls = da.concatenate(da_num_lbls)\n", + "\n", + " da_adj_lbls = da.concatenate([\n", + " da.asarray(da_num_lbls.dtype.type(0)[None]),\n", + " da.cumsum(da_num_lbls, axis=0)[:-1]\n", + " ])\n", + "\n", + " da_lbl_imgs = da_lbl_imgs + da.where(\n", + " da_lbl_imgs != 0,\n", + " da_adj_lbls[(slice(None),) + (da_lbl_imgs.ndim - da_adj_lbls.ndim) * (None,)],\n", + " da_lbl_imgs.dtype.type(0)\n", + " )\n", + "\n", + " da_lbl_imgs, da_num_lbls, da_adj_lbls = executor.persist([da_lbl_imgs, da_num_lbls, da_adj_lbls])\n", + "\n", + " da_msk_imgs = []\n", + " for i in irange(len(da_lbl_imgs)):\n", + " for l in irange(1 + da_adj_lbls[i].compute(),\n", + " (1 + da_num_lbls + da_adj_lbls)[i].compute()):\n", + " da_msk_imgs.append(\n", + " (da_lbl_imgs[i] == l)[None]\n", + " )\n", + " da_msk_imgs = da.concatenate(da_msk_imgs)\n", + "\n", + " del da_num_lbls, da_adj_lbls\n", + "\n", + " da_lbl_img = (\n", + " da.arange(\n", + " 1, 1 + len(da_msk_imgs), chunks=da_msk_imgs.chunks[0]\n", + " )[(slice(None),) + (da_msk_imgs.ndim - 1) * (None,)] *\n", + " da_msk_imgs.astype(int)\n", + " ).sum(axis=0)\n", + " da_msk_imgs = da_msk_imgs.astype(np.uint8)\n", + "\n", + " # Store data\n", + " with open_zarr(data_basename + postfix_rois + zarr_ext, \"w\") as f2:\n", + " status = []\n", + " for name, da_result in {\"masks\" : da_msk_imgs, \"labels\" : da_lbl_img}.items():\n", + " result = f2.create_dataset(\n", + " name,\n", + " shape=da_result.shape,\n", + " dtype=da_result.dtype,\n", + " chunks=True\n", + " )\n", + " da_result = da_result.rechunk(result.chunks)\n", + 
" status.append(executor.compute(da.store(\n", + " da_result, result, lock=False, compute=False\n", + " )))\n", + " dask.distributed.progress(status, notebook=False)\n", "\n", - " lblimg = label_mask_stack(mskimg, np.uint64)\n", - " f2[\"labels\"] = lblimg\n", - " f2[\"labels_j\"] = lblimg.astype(np.uint16)\n", - " lblimg = f2[\"labels\"]\n", "\n", "zip_zarr(data_basename + postfix_rois + zarr_ext)\n", "\n", @@ -1581,6 +1741,39 @@ " with open_zarr(data_basename + postfix_rois + zarr_ext, \"r\") as f1:\n", " zarr_to_hdf5(f1, f2)\n", "\n", + "\n", + "if __IPYTHON__:\n", + " result_image_stack = LazyZarrDataset(data_basename + postfix_rois + zarr_ext, \"masks\")[...][...]\n", + "\n", + " mplsv = plt.figure(FigureClass=MPLViewer)\n", + " mplsv.set_images(\n", + " result_image_stack,\n", + " vmin=result_image_stack.min(),\n", + " vmax=result_image_stack.max()\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Trace extraction\n", + "\n", + "* `block_frames` (`int`): number of frames to work with in each block (run in parallel)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "\n", + "\n", + "block_frames = 100\n", + "\n", + "\n", "# Somehow we can't overwrite the file in the container so this is needed.\n", "io_remove(data_basename + postfix_traces + zarr_ext)\n", "io_remove(data_basename + postfix_traces + h5_ext)\n", From 4e0288a723a1eef6fff304895312a0eab69f6eb0 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Wed, 13 Sep 2017 11:04:07 -0400 Subject: [PATCH 8/8] Add comments --- nanshe_ipython.ipynb | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/nanshe_ipython.ipynb b/nanshe_ipython.ipynb index 203e4b5..61625af 100644 --- a/nanshe_ipython.ipynb +++ b/nanshe_ipython.ipynb @@ -1664,6 +1664,7 @@ " imgs, chunks=(block_frames,) + imgs.shape[1:]\n", " )\n", "\n", + " # Drop empty basis images and threshold the rest\n", " da_imgs_pos = da.where(da_imgs > 0, da_imgs, 0)\n", " da_imgs_norm = normalize_data(da_imgs_pos)\n", "\n", @@ -1675,6 +1676,7 @@ " )\n", " da_imgs_thrd = (da_imgs_thrd > basis_threshold)\n", "\n", + " # Find the label images for each basis image\n", " da_lbl_imgs = []\n", " da_num_lbls = []\n", " for i in irange(len(da_imgs_thrd)):\n", @@ -1699,6 +1701,7 @@ "\n", " da_lbl_imgs, da_num_lbls, da_adj_lbls = executor.persist([da_lbl_imgs, da_num_lbls, da_adj_lbls])\n", "\n", + " # Extract ROIs from basis images\n", " da_msk_imgs = []\n", " for i in irange(len(da_lbl_imgs)):\n", " for l in irange(1 + da_adj_lbls[i].compute(),\n", @@ -1710,6 +1713,7 @@ "\n", " del da_num_lbls, da_adj_lbls\n", "\n", + " # Generate label image and convert mask type\n", " da_lbl_img = (\n", " da.arange(\n", " 1, 1 + len(da_msk_imgs), chunks=da_msk_imgs.chunks[0]\n",