From f123096f69330d4d8cd507e4c17bfd393aa29104 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Wed, 30 Aug 2017 15:28:48 -0400 Subject: [PATCH 1/8] Compute projection instead of dictionary --- nanshe_ipython.ipynb | 107 ++++++------------------------------------- 1 file changed, 15 insertions(+), 92 deletions(-) diff --git a/nanshe_ipython.ipynb b/nanshe_ipython.ipynb index cfc79e5..fe63083 100644 --- a/nanshe_ipython.ipynb +++ b/nanshe_ipython.ipynb @@ -1136,10 +1136,10 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Normalize Data\n", + "### Project\n", "\n", "* `block_frames` (`int`): number of frames to work with in each full frame block (run in parallel).\n", - "* `norm_frames` (`int`): number of frames for use during normalization of each full frame block (run in parallel)." + "* `proj_type` (`str`): type of projection to take." ] }, { @@ -1152,12 +1152,12 @@ "\n", "\n", "block_frames = 40\n", - "norm_frames = 100\n", + "proj_type = \"max\"\n", "\n", "\n", "# Somehow we can't overwrite the file in the container so this is needed.\n", - "io_remove(data_basename + postfix_norm + zarr_ext)\n", - "io_remove(data_basename + postfix_norm + h5_ext)\n", + "io_remove(data_basename + postfix_dict + zarr_ext)\n", + "io_remove(data_basename + postfix_dict + h5_ext)\n", "\n", "\n", "with open_zarr(data_basename + postfix_wt + zarr_ext, \"r\") as f:\n", @@ -1173,10 +1173,14 @@ " da_imgs_flt.dtype.itemsize >= 4):\n", " da_imgs_flt = da_imgs_flt.astype(np.float32)\n", "\n", - " da_result = normalize_data(da_imgs)\n", + " da_result = da_imgs\n", + " if proj_type == \"max\":\n", + " da_result = da_result.max(axis=0, keepdims=True)\n", + " elif proj_type == \"std\":\n", + " da_result = da_result.std(axis=0, keepdims=True)\n", "\n", " # Store denoised data\n", - " with open_zarr(data_basename + postfix_norm + zarr_ext, \"w\") as f2:\n", + " with open_zarr(data_basename + postfix_dict + zarr_ext, \"w\") as f2:\n", " result = f2.create_dataset(\n", " \"images\",\n", " shape=da_result.shape,\n", @@ -1188,86 +1192,6 @@ " dask.distributed.progress(status, notebook=False)\n", "\n", "\n", - "zip_zarr(data_basename + postfix_norm + zarr_ext)\n", - "\n", - "with h5py.File(data_basename + postfix_norm + h5_ext, \"w\") as f2:\n", - " with open_zarr(data_basename + postfix_norm + zarr_ext, \"r\") as f1:\n", - " zarr_to_hdf5(f1, f2)\n", - "\n", - "\n", - "if __IPYTHON__:\n", - " result_image_stack = LazyZarrDataset(data_basename + postfix_norm + zarr_ext, \"images\")\n", - "\n", - " mplsv = plt.figure(FigureClass=MPLViewer)\n", - " mplsv.set_images(\n", - " result_image_stack,\n", - " vmin=par_compute_min_projection(num_frames=norm_frames)(result_image_stack).min(),\n", - " vmax=par_compute_max_projection(num_frames=norm_frames)(result_image_stack).max()\n", - " )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Dictionary Learning\n", - "\n", - "* `n_components` (`int`): number of basis images in the dictionary.\n", - "* `batchsize` (`int`): minibatch size to use.\n", - "* `iters` (`int`): number of iterations to run before getting dictionary.\n", - "* `lambda1` (`float`): weight for L1 sparisty enforcement on sparse code.\n", - "* `lambda2` (`float`): weight for L2 sparisty enforcement on sparse code.\n", - "\n", - "
\n", - "* `block_frames` (`int`): number of frames to work with in each full frame block (run in parallel).\n", - "* `norm_frames` (`int`): number of frames for use during normalization of each full frame block (run in parallel)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%time\n", - "\n", - "\n", - "n_components = 50\n", - "batchsize = 256\n", - "iters = 100\n", - "lambda1 = 0.2\n", - "lambda2 = 0.0\n", - "\n", - "block_frames = 51\n", - "norm_frames = 100\n", - "\n", - "\n", - "# Somehow we can't overwrite the file in the container so this is needed.\n", - "io_remove(data_basename + postfix_dict + zarr_ext)\n", - "io_remove(data_basename + postfix_dict + h5_ext)\n", - "\n", - "result = LazyZarrDataset(data_basename + postfix_norm + zarr_ext, \"images\")\n", - "block_shape = (block_frames,) + result.shape[1:]\n", - "with open_zarr(data_basename + postfix_dict + zarr_ext, \"w\") as f2:\n", - " new_result = f2.create_dataset(\"images\", shape=(n_components,) + result.shape[1:], dtype=result.dtype, chunks=True)\n", - "\n", - " result = par_generate_dictionary(block_shape)(\n", - " result,\n", - " n_components=n_components,\n", - " out=new_result,\n", - " **{\"sklearn.decomposition.dict_learning_online\" : {\n", - " \"n_jobs\" : 1,\n", - " \"n_iter\" : iters,\n", - " \"batch_size\" : batchsize,\n", - " \"alpha\" : lambda1\n", - " }\n", - " }\n", - " )\n", - "\n", - " result_j = f2.create_dataset(\"images_j\", shape=new_result.shape, dtype=numpy.uint16, chunks=True)\n", - " par_norm_layer(num_frames=norm_frames)(result, out=result_j)\n", - "\n", - "\n", "zip_zarr(data_basename + postfix_dict + zarr_ext)\n", "\n", "with h5py.File(data_basename + postfix_dict + h5_ext, \"w\") as f2:\n", @@ -1276,15 +1200,14 @@ "\n", "\n", "if __IPYTHON__:\n", - " result_image_stack = LazyZarrDataset(data_basename + postfix_dict + zarr_ext, \"images\")\n", + " result_image_stack = LazyZarrDataset(data_basename + postfix_dict + zarr_ext, \"images\")[...][...]\n", "\n", " mplsv = plt.figure(FigureClass=MPLViewer)\n", " mplsv.set_images(\n", " result_image_stack,\n", - " vmin=par_compute_min_projection(num_frames=norm_frames)(result_image_stack).min(),\n", - " vmax=par_compute_max_projection(num_frames=norm_frames)(result_image_stack).max()\n", - " )\n", - " mplsv.time_nav.stime.label.set_text(\"Basis Image\")" + " vmin=result_image_stack.min(),\n", + " vmax=result_image_stack.max()\n", + " )" ] }, { From 94e036b6a77c30a13cb0778b03bc8c10f9ea88db Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Fri, 25 Aug 2017 10:35:34 -0400 Subject: [PATCH 2/8] Require dask-ndmeasure Needed to perform connected components and to constrain the resulting label image. --- nanshe_workflow.recipe/meta.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/nanshe_workflow.recipe/meta.yaml b/nanshe_workflow.recipe/meta.yaml index d414e37..45aa83f 100644 --- a/nanshe_workflow.recipe/meta.yaml +++ b/nanshe_workflow.recipe/meta.yaml @@ -31,6 +31,7 @@ requirements: - dask-imread - dask-ndfilters - dask-ndfourier + - dask-ndmeasure - runipy - cloudpickle - tifffile From 71bba632f5a98f6eaafd8ca9a4487fb0f25e67c5 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Fri, 25 Aug 2017 15:32:25 -0400 Subject: [PATCH 3/8] Rewrite post-processing Simply use a threshold with no other constraints. 
--- nanshe_ipython.ipynb | 121 ++++++++++++++++++------------------------- 1 file changed, 50 insertions(+), 71 deletions(-) diff --git a/nanshe_ipython.ipynb b/nanshe_ipython.ipynb index fe63083..9a416cb 100644 --- a/nanshe_ipython.ipynb +++ b/nanshe_ipython.ipynb @@ -184,6 +184,7 @@ " import dask_imread\n", " import dask_ndfilters\n", " import dask_ndfourier\n", + " import dask_ndmeasure\n", "\n", " from toolz import sliding_window\n", "\n", @@ -1138,8 +1139,12 @@ "source": [ "### Project\n", "\n", + "* `proj_type` (`str`): type of projection to take.\n", + "\n", + "
\n", + "\n", "* `block_frames` (`int`): number of frames to work with in each full frame block (run in parallel).\n", - "* `proj_type` (`str`): type of projection to take." + "* `block_space` (`int`): extent of each spatial dimension for each block (run in parallel)." ] }, { @@ -1151,9 +1156,11 @@ "%%time\n", "\n", "\n", - "block_frames = 40\n", "proj_type = \"max\"\n", "\n", + "block_frames = 40\n", + "block_space = 300\n", + "\n", "\n", "# Somehow we can't overwrite the file in the container so this is needed.\n", "io_remove(data_basename + postfix_dict + zarr_ext)\n", @@ -1217,15 +1224,7 @@ "### Postprocessing\n", "\n", "* `significance_threshold` (`float`): number of standard deviations below which to include in \"noise\" estimate\n", - "* `wavelet_scale` (`int`): scale of wavelet transform to apply (should be the same as the one used above)\n", - "* `noise_threshold` (`float`): number of units of \"noise\" above which something needs to be to be significant\n", - "* `accepted_region_shape_constraints` (`dict`): if ROIs don't match this, reduce the `wavelet_scale` once.\n", - "* `percentage_pixels_below_max` (`float`): upper bound on ratio of ROI pixels not at max intensity vs. all ROI pixels\n", - "* `min_local_max_distance` (`float`): minimum allowable euclidean distance between two ROIs maximum intensities\n", - "* `accepted_neuron_shape_constraints` (`dict`): shape constraints for ROI to be kept.\n", - "\n", - "* `alignment_min_threshold` (`float`): similarity measure of the intensity of two ROIs images used for merging.\n", - "* `overlap_min_threshold` (`float`): similarity measure of the masks of two ROIs used for merging." + "* `noise_threshold` (`float`): number of units of \"noise\" above which something needs to be to be significant" ] }, { @@ -1238,80 +1237,60 @@ "\n", "\n", "significance_threshold = 3.0\n", - "wavelet_scale = 3\n", - "noise_threshold = 3.0\n", - "percentage_pixels_below_max = 0.8\n", - "min_local_max_distance = 16.0\n", - "\n", - "alignment_min_threshold = 0.6\n", - "overlap_min_threshold = 0.6\n", + "noise_threshold = 1.0\n", "\n", "\n", "# Somehow we can't overwrite the file in the container so this is needed.\n", "io_remove(data_basename + postfix_post + zarr_ext)\n", "io_remove(data_basename + postfix_post + h5_ext)\n", "\n", - "result = LazyZarrDataset(data_basename + postfix_dict + zarr_ext, \"images\")\n", - "with open_zarr(data_basename + postfix_post + zarr_ext, \"w\") as f2:\n", - " result = par_postprocess_data(result,\n", - " **{\n", - " \"wavelet_denoising\" : {\n", - " \"estimate_noise\" : {\n", - " \"significance_threshold\" : significance_threshold\n", - " },\n", - " \"wavelet.transform\" : {\n", - " \"scale\" : wavelet_scale\n", - " },\n", - " \"significant_mask\" : {\n", - " \"noise_threshold\" : noise_threshold\n", - " },\n", - " \"accepted_region_shape_constraints\" : {\n", - " \"major_axis_length\" : {\n", - " \"min\" : 0.0,\n", - " \"max\" : 25.0\n", - " }\n", - " },\n", - " \"remove_low_intensity_local_maxima\" : {\n", - " \"percentage_pixels_below_max\" : percentage_pixels_below_max\n", - " },\n", - " \"remove_too_close_local_maxima\" : {\n", - " \"min_local_max_distance\" : min_local_max_distance\n", - " },\n", - " \"accepted_neuron_shape_constraints\" : {\n", - " \"area\" : {\n", - " \"min\" : 25,\n", - " \"max\" : 600\n", - " },\n", - " \"eccentricity\" : {\n", - " \"min\" : 0.0,\n", - " \"max\" : 0.9\n", - " }\n", - " }\n", - " },\n", - " \"merge_neuron_sets\" : {\n", - " \"alignment_min_threshold\" : alignment_min_threshold,\n", 
- " \"overlap_min_threshold\" : overlap_min_threshold,\n", - " \"fuse_neurons\" : {\n", - " \"fraction_mean_neuron_max_threshold\" : 0.01\n", - " }\n", - " }\n", - " }\n", - " )\n", "\n", - " f2.create_group(\"rois\")\n", - " for each_name in result.dtype.names:\n", - " f2.require_group(\"rois\").create_dataset(\n", - " each_name,\n", - " data=result[each_name],\n", - " chunks=True\n", + "with open_zarr(data_basename + postfix_dict + zarr_ext, \"r\") as f:\n", + " with get_executor(client) as executor:\n", + " imgs = f[\"images\"]\n", + " da_imgs = da.from_array(\n", + " imgs, chunks=imgs.shape\n", " )\n", + " da_imgs = da_imgs[0]\n", + "\n", + " da_imgs_thrd = (da_imgs - noise_threshold * (da_imgs - significance_threshold * da_imgs.std()).std()) > 0\n", + "\n", + " da_lbl_img, da_num_lbls = dask_ndmeasure.label(da_imgs_thrd)\n", + " da_lbl_img, da_num_lbls = executor.persist([da_lbl_img, da_num_lbls])\n", + "\n", + " da_result = []\n", + " for i in irange(1, 1 + int(da_num_lbls)):\n", + " da_result.append(da_lbl_img == i)\n", + " da_result = da.stack(da_result)\n", + "\n", + " with open_zarr(data_basename + postfix_post + zarr_ext, \"w\") as f2:\n", + " result = f2.create_group(\"rois\").create_dataset(\n", + " \"mask\",\n", + " shape=da_result.shape,\n", + " dtype=da_result.dtype,\n", + " chunks=True\n", + " )\n", + " da_result = da_result.rechunk(result.chunks)\n", + " status = executor.compute(da.store(da_result, result, lock=False, compute=False))\n", + " dask.distributed.progress(status, notebook=False)\n", "\n", "\n", "zip_zarr(data_basename + postfix_post + zarr_ext)\n", "\n", "with h5py.File(data_basename + postfix_post + h5_ext, \"w\") as f2:\n", " with open_zarr(data_basename + postfix_post + zarr_ext, \"r\") as f1:\n", - " zarr_to_hdf5(f1, f2)" + " zarr_to_hdf5(f1, f2)\n", + "\n", + "\n", + "if __IPYTHON__:\n", + " result_image_stack = LazyZarrDataset(data_basename + postfix_post + zarr_ext, \"rois/mask\")[...][...].astype(np.uint8)\n", + "\n", + " mplsv = plt.figure(FigureClass=MPLViewer)\n", + " mplsv.set_images(\n", + " result_image_stack,\n", + " vmin=result_image_stack.min(),\n", + " vmax=result_image_stack.max()\n", + " )" ] }, { From 3c2a7add1f85e54793d53c0e49c0ba98f71c02c5 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Wed, 30 Aug 2017 13:03:08 -0400 Subject: [PATCH 4/8] Constrain area --- nanshe_ipython.ipynb | 100 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 94 insertions(+), 6 deletions(-) diff --git a/nanshe_ipython.ipynb b/nanshe_ipython.ipynb index 9a416cb..722503d 100644 --- a/nanshe_ipython.ipynb +++ b/nanshe_ipython.ipynb @@ -53,6 +53,7 @@ "postfix_wt = \"_wt\"\n", "postfix_norm = \"_norm\"\n", "postfix_dict = \"_dict\"\n", + "postfix_cc = \"_cc\"\n", "postfix_post = \"_post\"\n", "postfix_rois = \"_rois\"\n", "postfix_traces = \"_traces\"\n", @@ -560,7 +561,7 @@ "%%time\n", "\n", "\n", - "num_reps = 5\n", + "num_reps = 0\n", "tmpl_hist_wght = 0.25\n", "thld_rel_dist = 0.0\n", "\n", @@ -1221,7 +1222,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Postprocessing\n", + "### Connected components\n", "\n", "* `significance_threshold` (`float`): number of standard deviations below which to include in \"noise\" estimate\n", "* `noise_threshold` (`float`): number of units of \"noise\" above which something needs to be to be significant" @@ -1241,8 +1242,8 @@ "\n", "\n", "# Somehow we can't overwrite the file in the container so this is needed.\n", - "io_remove(data_basename + postfix_post + zarr_ext)\n", - "io_remove(data_basename + 
postfix_post + h5_ext)\n", + "io_remove(data_basename + postfix_cc + zarr_ext)\n", + "io_remove(data_basename + postfix_cc + h5_ext)\n", "\n", "\n", "with open_zarr(data_basename + postfix_dict + zarr_ext, \"r\") as f:\n", @@ -1260,8 +1261,95 @@ "\n", " da_result = []\n", " for i in irange(1, 1 + int(da_num_lbls)):\n", - " da_result.append(da_lbl_img == i)\n", - " da_result = da.stack(da_result)\n", + " da_result.append((da_lbl_img == i)[None])\n", + " da_result = da.concatenate(da_result)\n", + "\n", + " with open_zarr(data_basename + postfix_cc + zarr_ext, \"w\") as f2:\n", + " result = f2.create_group(\"rois\").create_dataset(\n", + " \"mask\",\n", + " shape=da_result.shape,\n", + " dtype=da_result.dtype,\n", + " chunks=True\n", + " )\n", + " da_result = da_result.rechunk(result.chunks)\n", + " status = executor.compute(da.store(da_result, result, lock=False, compute=False))\n", + " dask.distributed.progress(status, notebook=False)\n", + "\n", + "\n", + "zip_zarr(data_basename + postfix_cc + zarr_ext)\n", + "\n", + "with h5py.File(data_basename + postfix_cc + h5_ext, \"w\") as f2:\n", + " with open_zarr(data_basename + postfix_cc + zarr_ext, \"r\") as f1:\n", + " zarr_to_hdf5(f1, f2)\n", + "\n", + "\n", + "if __IPYTHON__:\n", + " result_image_stack = LazyZarrDataset(data_basename + postfix_cc + zarr_ext, \"rois/mask\")[...][...].astype(np.uint8)\n", + "\n", + " mplsv = plt.figure(FigureClass=MPLViewer)\n", + " mplsv.set_images(\n", + " result_image_stack,\n", + " vmin=result_image_stack.min(),\n", + " vmax=result_image_stack.max()\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### ROI Refinement\n", + "\n", + "* `area_min_threshold` (`float`): minimum area required for all ROIs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "\n", + "\n", + "area_min_threshold = 20.0\n", + "\n", + "\n", + "# Somehow we can't overwrite the file in the container so this is needed.\n", + "io_remove(data_basename + postfix_post + zarr_ext)\n", + "io_remove(data_basename + postfix_post + h5_ext)\n", + "\n", + "\n", + "with open_zarr(data_basename + postfix_cc + zarr_ext, \"r\") as f:\n", + " with get_executor(client) as executor:\n", + " imgs = f[\"rois/mask\"]\n", + " da_imgs = da.from_array(\n", + " imgs, chunks=imgs.shape\n", + " )\n", + "\n", + " da_num_lbls = len(da_imgs)\n", + " da_lbl_img = (\n", + " np.arange(1, 1 + da_num_lbls)[(slice(None),) + da_lbl_img.ndim * (None,)] * da_imgs\n", + " ).sum(axis=0)\n", + "\n", + " da_area_lbls = dask_ndmeasure.sum(\n", + " da.ones(da_lbl_img.shape, dtype=int, chunks=da_lbl_img.chunks),\n", + " da_lbl_img,\n", + " list(irange(1, 1 + int(da_num_lbls)))\n", + " )\n", + "\n", + " da_lbl_img, da_num_lbls = dask_ndmeasure.label(\n", + " (\n", + " ((da_area_lbls >= area_min_threshold)[(slice(None),) + da_lbl_img.ndim * (None,)] * da_lbl_img) > 0\n", + " ).sum(axis=0)\n", + " )\n", + "\n", + " da_lbl_img, da_num_lbls = executor.persist([da_lbl_img, da_num_lbls])\n", + "\n", + " da_result = []\n", + " for i in irange(1, 1 + int(da_num_lbls)):\n", + " da_result.append((da_lbl_img == i)[None])\n", + " da_result = da.concatenate(da_result)\n", "\n", " with open_zarr(data_basename + postfix_post + zarr_ext, \"w\") as f2:\n", " result = f2.create_group(\"rois\").create_dataset(\n", From fe3381d4f013645ee0e3daa12dd42ce2ca4a1ed5 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Wed, 6 Sep 2017 10:43:18 -0400 Subject: [PATCH 5/8] Threshold processed data with 
mask --- nanshe_ipython.ipynb | 74 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/nanshe_ipython.ipynb b/nanshe_ipython.ipynb index 722503d..6151cd0 100644 --- a/nanshe_ipython.ipynb +++ b/nanshe_ipython.ipynb @@ -55,6 +55,7 @@ "postfix_dict = \"_dict\"\n", "postfix_cc = \"_cc\"\n", "postfix_post = \"_post\"\n", + "postfix_thrd = \"_thrd\"\n", "postfix_rois = \"_rois\"\n", "postfix_traces = \"_traces\"\n", "postfix_proj = \"_proj\"\n", @@ -1381,6 +1382,79 @@ " )" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Threshold data\n", + "\n", + "* `block_frames` (`int`): number of frames to work with in each full frame block (run in parallel).\n", + "* `block_space` (`int`): extent of each spatial dimension for each block (run in parallel)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "\n", + "\n", + "block_frames = 40\n", + "block_space = 300\n", + "\n", + "\n", + "# Somehow we can't overwrite the file in the container so this is needed.\n", + "io_remove(data_basename + postfix_thrd + zarr_ext)\n", + "io_remove(data_basename + postfix_thrd + h5_ext)\n", + "\n", + "with open_zarr(data_basename + postfix_wt + zarr_ext, \"r\") as f:\n", + " with open_zarr(data_basename + postfix_post + zarr_ext, \"r\") as f2:\n", + " with get_executor(client) as executor:\n", + " # Load and prep data for computation.\n", + " imgs = f[\"images\"]\n", + " da_imgs = da.from_array(\n", + " imgs, chunks=(block_frames,) + (imgs.ndim - 1) * (block_space,)\n", + " )\n", + " msks = f2[\"rois/mask\"]\n", + " da_msks = da.from_array(\n", + " msks, chunks=(block_frames,) + (imgs.ndim - 1) * (block_space,)\n", + " )\n", + "\n", + " da_result = da_imgs * da_msks.max(axis=0, keepdims=True).astype(da_imgs.dtype)\n", + "\n", + " # Store data\n", + " with open_zarr(data_basename + postfix_thrd + zarr_ext, \"w\") as f2:\n", + " result = f2.create_dataset(\n", + " \"images\",\n", + " shape=da_result.shape,\n", + " dtype=da_result.dtype,\n", + " chunks=True\n", + " )\n", + " da_result = da_result.rechunk(result.chunks)\n", + " status = executor.compute(da.store(da_result, result, lock=False, compute=False))\n", + " dask.distributed.progress(status, notebook=False)\n", + "\n", + "\n", + "zip_zarr(data_basename + postfix_thrd + zarr_ext)\n", + "\n", + "with h5py.File(data_basename + postfix_thrd + h5_ext, \"w\") as f2:\n", + " with open_zarr(data_basename + postfix_thrd + zarr_ext, \"r\") as f1:\n", + " zarr_to_hdf5(f1, f2)\n", + "\n", + "\n", + "if __IPYTHON__:\n", + " result_image_stack = LazyZarrDataset(data_basename + postfix_thrd + zarr_ext, \"images\")[...][...]\n", + "\n", + " mplsv = plt.figure(FigureClass=MPLViewer)\n", + " mplsv.set_images(\n", + " result_image_stack,\n", + " vmin=result_image_stack.min(),\n", + " vmax=result_image_stack.max()\n", + " )" + ] + }, { "cell_type": "markdown", "metadata": {}, From 6cdc32abf17c39c33469f4fee363f2743047abc5 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Wed, 6 Sep 2017 11:15:14 -0400 Subject: [PATCH 6/8] Find dictionary on thresholded data --- nanshe_ipython.ipynb | 100 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 91 insertions(+), 9 deletions(-) diff --git a/nanshe_ipython.ipynb b/nanshe_ipython.ipynb index 6151cd0..c888f02 100644 --- a/nanshe_ipython.ipynb +++ b/nanshe_ipython.ipynb @@ -52,10 +52,11 @@ "postfix_f_f0 = \"_f_f0\"\n", "postfix_wt = \"_wt\"\n", "postfix_norm = \"_norm\"\n", - "postfix_dict = 
\"_dict\"\n", + "postfix_flat = \"_flat\"\n", "postfix_cc = \"_cc\"\n", "postfix_post = \"_post\"\n", "postfix_thrd = \"_thrd\"\n", + "postfix_dict = \"_dict\"\n", "postfix_rois = \"_rois\"\n", "postfix_traces = \"_traces\"\n", "postfix_proj = \"_proj\"\n", @@ -1165,8 +1166,8 @@ "\n", "\n", "# Somehow we can't overwrite the file in the container so this is needed.\n", - "io_remove(data_basename + postfix_dict + zarr_ext)\n", - "io_remove(data_basename + postfix_dict + h5_ext)\n", + "io_remove(data_basename + postfix_flat + zarr_ext)\n", + "io_remove(data_basename + postfix_flat + h5_ext)\n", "\n", "\n", "with open_zarr(data_basename + postfix_wt + zarr_ext, \"r\") as f:\n", @@ -1189,7 +1190,7 @@ " da_result = da_result.std(axis=0, keepdims=True)\n", "\n", " # Store denoised data\n", - " with open_zarr(data_basename + postfix_dict + zarr_ext, \"w\") as f2:\n", + " with open_zarr(data_basename + postfix_flat + zarr_ext, \"w\") as f2:\n", " result = f2.create_dataset(\n", " \"images\",\n", " shape=da_result.shape,\n", @@ -1201,15 +1202,15 @@ " dask.distributed.progress(status, notebook=False)\n", "\n", "\n", - "zip_zarr(data_basename + postfix_dict + zarr_ext)\n", + "zip_zarr(data_basename + postfix_flat + zarr_ext)\n", "\n", - "with h5py.File(data_basename + postfix_dict + h5_ext, \"w\") as f2:\n", - " with open_zarr(data_basename + postfix_dict + zarr_ext, \"r\") as f1:\n", + "with h5py.File(data_basename + postfix_flat + h5_ext, \"w\") as f2:\n", + " with open_zarr(data_basename + postfix_flat + zarr_ext, \"r\") as f1:\n", " zarr_to_hdf5(f1, f2)\n", "\n", "\n", "if __IPYTHON__:\n", - " result_image_stack = LazyZarrDataset(data_basename + postfix_dict + zarr_ext, \"images\")[...][...]\n", + " result_image_stack = LazyZarrDataset(data_basename + postfix_flat + zarr_ext, \"images\")[...][...]\n", "\n", " mplsv = plt.figure(FigureClass=MPLViewer)\n", " mplsv.set_images(\n", @@ -1247,7 +1248,7 @@ "io_remove(data_basename + postfix_cc + h5_ext)\n", "\n", "\n", - "with open_zarr(data_basename + postfix_dict + zarr_ext, \"r\") as f:\n", + "with open_zarr(data_basename + postfix_flat + zarr_ext, \"r\") as f:\n", " with get_executor(client) as executor:\n", " imgs = f[\"images\"]\n", " da_imgs = da.from_array(\n", @@ -1455,6 +1456,87 @@ " )" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Dictionary Learning\n", + "\n", + "* `n_components` (`int`): number of basis images in the dictionary.\n", + "* `batchsize` (`int`): minibatch size to use.\n", + "* `iters` (`int`): number of iterations to run before getting dictionary.\n", + "* `lambda1` (`float`): weight for L1 sparisty enforcement on sparse code.\n", + "* `lambda2` (`float`): weight for L2 sparisty enforcement on sparse code.\n", + "\n", + "
\n", + "* `block_frames` (`int`): number of frames to work with in each full frame block (run in parallel).\n", + "* `norm_frames` (`int`): number of frames for use during normalization of each full frame block (run in parallel)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "\n", + "\n", + "n_components = 50\n", + "batchsize = 256\n", + "iters = 100\n", + "lambda1 = 0.2\n", + "lambda2 = 0.0\n", + "\n", + "block_frames = 51\n", + "norm_frames = 100\n", + "\n", + "\n", + "# Somehow we can't overwrite the file in the container so this is needed.\n", + "io_remove(data_basename + postfix_dict + zarr_ext)\n", + "io_remove(data_basename + postfix_dict + h5_ext)\n", + "\n", + "result = LazyZarrDataset(data_basename + postfix_thrd + zarr_ext, \"images\")\n", + "block_shape = (block_frames,) + result.shape[1:]\n", + "with open_zarr(data_basename + postfix_dict + zarr_ext, \"w\") as f2:\n", + " new_result = f2.create_dataset(\"images\", shape=(n_components,) + result.shape[1:], dtype=result.dtype, chunks=True)\n", + "\n", + " result = par_generate_dictionary(block_shape)(\n", + " result,\n", + " n_components=n_components,\n", + " out=new_result,\n", + " **{\"sklearn.decomposition.dict_learning_online\" : {\n", + " \"n_jobs\" : 1,\n", + " \"n_iter\" : iters,\n", + " \"batch_size\" : batchsize,\n", + " \"alpha\" : lambda1\n", + " }\n", + " }\n", + " )\n", + "\n", + " result_j = f2.create_dataset(\"images_j\", shape=new_result.shape, dtype=numpy.uint16, chunks=True)\n", + " par_norm_layer(num_frames=norm_frames)(result, out=result_j)\n", + "\n", + "\n", + "zip_zarr(data_basename + postfix_dict + zarr_ext)\n", + "\n", + "with h5py.File(data_basename + postfix_dict + h5_ext, \"w\") as f2:\n", + " with open_zarr(data_basename + postfix_dict + zarr_ext, \"r\") as f1:\n", + " zarr_to_hdf5(f1, f2)\n", + "\n", + "\n", + "if __IPYTHON__:\n", + " result_image_stack = LazyZarrDataset(data_basename + postfix_dict + zarr_ext, \"images\")\n", + "\n", + " mplsv = plt.figure(FigureClass=MPLViewer)\n", + " mplsv.set_images(\n", + " result_image_stack,\n", + " vmin=par_compute_min_projection(num_frames=norm_frames)(result_image_stack).min(),\n", + " vmax=par_compute_max_projection(num_frames=norm_frames)(result_image_stack).max()\n", + " )\n", + " mplsv.time_nav.stime.label.set_text(\"Basis Image\")" + ] + }, { "cell_type": "markdown", "metadata": {}, From 5313bc3ccfd2a042b4b3ed05582a39beac1a6ac0 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Wed, 6 Sep 2017 16:55:45 -0400 Subject: [PATCH 7/8] WIP: Extract ROIs from dictionary --- nanshe_ipython.ipynb | 229 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 211 insertions(+), 18 deletions(-) diff --git a/nanshe_ipython.ipynb b/nanshe_ipython.ipynb index c888f02..203e4b5 100644 --- a/nanshe_ipython.ipynb +++ b/nanshe_ipython.ipynb @@ -212,6 +212,18 @@ "logging.getLogger(\"nanshe\").setLevel(logging.INFO)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from builtins import (\n", + " map as imap,\n", + " range as irange\n", + ")" + ] + }, { "cell_type": "code", "execution_count": null, @@ -1300,9 +1312,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### ROI Refinement\n", + "### Component Refinement\n", "\n", - "* `area_min_threshold` (`float`): minimum area required for all ROIs" + "* `area_min_threshold` (`float`): minimum area required for all connected components" ] }, { @@ -1456,6 +1468,80 @@ " )" 
] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Normalize Data\n", + "\n", + "* `block_frames` (`int`): number of frames to work with in each full frame block (run in parallel).\n", + "* `norm_frames` (`int`): number of frames for use during normalization of each full frame block (run in parallel)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "\n", + "\n", + "block_frames = 40\n", + "norm_frames = 100\n", + "\n", + "\n", + "# Somehow we can't overwrite the file in the container so this is needed.\n", + "io_remove(data_basename + postfix_norm + zarr_ext)\n", + "io_remove(data_basename + postfix_norm + h5_ext)\n", + "\n", + "\n", + "with open_zarr(data_basename + postfix_thrd + zarr_ext, \"r\") as f:\n", + " with get_executor(client) as executor:\n", + " # Load and prep data for computation.\n", + " imgs = f[\"images\"]\n", + " da_imgs = da.from_array(\n", + " imgs, chunks=(block_frames,) + (imgs.ndim - 1) * (block_space,)\n", + " )\n", + "\n", + " da_imgs_flt = da_imgs\n", + " if not (issubclass(da_imgs_flt.dtype.type, np.floating) and \n", + " da_imgs_flt.dtype.itemsize >= 4):\n", + " da_imgs_flt = da_imgs_flt.astype(np.float32)\n", + "\n", + " da_result = normalize_data(da_imgs)\n", + "\n", + " # Store denoised data\n", + " with open_zarr(data_basename + postfix_norm + zarr_ext, \"w\") as f2:\n", + " result = f2.create_dataset(\n", + " \"images\",\n", + " shape=da_result.shape,\n", + " dtype=da_result.dtype,\n", + " chunks=True\n", + " )\n", + " da_result = da_result.rechunk(result.chunks)\n", + " status = executor.compute(da.store(da_result, result, lock=False, compute=False))\n", + " dask.distributed.progress(status, notebook=False)\n", + "\n", + "\n", + "zip_zarr(data_basename + postfix_norm + zarr_ext)\n", + "\n", + "with h5py.File(data_basename + postfix_norm + h5_ext, \"w\") as f2:\n", + " with open_zarr(data_basename + postfix_norm + zarr_ext, \"r\") as f1:\n", + " zarr_to_hdf5(f1, f2)\n", + "\n", + "\n", + "if __IPYTHON__:\n", + " result_image_stack = LazyZarrDataset(data_basename + postfix_norm + zarr_ext, \"images\")\n", + "\n", + " mplsv = plt.figure(FigureClass=MPLViewer)\n", + " mplsv.set_images(\n", + " result_image_stack,\n", + " vmin=par_compute_min_projection(num_frames=norm_frames)(result_image_stack).min(),\n", + " vmax=par_compute_max_projection(num_frames=norm_frames)(result_image_stack).max()\n", + " )" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -1485,7 +1571,7 @@ "n_components = 50\n", "batchsize = 256\n", "iters = 100\n", - "lambda1 = 0.2\n", + "lambda1 = 0.5\n", "lambda2 = 0.0\n", "\n", "block_frames = 51\n", @@ -1496,7 +1582,7 @@ "io_remove(data_basename + postfix_dict + zarr_ext)\n", "io_remove(data_basename + postfix_dict + h5_ext)\n", "\n", - "result = LazyZarrDataset(data_basename + postfix_thrd + zarr_ext, \"images\")\n", + "result = LazyZarrDataset(data_basename + postfix_norm + zarr_ext, \"images\")\n", "block_shape = (block_frames,) + result.shape[1:]\n", "with open_zarr(data_basename + postfix_dict + zarr_ext, \"w\") as f2:\n", " new_result = f2.create_dataset(\"images\", shape=(n_components,) + result.shape[1:], dtype=result.dtype, chunks=True)\n", @@ -1541,9 +1627,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### ROI and trace extraction\n", + "### ROI Extraction\n", "\n", - "* `block_frames` (`int`): number of frames to work with in each block (run in parallel)." 
+ "* `normed_max_threshold` (`float`): minimum required value of maximum value of the normalized basis images.\n", + "* `basis_threshold` (`float`): threshold used on each basis image.\n", + "\n", + "
\n", + "\n", + "* `block_frames` (`int`): number of frames to work with in each full frame block (run in parallel)." ] }, { @@ -1552,28 +1643,97 @@ "metadata": {}, "outputs": [], "source": [ - "%%time\n", + "#%%time\n", "\n", "\n", - "block_frames = 100\n", + "normed_max_threshold = 0.1\n", + "basis_threshold = 0.01\n", + "\n", + "block_frames = 40\n", "\n", "\n", "# Somehow we can't overwrite the file in the container so this is needed.\n", "io_remove(data_basename + postfix_rois + zarr_ext)\n", "io_remove(data_basename + postfix_rois + h5_ext)\n", "\n", - "with open_zarr(data_basename + postfix_rois + zarr_ext, \"w\") as f2:\n", - " with open_zarr(data_basename + postfix_post + zarr_ext, \"r\") as f1:\n", - " f2[\"masks\"] = f1[\"rois/mask\"]\n", + "with open_zarr(data_basename + postfix_dict + zarr_ext, \"r\") as f:\n", + " with get_executor(client) as executor:\n", + " # Load and prep data for computation.\n", + " imgs = f[\"images\"]\n", + " da_imgs = da.from_array(\n", + " imgs, chunks=(block_frames,) + imgs.shape[1:]\n", + " )\n", + "\n", + " da_imgs_pos = da.where(da_imgs > 0, da_imgs, 0)\n", + " da_imgs_norm = normalize_data(da_imgs_pos)\n", "\n", - " mskimg = f2[\"masks\"]\n", - " mskimg_j = f2.create_dataset(\"masks_j\", shape=mskimg.shape, dtype=numpy.uint8, chunks=True)\n", - " par_norm_layer(num_frames=block_frames)(mskimg, out=mskimg_j)\n", + " da_imgs_thrd = (\n", + " da_imgs_norm *\n", + " (\n", + " da_imgs_norm.max(axis=tuple(irange(1, da_imgs_norm.ndim)), keepdims=True) > normed_max_threshold\n", + " ).astype(da_imgs.dtype)\n", + " )\n", + " da_imgs_thrd = (da_imgs_thrd > basis_threshold)\n", + "\n", + " da_lbl_imgs = []\n", + " da_num_lbls = []\n", + " for i in irange(len(da_imgs_thrd)):\n", + " da_lbl_imgs_i, da_num_lbls_i = dask_ndmeasure.label(da_imgs_thrd[i])\n", + "\n", + " da_lbl_imgs.append(da_lbl_imgs_i)\n", + " da_num_lbls.append(da.asarray([da_num_lbls_i]))\n", + "\n", + " da_lbl_imgs = da.stack(da_lbl_imgs)\n", + " da_num_lbls = da.concatenate(da_num_lbls)\n", + "\n", + " da_adj_lbls = da.concatenate([\n", + " da.asarray(da_num_lbls.dtype.type(0)[None]),\n", + " da.cumsum(da_num_lbls, axis=0)[:-1]\n", + " ])\n", + "\n", + " da_lbl_imgs = da_lbl_imgs + da.where(\n", + " da_lbl_imgs != 0,\n", + " da_adj_lbls[(slice(None),) + (da_lbl_imgs.ndim - da_adj_lbls.ndim) * (None,)],\n", + " da_lbl_imgs.dtype.type(0)\n", + " )\n", + "\n", + " da_lbl_imgs, da_num_lbls, da_adj_lbls = executor.persist([da_lbl_imgs, da_num_lbls, da_adj_lbls])\n", + "\n", + " da_msk_imgs = []\n", + " for i in irange(len(da_lbl_imgs)):\n", + " for l in irange(1 + da_adj_lbls[i].compute(),\n", + " (1 + da_num_lbls + da_adj_lbls)[i].compute()):\n", + " da_msk_imgs.append(\n", + " (da_lbl_imgs[i] == l)[None]\n", + " )\n", + " da_msk_imgs = da.concatenate(da_msk_imgs)\n", + "\n", + " del da_num_lbls, da_adj_lbls\n", + "\n", + " da_lbl_img = (\n", + " da.arange(\n", + " 1, 1 + len(da_msk_imgs), chunks=da_msk_imgs.chunks[0]\n", + " )[(slice(None),) + (da_msk_imgs.ndim - 1) * (None,)] *\n", + " da_msk_imgs.astype(int)\n", + " ).sum(axis=0)\n", + " da_msk_imgs = da_msk_imgs.astype(np.uint8)\n", + "\n", + " # Store data\n", + " with open_zarr(data_basename + postfix_rois + zarr_ext, \"w\") as f2:\n", + " status = []\n", + " for name, da_result in {\"masks\" : da_msk_imgs, \"labels\" : da_lbl_img}.items():\n", + " result = f2.create_dataset(\n", + " name,\n", + " shape=da_result.shape,\n", + " dtype=da_result.dtype,\n", + " chunks=True\n", + " )\n", + " da_result = da_result.rechunk(result.chunks)\n", + 
" status.append(executor.compute(da.store(\n", + " da_result, result, lock=False, compute=False\n", + " )))\n", + " dask.distributed.progress(status, notebook=False)\n", "\n", - " lblimg = label_mask_stack(mskimg, np.uint64)\n", - " f2[\"labels\"] = lblimg\n", - " f2[\"labels_j\"] = lblimg.astype(np.uint16)\n", - " lblimg = f2[\"labels\"]\n", "\n", "zip_zarr(data_basename + postfix_rois + zarr_ext)\n", "\n", @@ -1581,6 +1741,39 @@ " with open_zarr(data_basename + postfix_rois + zarr_ext, \"r\") as f1:\n", " zarr_to_hdf5(f1, f2)\n", "\n", + "\n", + "if __IPYTHON__:\n", + " result_image_stack = LazyZarrDataset(data_basename + postfix_rois + zarr_ext, \"masks\")[...][...]\n", + "\n", + " mplsv = plt.figure(FigureClass=MPLViewer)\n", + " mplsv.set_images(\n", + " result_image_stack,\n", + " vmin=result_image_stack.min(),\n", + " vmax=result_image_stack.max()\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Trace extraction\n", + "\n", + "* `block_frames` (`int`): number of frames to work with in each block (run in parallel)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "\n", + "\n", + "block_frames = 100\n", + "\n", + "\n", "# Somehow we can't overwrite the file in the container so this is needed.\n", "io_remove(data_basename + postfix_traces + zarr_ext)\n", "io_remove(data_basename + postfix_traces + h5_ext)\n", From 4e0288a723a1eef6fff304895312a0eab69f6eb0 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Wed, 13 Sep 2017 11:04:07 -0400 Subject: [PATCH 8/8] Add comments --- nanshe_ipython.ipynb | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/nanshe_ipython.ipynb b/nanshe_ipython.ipynb index 203e4b5..61625af 100644 --- a/nanshe_ipython.ipynb +++ b/nanshe_ipython.ipynb @@ -1664,6 +1664,7 @@ " imgs, chunks=(block_frames,) + imgs.shape[1:]\n", " )\n", "\n", + " # Drop empty basis images and threshold the rest\n", " da_imgs_pos = da.where(da_imgs > 0, da_imgs, 0)\n", " da_imgs_norm = normalize_data(da_imgs_pos)\n", "\n", @@ -1675,6 +1676,7 @@ " )\n", " da_imgs_thrd = (da_imgs_thrd > basis_threshold)\n", "\n", + " # Find the label images for each basis image\n", " da_lbl_imgs = []\n", " da_num_lbls = []\n", " for i in irange(len(da_imgs_thrd)):\n", @@ -1699,6 +1701,7 @@ "\n", " da_lbl_imgs, da_num_lbls, da_adj_lbls = executor.persist([da_lbl_imgs, da_num_lbls, da_adj_lbls])\n", "\n", + " # Extract ROIs from basis images\n", " da_msk_imgs = []\n", " for i in irange(len(da_lbl_imgs)):\n", " for l in irange(1 + da_adj_lbls[i].compute(),\n", @@ -1710,6 +1713,7 @@ "\n", " del da_num_lbls, da_adj_lbls\n", "\n", + " # Generate label image and convert mask type\n", " da_lbl_img = (\n", " da.arange(\n", " 1, 1 + len(da_msk_imgs), chunks=da_msk_imgs.chunks[0]\n",