From c03e84e7d76cc21d072b260199c0ad3db6e362f8 Mon Sep 17 00:00:00 2001
From: John Kirkham
Date: Tue, 3 Apr 2018 16:16:38 -0400
Subject: [PATCH 1/3] Import dask_ndmeasure

Make sure to import `dask_ndmeasure` along with the other dask-image org
libraries. It will be needed to construct the label image; in other words, it
will be used for ROI extraction.
---
 nanshe_ipython.ipynb | 1 +
 1 file changed, 1 insertion(+)

diff --git a/nanshe_ipython.ipynb b/nanshe_ipython.ipynb
index c09b222..69ba1b1 100644
--- a/nanshe_ipython.ipynb
+++ b/nanshe_ipython.ipynb
@@ -190,6 +190,7 @@
 "import dask_imread\n",
 "import dask_ndfilters\n",
 "import dask_ndfourier\n",
+ "import dask_ndmeasure\n",
 "\n",
 "import zarr\n",
 "\n",

From 192d030a54b294b69dd4358dcc6e11e5ee2e03ff Mon Sep 17 00:00:00 2001
From: John Kirkham
Date: Tue, 3 Apr 2018 16:16:40 -0400
Subject: [PATCH 2/3] WIP: Rewrite post-processing

Simply use a threshold with no other constraints.
---
 nanshe_ipython.ipynb | 117 +++++++++++++++----------------------------
 1 file changed, 41 insertions(+), 76 deletions(-)

diff --git a/nanshe_ipython.ipynb b/nanshe_ipython.ipynb
index 69ba1b1..e4ae09b 100644
--- a/nanshe_ipython.ipynb
+++ b/nanshe_ipython.ipynb
@@ -1159,15 +1159,7 @@
 "### Postprocessing\n",
 "\n",
 "* `significance_threshold` (`float`): number of standard deviations below which to include in \"noise\" estimate\n",
- "* `wavelet_scale` (`int`): scale of wavelet transform to apply (should be the same as the one used above)\n",
- "* `noise_threshold` (`float`): number of units of \"noise\" above which something needs to be to be significant\n",
- "* `accepted_region_shape_constraints` (`dict`): if ROIs don't match this, reduce the `wavelet_scale` once.\n",
- "* `percentage_pixels_below_max` (`float`): upper bound on ratio of ROI pixels not at max intensity vs. all ROI pixels\n",
- "* `min_local_max_distance` (`float`): minimum allowable euclidean distance between two ROIs maximum intensities\n",
- "* `accepted_neuron_shape_constraints` (`dict`): shape constraints for ROI to be kept.\n",
- "\n",
- "* `alignment_min_threshold` (`float`): similarity measure of the intensity of two ROIs images used for merging.\n",
- "* `overlap_min_threshold` (`float`): similarity measure of the masks of two ROIs used for merging."
+ "* `noise_threshold` (`float`): number of units of \"noise\" above which something needs to be to be significant" ] }, { @@ -1177,13 +1169,7 @@ "outputs": [], "source": [ "significance_threshold = 3.0\n", - "wavelet_scale = 3\n", - "noise_threshold = 3.0\n", - "percentage_pixels_below_max = 0.8\n", - "min_local_max_distance = 16.0\n", - "\n", - "alignment_min_threshold = 0.6\n", - "overlap_min_threshold = 0.6\n", + "noise_threshold = 1.0\n", "\n", "\n", "with suppress(KeyError):\n", @@ -1191,68 +1177,47 @@ "zarr_store.require_group(subgroup_post)\n", "\n", "\n", - "imgs = dask_store._diskstore[subgroup_dict]\n", - "da_imgs = da.from_array(imgs, chunks=((1,) + imgs.shape[1:]))\n", - "\n", - "result = block_postprocess_data_parallel(client)(da_imgs,\n", - " **{\n", - " \"wavelet_denoising\" : {\n", - " \"estimate_noise\" : {\n", - " \"significance_threshold\" : significance_threshold\n", - " },\n", - " \"wavelet.transform\" : {\n", - " \"scale\" : wavelet_scale\n", - " },\n", - " \"significant_mask\" : {\n", - " \"noise_threshold\" : noise_threshold\n", - " },\n", - " \"accepted_region_shape_constraints\" : {\n", - " \"major_axis_length\" : {\n", - " \"min\" : 0.0,\n", - " \"max\" : 25.0\n", - " }\n", - " },\n", - " \"remove_low_intensity_local_maxima\" : {\n", - " \"percentage_pixels_below_max\" : percentage_pixels_below_max\n", - " },\n", - " \"remove_too_close_local_maxima\" : {\n", - " \"min_local_max_distance\" : min_local_max_distance\n", - " },\n", - " \"accepted_neuron_shape_constraints\" : {\n", - " \"area\" : {\n", - " \"min\" : 25,\n", - " \"max\" : 600\n", - " },\n", - " \"eccentricity\" : {\n", - " \"min\" : 0.0,\n", - " \"max\" : 0.9\n", - " }\n", - " }\n", - " },\n", - " \"merge_neuron_sets\" : {\n", - " \"alignment_min_threshold\" : alignment_min_threshold,\n", - " \"overlap_min_threshold\" : overlap_min_threshold,\n", - " \"fuse_neurons\" : {\n", - " \"fraction_mean_neuron_max_threshold\" : 0.01\n", - " }\n", - " }\n", - " }\n", - ")\n", + "da_imgs = dask_store[subgroup_dict]\n", + "da_imgs = da_imgs.rechunk(((1,) + da_imgs.shape[1:]))\n", "\n", - "# Store projections\n", - "dask_store.update(dict(zip(\n", - " [\"%s/%s\" % (subgroup_post, e) for e in result.dtype.names],\n", - " [result[e] for e in result.dtype.names]\n", - ")))\n", + "da_imgs = da_imgs[0]\n", "\n", - "dask.distributed.progress(\n", - " dask.distributed.futures_of([\n", - " dask_store[\"%s/%s\" % (subgroup_post, e)]\n", - " for e in result.dtype.names\n", - " ]),\n", - " notebook=False\n", - ")\n", - "print(\"\")" + "da_imgs_thrd = (da_imgs - noise_threshold * (da_imgs - significance_threshold * da_imgs.std()).std()) > 0\n", + "\n", + "da_lbl_img, da_num_lbls = dask_ndmeasure.label(da_imgs_thrd)\n", + "da_lbl_img, da_num_lbls = client.persist([da_lbl_img, da_num_lbls])\n", + "\n", + "da_result = []\n", + "for i in irange(1, 1 + int(da_num_lbls)):\n", + " da_result.append(da_lbl_img == i)\n", + "da_result = da.stack(da_result)\n", + "\n", + "dask_store[subgroup_post_mask] = da_result\n", + "\n", + "dask.distributed.progress(dask_store[subgroup_post_mask], notebook=False)\n", + "print(\"\")\n", + "\n", + "\n", + "# View results\n", + "imgs_min, imgs_max = 0, 100\n", + "\n", + "da_imgs = dask_store[subgroup_post_mask]\n", + "da_imgs = da_imgs.astype(np.uint8)\n", + "\n", + "da_imgs_min, da_imgs_max = da_imgs.min(), da_imgs.max()\n", + "\n", + "status = client.compute([da_imgs_min, da_imgs_max])\n", + "dask.distributed.progress(status, notebook=False)\n", + "print(\"\")\n", + "\n", + "imgs_min, imgs_max = 
[s.result() for s in status]\n", + "\n", + "mplsv = plt.figure(FigureClass=MPLViewer)\n", + "mplsv.set_images(\n", + " da_imgs,\n", + " vmin=imgs_min,\n", + " vmax=imgs_max\n", + ")" ] }, { From 8ce523796de7e5cd6be0189564166d17b9a30a3b Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Tue, 3 Apr 2018 16:16:41 -0400 Subject: [PATCH 3/3] Try mapping label and convert to masks Also make the chunks concrete (as they are undefined) to allow storage. --- nanshe_ipython.ipynb | 106 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 85 insertions(+), 21 deletions(-) diff --git a/nanshe_ipython.ipynb b/nanshe_ipython.ipynb index e4ae09b..2183950 100644 --- a/nanshe_ipython.ipynb +++ b/nanshe_ipython.ipynb @@ -1162,6 +1162,60 @@ "* `noise_threshold` (`float`): number of units of \"noise\" above which something needs to be to be significant" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def np_label(a):\n", + " return np.array(\n", + " scipy.ndimage.label(a),\n", + " dtype=[\n", + " (\"label\", np.int32, da_imgs_thrd.shape),\n", + " (\"num\", int, ()),\n", + " ]\n", + " )\n", + "\n", + "def label_chunk(a):\n", + " return np.stack([np_label(e) for e in a])\n", + "\n", + "def label(d):\n", + " d_lbld = d.map_blocks(\n", + " label_chunk,\n", + " dtype=[\n", + " (\"label\", np.int32, d.shape[1:]),\n", + " (\"num\", int, ()),\n", + " ],\n", + " drop_axis=tuple(irange(1, d.ndim))\n", + " )\n", + "\n", + " return d_lbld[\"label\"], d_lbld[\"num\"]\n", + "\n", + "def np_labels_to_masks_chunk(a, num):\n", + " r = np.empty((0,) + a.shape[1:], dtype=bool)\n", + " if num:\n", + " r = np.concatenate([a == i for i in irange(1, 1 + num)])\n", + " return r\n", + "\n", + "def labels_to_masks_chunk(a, num):\n", + " r = np.empty((0,) + a.shape[2:], dtype=bool)\n", + " if len(num):\n", + " r = np.concatenate([np_labels_to_masks_chunk(e0, e1) for e0, e1 in zip(a, num)])\n", + " return r\n", + "\n", + "def labels_to_masks(d, nums):\n", + " out = da.atop(\n", + " labels_to_masks_chunk, tuple(irange(d.ndim)),\n", + " d, tuple(irange(d.ndim)),\n", + " nums, tuple(irange(nums.ndim)),\n", + " dtype=bool\n", + " )\n", + " out._chunks = (len(out.chunks[0]) * (np.nan,),) + out.chunks[1:]\n", + "\n", + " return out" + ] + }, { "cell_type": "code", "execution_count": null, @@ -1180,43 +1234,53 @@ "da_imgs = dask_store[subgroup_dict]\n", "da_imgs = da_imgs.rechunk(((1,) + da_imgs.shape[1:]))\n", "\n", - "da_imgs = da_imgs[0]\n", - "\n", "da_imgs_thrd = (da_imgs - noise_threshold * (da_imgs - significance_threshold * da_imgs.std()).std()) > 0\n", "\n", - "da_lbl_img, da_num_lbls = dask_ndmeasure.label(da_imgs_thrd)\n", - "da_lbl_img, da_num_lbls = client.persist([da_lbl_img, da_num_lbls])\n", + "da_lbl_img, da_num_lbls = label(da_imgs_thrd)\n", "\n", - "da_result = []\n", - "for i in irange(1, 1 + int(da_num_lbls)):\n", - " da_result.append(da_lbl_img == i)\n", - "da_result = da.stack(da_result)\n", + "da_result = labels_to_masks(da_lbl_img, da_num_lbls)\n", + "da_result = da_result.astype(np.uint8)\n", "\n", - "dask_store[subgroup_post_mask] = da_result\n", + "da_result = client.persist(da_result)\n", "\n", - "dask.distributed.progress(dask_store[subgroup_post_mask], notebook=False)\n", - "print(\"\")\n", + "dask.distributed.progress(da_result, notebook=False)\n", "\n", "\n", - "# View results\n", - "imgs_min, imgs_max = 0, 100\n", + "# Make chunks concrete\n", "\n", - "da_imgs = dask_store[subgroup_post_mask]\n", - "da_imgs = da_imgs.astype(np.uint8)\n", + 
"da_result_chunks_0 = tuple(\n", + " da_result[:, 0, 0].map_blocks(lambda e: np.atleast_1d(np.ones_like(e).astype(int).sum())).compute()\n", + ")\n", "\n", - "da_imgs_min, da_imgs_max = da_imgs.min(), da_imgs.max()\n", + "da_result_keys_0, da_result_chunks_0 = list(zip(*[[k, c] for k, c in zip(dask.core.flatten(da_result.__dask_keys__()), da_result_chunks_0) if c]))\n", "\n", - "status = client.compute([da_imgs_min, da_imgs_max])\n", - "dask.distributed.progress(status, notebook=False)\n", + "da_result_chunks = (\n", + " (da_result_chunks_0,) + da_result.chunks[1:]\n", + ")\n", + "\n", + "# (cls, dask, name, chunks, dtype, shape=None):\n", + "da_result_2 = da.Array(\n", + " dask.sharedict.merge(dask.optimization.cull(da_result.__dask_graph__(), list(da_result_keys_0))[0]),\n", + " da_result.name,\n", + " da_result_chunks,\n", + " da_result.dtype\n", + ")\n", + "\n", + "dask_store[subgroup_post_mask] = da_result_2\n", + "\n", + "dask.distributed.progress(dask_store[subgroup_post_mask], notebook=False)\n", "print(\"\")\n", "\n", - "imgs_min, imgs_max = [s.result() for s in status]\n", + "\n", + "# View results\n", + "da_imgs = dask_store[subgroup_post_mask]\n", + "da_imgs = da_imgs.astype(np.uint8)\n", "\n", "mplsv = plt.figure(FigureClass=MPLViewer)\n", "mplsv.set_images(\n", " da_imgs,\n", - " vmin=imgs_min,\n", - " vmax=imgs_max\n", + " vmin=0,\n", + " vmax=1\n", ")" ] },