Merge pull request #41 from ndcn/wfcleanup
finalize stats final_io notebook
ergonyc authored Apr 15, 2023
2 parents 9263949 + 29f0aa4 commit 2afb445
Showing 10 changed files with 3,681 additions and 461 deletions.
47 changes: 31 additions & 16 deletions infer_subc_2d/core/file_io.py
@@ -99,7 +99,7 @@ def export_inferred_organelle(img_out: np.ndarray, name: str, meta_dict: Dict, o
img_name_out = f"{img_name.stem}-{name}"
# HACK: skip the ome
# out_file_n = export_ome_tiff(img_out, meta_dict, img_name_out, str(out_data_path) + "/", name)
out_file_n = export_tiff(img_out, meta_dict, img_name_out, str(out_data_path), name)
out_file_n = export_tiff(img_out, img_name_out, out_data_path, name, meta_dict)
print(f"saved file: {out_file_n}")
return out_file_n

@@ -219,23 +219,29 @@ def read_tiff_image(image_name):
return tiff image with tifffile.imread. Using the `reader_function` (via read_ome_image) and AICSImage is too slow;
presumably handling the OME metadata is what is so slow.
"""
start = time.time()
# start = time.time()
image = imread(
image_name,
)
end = time.time()
print(f">>>>>>>>>>>> tifffile.imread (dtype={image.dtype}in ({(end - start):0.2f}) sec")
# end = time.time()
# print(f">>>>>>>>>>>> tifffile.imread (dtype={image.dtype} in ({(end - start):0.2f}) sec")
return image # .get_image_data("CZYX")
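For reference, a small hypothetical timing sketch of the comparison described in the docstring above (plain tifffile.imread versus the AICSImage/OME path). The file path is a placeholder and actual timings depend on the image; this is not code from the repository.

import time

from aicsimageio import AICSImage
from tifffile import imread

fname = "data/out/example-nuclei.tiff"  # hypothetical file path

t0 = time.time()
img_fast = imread(fname)                            # plain tiff read
t1 = time.time()
img_slow = AICSImage(fname).get_image_data("CZYX")  # OME-aware read
t2 = time.time()

print(f"tifffile.imread: {t1 - t0:0.2f} s | AICSImage: {t2 - t1:0.2f} s")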


def export_tiff(data_in, meta_in, img_name, out_path, channel_names) -> str:
def export_tiff(
data_in: np.ndarray,
img_name: str,
out_path: Union[Path, str],
channel_names: Union[List[str], None] = None,
meta_in: Union[Dict, None] = None,
) -> int:
"""
wrapper for exporting tiff with tifffile.imwrite
--> using AICSImage is too slow;
presumably handling the OME metadata is what is so slow.
"""

start = time.time()
# start = time.time()

out_name = Path(out_path, f"{img_name}.tiff")

@@ -259,10 +265,11 @@ def export_tiff(data_in, meta_in, img_name, out_path, channel_names) -> str:
dtype = data_in.dtype
if dtype == "bool":
data_in = data_in.astype(np.uint8)
data_in[data_in > 0] = 1
data_in[data_in > 0] = 255
dtype = data_in.dtype
print(f"changed dtype from bool to {dtype}")

else:
print(f"export dtype - {dtype}")
ret = imwrite(
out_name,
data_in,
@@ -273,20 +280,21 @@ def export_tiff(data_in, meta_in, img_name, out_path, channel_names) -> str
# # "channel_names": channel_names,
# },
)
end = time.time()
print(f">>>>>>>>>>>> tifffile.imwrite in ({(end - start):0.2f}) sec")
# end = time.time()
# print(f">>>>>>>>>>>> tifffile.imwrite in ({(end - start):0.2f}) sec")
return ret
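A hedged usage sketch of the reordered export_tiff call shown above (data first, then name, output path, channel names, metadata). The mask, folder, and file name below are placeholders invented for illustration.

import numpy as np
from pathlib import Path

from infer_subc_2d.core.file_io import export_tiff

mask = np.zeros((16, 256, 256), dtype=bool)   # toy ZYX segmentation
mask[4:8, 100:150, 100:150] = True

out_dir = Path("data/out")                    # hypothetical output folder
out_dir.mkdir(parents=True, exist_ok=True)

ret = export_tiff(mask, "example-nuclei", out_dir, ["nuclei"], None)
print(f"imwrite returned: {ret}")             # boolean mask is written as uint8 0/255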


# function to collect all the image files in a folder
def list_image_files(data_folder: Path, file_type: str, prefix: Union[str, None] = None) -> List:
def list_image_files(data_folder: Path, file_type: str, postfix: Union[str, None] = None) -> List:
"""
get a list of all files of the given file type
TODO: aics has cleaner functions than this "lambda"
should this use Path methods? or return Path?
"""
if prefix is not None:
return sorted(data_folder.glob(f"{prefix}*{file_type}"))

if postfix is not None:
return sorted(data_folder.glob(f"*{postfix}{file_type}"))
else:
return sorted(data_folder.glob(f"*{file_type}"))
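A short usage sketch of list_image_files with the new postfix filter; the folder and postfix strings below are hypothetical examples, matched via the `*{postfix}{file_type}` glob shown above.

from pathlib import Path

from infer_subc_2d.core.file_io import list_image_files

data_dir = Path("data/raw")                                   # hypothetical folder
all_tiffs = list_image_files(data_dir, ".tiff")               # every *.tiff file
nuclei_only = list_image_files(data_dir, ".tiff", "-nuclei")  # files ending in "-nuclei.tiff"

print(f"{len(all_tiffs)} tiffs total, {len(nuclei_only)} nuclei segmentations")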

@@ -410,7 +418,8 @@ def export_inferred_organelle_AICS(img_out: np.ndarray, name: str, meta_dict: Di
print(f"making {out_data_path}")

img_name_out = f"{img_name.stem}-{name}"
out_file_n = export_tiff_AICS(img_out, meta_dict, img_name_out, str(out_data_path), name)
out_file_n = export_tiff_AICS(img_out, img_name_out, out_data_path, name, meta_dict)

print(f"saved file: {out_file_n}")
return out_file_n

@@ -434,7 +443,13 @@ def read_tiff_image_AICS(image_name):
return im_out


def export_tiff_AICS(data_in, meta_in, img_name, out_path, channel_names) -> str:
def export_tiff_AICS(
data_in: np.ndarray,
img_name: str,
out_path: Union[Path, str],
channel_names: Union[List[str], None] = None,
meta_in: Union[Dict, None] = None,
) -> str:
"""
export a tiff the aicssegmentation way, via OmeTiffWriter
"""
@@ -445,7 +460,7 @@ def export_tiff_AICS(data_in, meta_in, img_name, out_path, channel_names) -> str

if data_in.dtype == "bool":
data_in = data_in.astype(np.uint8)
data_in[data_in > 0] = 1
data_in[data_in > 0] = 255

OmeTiffWriter.save(data=data_in, uri=out_name.as_uri(), dim_order="ZYX")
end = time.time()
4 changes: 2 additions & 2 deletions infer_subc_2d/core/img.py
@@ -261,7 +261,7 @@ def get_interior_labels(img_in: np.ndarray) -> np.ndarray:
Returns
-------------
np.ndimage of labeled segmentations NOT touching the sides as `np.uint8`
np.ndimage of labeled segmentations NOT touching the sides
"""
segmented_padded = np.pad(
@@ -1246,7 +1246,7 @@ def adjacent(labels):

def img_to_uint8(data_in: np.ndarray) -> np.ndarray:
"""
helper to convert mask to uint8 (true -> 255)
helper to convert mask to `binary` uint8 (true -> 255) to accommodate napari default scaling
"""
print(f"changing from {data_in.dtype} to np.uint8")
data_in = data_in.astype(np.uint8)
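A minimal standalone sketch of what the rest of img_to_uint8 presumably does (the diff only shows its first lines): scale the boolean mask so foreground pixels are 255, which displays at full intensity under napari's default 0-255 contrast. The helper name below is hypothetical, not the repository's function.

import numpy as np

def mask_to_uint8(data_in: np.ndarray) -> np.ndarray:
    """Convert a boolean mask to uint8 with foreground = 255."""
    out = data_in.astype(np.uint8)
    out[out > 0] = 255
    return out

print(mask_to_uint8(np.array([[True, False], [False, True]])))
# -> [[255   0]
#     [  0 255]]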
124 changes: 124 additions & 0 deletions infer_subc_2d/utils/stats_helpers.py
@@ -0,0 +1,124 @@
import numpy as np
from typing import Any, List
from pathlib import Path

from infer_subc_2d.core.img import apply_mask

import pandas as pd
from infer_subc_2d.utils.stats import _assert_uint16_labels

from .stats import get_aXb_stats_3D, get_summary_stats_3D, get_simple_stats_3D


def shell_cross_stats(
organelle_names: List[str], organelles: List[np.ndarray], mask: np.ndarray, out_data_path: Path, source_file: str
) -> int:
"""
get all cross stats between organelles `a` and `b`, and "shell of `a`" and `b`. "shell" is the boundary of `a`
calls `get_aXb_stats_3D`
"""
count = 0
for j, target in enumerate(organelle_names):
print(f"getting stats for A = {target}")
a = organelles[j]
# loop over Bs
for i, nmi in enumerate(organelle_names):
if i != j:
# get overall stats of intersection
print(f" X {nmi}")
b = organelles[i]
stats_tab = get_aXb_stats_3D(a, b, mask)
csv_path = out_data_path / f"{source_file.stem}-{target}X{nmi}-stats.csv"
stats_tab.to_csv(csv_path)

e_stats_tab = get_aXb_stats_3D(a, b, mask, use_shell_a=True)
csv_path = out_data_path / f"{source_file.stem}-{target}_shellX{nmi}-stats.csv"
e_stats_tab.to_csv(csv_path)

count += 1
return count


def organelle_stats(
organelle_names: List[str],
organelles: List[np.ndarray],
intensities: List[np.ndarray],
mask: np.ndarray,
out_data_path: Path,
source_file: str,
) -> int:
"""
get summary and all cross stats between organelles `a` and `b`
calls `get_summary_stats_3D`
"""
count = 0
org_stats_tabs = []
for j, target in enumerate(organelle_names):
print(f"getting stats for A = {target}")
a = organelles[j]
# A_stats_tab, rp = get_simple_stats_3D(A,mask)
a_stats_tab, rp = get_summary_stats_3D(a, intensities[j], mask)

# loop over Bs
for i, nmi in enumerate(organelle_names):
if i != j:
# get overall stats of intersection
print(f" b = {nmi}")
count += 1
# add the list of touches
b = _assert_uint16_labels(organelles[i])

ov = []
b_labs = []
labs = []
for idx, lab in enumerate(a_stats_tab["label"]): # loop over A_objects
xyz = tuple(rp[idx].coords.T)
cmp_org = b[xyz]

# total number of overlapping pixels
overlap = sum(cmp_org > 0)
# overlap?
labs_b = cmp_org[cmp_org > 0]
b_js = np.unique(labs_b).tolist()

# if overlap > 0:
labs.append(lab)
ov.append(overlap)
b_labs.append(b_js)

# add organelle B columns to A_stats_tab
a_stats_tab[f"{nmi}_overlap"] = ov
a_stats_tab[f"{nmi}_labels"] = b_labs # might want to make this easier for parsing later

# org_stats_tabs.append(A_stats_tab)
csv_path = out_data_path / f"{source_file.stem}-{target}-stats.csv"
a_stats_tab.to_csv(csv_path)

print(f"dumped {count} csvs")
return count


def dump_stats(
name: str,
segmentation: np.ndarray,
intensity_img: np.ndarray,
mask: np.ndarray,
out_data_path: Path,
source_file: str,
) -> pd.DataFrame:
"""
get summary stats of organelle only
calls `get_summary_stats_3D`
"""

stats_table, _ = get_summary_stats_3D(segmentation, intensity_img, mask)
csv_path = out_data_path / f"{source_file.stem}-{name}-basicstats.csv"
stats_table.to_csv(csv_path)
print(f"dumped {name} table to {csv_path}")

return stats_table


# refactor to just take a target vs. a list of probes
# for nuclei mask == cellmask
# for all others mask == cytoplasm
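A hedged usage sketch of the new stats helpers above. The toy organelle arrays, intensity channel, mask, and paths are invented for illustration; a Path is passed for source_file because the helpers use its .stem, even though the annotation says str.

import numpy as np
from pathlib import Path

from infer_subc_2d.utils.stats_helpers import dump_stats, shell_cross_stats

rng = np.random.default_rng(0)
lyso = (rng.random((8, 64, 64)) > 0.9).astype(np.uint16)   # toy label masks
mito = (rng.random((8, 64, 64)) > 0.9).astype(np.uint16)
intensity = rng.random((8, 64, 64)).astype(np.float32)     # toy intensity channel
cyto_mask = np.ones((8, 64, 64), dtype=bool)

out_dir = Path("data/stats")                   # hypothetical output folder
out_dir.mkdir(parents=True, exist_ok=True)
src = Path("data/raw/example-cell.tiff")       # hypothetical source image

n_csvs = shell_cross_stats(["lyso", "mito"], [lyso, mito], cyto_mask, out_dir, src)
lyso_tab = dump_stats("lyso", lyso, intensity, cyto_mask, out_dir, src)
print(f"{n_csvs} cross-stat tables written; lyso table has {len(lyso_tab)} rows")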
4 changes: 2 additions & 2 deletions infer_subc_2d/workflow/batch_workflow.py
@@ -264,14 +264,14 @@ def _format_output(self, image: np.ndarray):
"""
if image.dtype == "bool":
image = image.astype(np.uint8)
image[image > 0] = 1
image[image > 0] = 255
msg = f"converted boolean to {image.dtype}. "
self._write_to_log_file(msg)
elif image.dtype == np.uint8:
msg = f"mask already {image.dtype}"
print(msg)
self._write_to_log_file(msg)
image[image > 0] = 1
image[image > 0] = 255
else:
image = image.astype(np.uint16)
msg = f" enforced {image.dtype}"
18 changes: 9 additions & 9 deletions notebooks/01_infer_nuclei.ipynb
@@ -54,7 +54,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@@ -91,7 +91,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@@ -125,7 +125,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@@ -154,7 +154,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 6,
"metadata": {},
"outputs": [
{
@@ -237,7 +237,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@@ -257,7 +257,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
@@ -300,7 +300,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
@@ -326,7 +326,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
@@ -348,7 +348,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
