Merge pull request #41 from ndcn/wfcleanup
finalize stats final_io notebook
ergonyc authored Apr 15, 2023
2 parents 9263949 + 29f0aa4 commit 2afb445
Showing 10 changed files with 3,681 additions and 461 deletions.
47 changes: 31 additions & 16 deletions infer_subc_2d/core/file_io.py
@@ -99,7 +99,7 @@ def export_inferred_organelle(img_out: np.ndarray, name: str, meta_dict: Dict, o
img_name_out = f"{img_name.stem}-{name}"
# HACK: skip the ome
# out_file_n = export_ome_tiff(img_out, meta_dict, img_name_out, str(out_data_path) + "/", name)
out_file_n = export_tiff(img_out, meta_dict, img_name_out, str(out_data_path), name)
out_file_n = export_tiff(img_out, img_name_out, out_data_path, name, meta_dict)
print(f"saved file: {out_file_n}")
return out_file_n

@@ -219,23 +219,29 @@ def read_tiff_image(image_name):
return tiff image with tifffile.imread. Using the `reader_function` (via read_ome_image) and AICSImage is too slow;
presumably handling the OME metadata is what is so slow.
"""
start = time.time()
# start = time.time()
image = imread(
image_name,
)
end = time.time()
print(f">>>>>>>>>>>> tifffile.imread (dtype={image.dtype}in ({(end - start):0.2f}) sec")
# end = time.time()
# print(f">>>>>>>>>>>> tifffile.imread (dtype={image.dtype} in ({(end - start):0.2f}) sec")
return image # .get_image_data("CZYX")
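For reference, a small hypothetical timing sketch of the comparison described in the docstring above (plain tifffile.imread versus the AICSImage/OME path). The file path is a placeholder and actual timings depend on the image; this is not code from the repository.

import time

from aicsimageio import AICSImage
from tifffile import imread

fname = "data/out/example-nuclei.tiff"  # hypothetical file path

t0 = time.time()
img_fast = imread(fname)                            # plain tiff read
t1 = time.time()
img_slow = AICSImage(fname).get_image_data("CZYX")  # OME-aware read
t2 = time.time()

print(f"tifffile.imread: {t1 - t0:0.2f} s | AICSImage: {t2 - t1:0.2f} s")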


def export_tiff(data_in, meta_in, img_name, out_path, channel_names) -> str:
def export_tiff(
data_in: np.ndarray,
img_name: str,
out_path: Union[Path, str],
channel_names: Union[List[str], None] = None,
meta_in: Union[Dict, None] = None,
) -> int:
"""
wrapper for exporting tiff with tifffile.imwrite
--> using AICSImage is too slow;
presumably handling the OME metadata is what is so slow.
"""

start = time.time()
# start = time.time()

out_name = Path(out_path, f"{img_name}.tiff")

@@ -259,10 +265,11 @@ def export_tiff(data_in, meta_in, img_name, out_path, channel_names) -> str:
dtype = data_in.dtype
if dtype == "bool":
data_in = data_in.astype(np.uint8)
data_in[data_in > 0] = 1
data_in[data_in > 0] = 255
dtype = data_in.dtype
print(f"changed dtype from bool to {dtype}")

else:
print(f"export dtype - {dtype}")
ret = imwrite(
out_name,
data_in,
@@ -273,20 +280,21 @@ def export_tiff(data_in, meta_in, img_name, out_path, channel_names) -> str
# # "channel_names": channel_names,
# },
)
end = time.time()
print(f">>>>>>>>>>>> tifffile.imwrite in ({(end - start):0.2f}) sec")
# end = time.time()
# print(f">>>>>>>>>>>> tifffile.imwrite in ({(end - start):0.2f}) sec")
return ret
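A hedged usage sketch of the reordered export_tiff call shown above (data first, then name, output path, channel names, metadata). The mask, folder, and file name below are placeholders invented for illustration.

import numpy as np
from pathlib import Path

from infer_subc_2d.core.file_io import export_tiff

mask = np.zeros((16, 256, 256), dtype=bool)   # toy ZYX segmentation
mask[4:8, 100:150, 100:150] = True

out_dir = Path("data/out")                    # hypothetical output folder
out_dir.mkdir(parents=True, exist_ok=True)

ret = export_tiff(mask, "example-nuclei", out_dir, ["nuclei"], None)
print(f"imwrite returned: {ret}")             # boolean mask is written as uint8 0/255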


# function to collect all the image files in a folder
def list_image_files(data_folder: Path, file_type: str, prefix: Union[str, None] = None) -> List:
def list_image_files(data_folder: Path, file_type: str, postfix: Union[str, None] = None) -> List:
"""
get a list of all files of the given file type
TODO: aics has cleaner functions than this "lambda"
should this use Path methods? or return Path?
"""
if prefix is not None:
return sorted(data_folder.glob(f"{prefix}*{file_type}"))

if postfix is not None:
return sorted(data_folder.glob(f"*{postfix}{file_type}"))
else:
return sorted(data_folder.glob(f"*{file_type}"))
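A short usage sketch of list_image_files with the new postfix filter; the folder and postfix strings below are hypothetical examples, matched via the `*{postfix}{file_type}` glob shown above.

from pathlib import Path

from infer_subc_2d.core.file_io import list_image_files

data_dir = Path("data/raw")                                   # hypothetical folder
all_tiffs = list_image_files(data_dir, ".tiff")               # every *.tiff file
nuclei_only = list_image_files(data_dir, ".tiff", "-nuclei")  # files ending in "-nuclei.tiff"

print(f"{len(all_tiffs)} tiffs total, {len(nuclei_only)} nuclei segmentations")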

@@ -410,7 +418,8 @@ def export_inferred_organelle_AICS(img_out: np.ndarray, name: str, meta_dict: Di
print(f"making {out_data_path}")

img_name_out = f"{img_name.stem}-{name}"
out_file_n = export_tiff_AICS(img_out, meta_dict, img_name_out, str(out_data_path), name)
out_file_n = export_tiff_AICS(img_out, img_name_out, out_data_path, name, meta_dict)

print(f"saved file: {out_file_n}")
return out_file_n

@@ -434,7 +443,13 @@ def read_tiff_image_AICS(image_name):
return im_out


def export_tiff_AICS(data_in, meta_in, img_name, out_path, channel_names) -> str:
def export_tiff_AICS(
data_in: np.ndarray,
img_name: str,
out_path: Union[Path, str],
channel_names: Union[List[str], None] = None,
meta_in: Union[Dict, None] = None,
) -> str:
"""
export a tiff the aicssegmentation way, via OmeTiffWriter
"""
@@ -445,7 +460,7 @@ def export_tiff_AICS(data_in, meta_in, img_name, out_path, channel_names) -> str

if data_in.dtype == "bool":
data_in = data_in.astype(np.uint8)
data_in[data_in > 0] = 1
data_in[data_in > 0] = 255

OmeTiffWriter.save(data=data_in, uri=out_name.as_uri(), dim_order="ZYX")
end = time.time()
4 changes: 2 additions & 2 deletions infer_subc_2d/core/img.py
@@ -261,7 +261,7 @@ def get_interior_labels(img_in: np.ndarray) -> np.ndarray:
Returns
-------------
np.ndimage of labeled segmentations NOT touching the sides as `np.uint8`
np.ndimage of labeled segmentations NOT touching the sides
"""
segmented_padded = np.pad(
@@ -1246,7 +1246,7 @@ def adjacent(labels):

def img_to_uint8(data_in: np.ndarray) -> np.ndarray:
"""
helper to convert mask to uint8 (true -> 255)
helper to convert mask to `binary` uint8 (true -> 255) to accommodate napari default scaling
"""
print(f"changing from {data_in.dtype} to np.uint8")
data_in = data_in.astype(np.uint8)
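A minimal standalone sketch of what the rest of img_to_uint8 presumably does (the diff only shows its first lines): scale the boolean mask so foreground pixels are 255, which displays at full intensity under napari's default 0-255 contrast. The helper name below is hypothetical, not the repository's function.

import numpy as np

def mask_to_uint8(data_in: np.ndarray) -> np.ndarray:
    """Convert a boolean mask to uint8 with foreground = 255."""
    out = data_in.astype(np.uint8)
    out[out > 0] = 255
    return out

print(mask_to_uint8(np.array([[True, False], [False, True]])))
# -> [[255   0]
#     [  0 255]]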
124 changes: 124 additions & 0 deletions infer_subc_2d/utils/stats_helpers.py
@@ -0,0 +1,124 @@
import numpy as np
from typing import Any, List
from pathlib import Path

from infer_subc_2d.core.img import apply_mask

import pandas as pd
from infer_subc_2d.utils.stats import _assert_uint16_labels

from .stats import get_aXb_stats_3D, get_summary_stats_3D, get_simple_stats_3D


def shell_cross_stats(
organelle_names: List[str], organelles: List[np.ndarray], mask: np.ndarray, out_data_path: Path, source_file: str
) -> int:
"""
get all cross stats between organelles `a` and `b`, and "shell of `a`" and `b`. "shell" is the boundary of `a`
calls `get_aXb_stats_3D`
"""
count = 0
for j, target in enumerate(organelle_names):
print(f"getting stats for A = {target}")
a = organelles[j]
# loop over Bs
for i, nmi in enumerate(organelle_names):
if i != j:
# get overall stats of intersection
print(f" X {nmi}")
b = organelles[i]
stats_tab = get_aXb_stats_3D(a, b, mask)
csv_path = out_data_path / f"{source_file.stem}-{target}X{nmi}-stats.csv"
stats_tab.to_csv(csv_path)

e_stats_tab = get_aXb_stats_3D(a, b, mask, use_shell_a=True)
csv_path = out_data_path / f"{source_file.stem}-{target}_shellX{nmi}-stats.csv"
e_stats_tab.to_csv(csv_path)

count += 1
return count


def organelle_stats(
organelle_names: List[str],
organelles: List[np.ndarray],
intensities: List[np.ndarray],
mask: np.ndarray,
out_data_path: Path,
source_file: str,
) -> int:
"""
get summary and all cross stats between organelles `a` and `b`
calls `get_summary_stats_3D`
"""
count = 0
org_stats_tabs = []
for j, target in enumerate(organelle_names):
print(f"getting stats for A = {target}")
a = organelles[j]
# A_stats_tab, rp = get_simple_stats_3D(A,mask)
a_stats_tab, rp = get_summary_stats_3D(a, intensities[j], mask)

# loop over Bs
for i, nmi in enumerate(organelle_names):
if i != j:
# get overall stats of intersection
print(f" b = {nmi}")
count += 1
# add the list of touches
b = _assert_uint16_labels(organelles[i])

ov = []
b_labs = []
labs = []
for idx, lab in enumerate(a_stats_tab["label"]): # loop over A_objects
xyz = tuple(rp[idx].coords.T)
cmp_org = b[xyz]

# total number of overlapping pixels
overlap = sum(cmp_org > 0)
# overlap?
labs_b = cmp_org[cmp_org > 0]
b_js = np.unique(labs_b).tolist()

# if overlap > 0:
labs.append(lab)
ov.append(overlap)
b_labs.append(b_js)

# add organelle B columns to A_stats_tab
a_stats_tab[f"{nmi}_overlap"] = ov
a_stats_tab[f"{nmi}_labels"] = b_labs # might want to make this easier for parsing later

# org_stats_tabs.append(A_stats_tab)
csv_path = out_data_path / f"{source_file.stem}-{target}-stats.csv"
a_stats_tab.to_csv(csv_path)

print(f"dumped {count} csvs")
return count


def dump_stats(
name: str,
segmentation: np.ndarray,
intensity_img: np.ndarray,
mask: np.ndarray,
out_data_path: Path,
source_file: str,
) -> pd.DataFrame:
"""
get summary stats of organelle only
calls `get_summary_stats_3D`
"""

stats_table, _ = get_summary_stats_3D(segmentation, intensity_img, mask)
csv_path = out_data_path / f"{source_file.stem}-{name}-basicstats.csv"
stats_table.to_csv(csv_path)
print(f"dumped {name} table to {csv_path}")

return stats_table


# refactor to just take a target vs. a list of probes
# for nuclei mask == cellmask
# for all others mask == cytoplasm
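A hedged usage sketch of the new stats helpers above. The toy organelle arrays, intensity channel, mask, and paths are invented for illustration; a Path is passed for source_file because the helpers use its .stem, even though the annotation says str.

import numpy as np
from pathlib import Path

from infer_subc_2d.utils.stats_helpers import dump_stats, shell_cross_stats

rng = np.random.default_rng(0)
lyso = (rng.random((8, 64, 64)) > 0.9).astype(np.uint16)   # toy label masks
mito = (rng.random((8, 64, 64)) > 0.9).astype(np.uint16)
intensity = rng.random((8, 64, 64)).astype(np.float32)     # toy intensity channel
cyto_mask = np.ones((8, 64, 64), dtype=bool)

out_dir = Path("data/stats")                   # hypothetical output folder
out_dir.mkdir(parents=True, exist_ok=True)
src = Path("data/raw/example-cell.tiff")       # hypothetical source image

n_csvs = shell_cross_stats(["lyso", "mito"], [lyso, mito], cyto_mask, out_dir, src)
lyso_tab = dump_stats("lyso", lyso, intensity, cyto_mask, out_dir, src)
print(f"{n_csvs} cross-stat tables written; lyso table has {len(lyso_tab)} rows")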
4 changes: 2 additions & 2 deletions infer_subc_2d/workflow/batch_workflow.py
@@ -264,14 +264,14 @@ def _format_output(self, image: np.ndarray):
"""
if image.dtype == "bool":
image = image.astype(np.uint8)
image[image > 0] = 1
image[image > 0] = 255
msg = f"converted boolean to {image.dtype}. "
self._write_to_log_file(msg)
elif image.dtype == np.uint8:
msg = f"mask already {image.dtype}"
print(msg)
self._write_to_log_file(msg)
image[image > 0] = 1
image[image > 0] = 255
else:
image = image.astype(np.uint16)
msg = f" enforced {image.dtype}"
18 changes: 9 additions & 9 deletions notebooks/01_infer_nuclei.ipynb
@@ -54,7 +54,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@@ -91,7 +91,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@@ -125,7 +125,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@@ -154,7 +154,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 6,
"metadata": {},
"outputs": [
{
@@ -237,7 +237,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@@ -257,7 +257,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
@@ -300,7 +300,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
@@ -326,7 +326,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
@@ -348,7 +348,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
