From da4544467152f74b0776ad1f75ecf034731d1b7d Mon Sep 17 00:00:00 2001
From: Harry Carey <harry.carey95@gmail.com>
Date: Thu, 27 Jul 2023 16:51:33 +0200
Subject: [PATCH] Improve support for listing badly damaged sections and add
 a method to automatically detect them

---
 .gitignore                                    |   4 +-
 .../spacing_and_indexing.py                   | 129 +++++++++++++-----
 DeepSlice/main.py                             |   4 +-
 3 files changed, 97 insertions(+), 40 deletions(-)

diff --git a/.gitignore b/.gitignore
index eae3018..2c98adf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -34,4 +34,6 @@ var/
 # Installer logs
 pip-log.txt
 pip-delete-this-directory.txt
-DeepSlice.egg-info
\ No newline at end of file
+DeepSlice.egg-info
+test.py
+.vscode/settings.json
diff --git a/DeepSlice/coord_post_processing/spacing_and_indexing.py b/DeepSlice/coord_post_processing/spacing_and_indexing.py
index 7c98cbe..57a7363 100644
--- a/DeepSlice/coord_post_processing/spacing_and_indexing.py
+++ b/DeepSlice/coord_post_processing/spacing_and_indexing.py
@@ -1,4 +1,4 @@
-from typing import Union, List
+from typing import Union, List, Optional
 import numpy as np
 import pandas as pd
 import re
@@ -23,7 +23,7 @@ def trim_mean(arr: np.array, percent: int) -> float:
 
 
 def calculate_average_section_thickness(
-    section_numbers: List[Union[int, float]], section_depth: List[Union[int, float]], method="weighted",
+    section_numbers: List[Union[int, float]], section_depth: List[Union[int, float]], bad_sections, method="weighted",
     species="mouse"
 ) -> float:
     """
@@ -37,33 +37,28 @@ def calculate_average_section_thickness(
     :rtype: float
     """
     # inter section number differences
+    if bad_sections is not None:
+        section_numbers = section_numbers[bad_sections == False].reset_index(drop=True)
+        section_depth = section_depth[bad_sections == False]
     number_spacing = section_numbers[:-1].values - section_numbers[1:].values
     # inter section depth differences
     depth_spacing = section_depth[:-1] - section_depth[1:]
     # dividing depth spacing by number spacing allows us to control for missing sections
-    min = 0
-    max = np.max(section_numbers)
-    if species == "mouse":
-        min, max = 0, 528
-    elif species == "rat":
-        min, max = 0, 1024
-    if method == "weighted":
-        weighted_accuracy = plane_alignment.make_gaussian_weights(max + 1)
-        weighted_accuracy = [weighted_accuracy[int(y)] for y in section_numbers]
-    elif method == None:
-        weighted_accuracy = [1 for y in section_numbers]
-
+    weighted_accuracy = calculate_weighted_accuracy(section_numbers, section_depth, species, None, method)
     section_thicknesses = depth_spacing / number_spacing
-    if len(section_numbers) <= 2:
-      weighted_accuracy = [1, 1]
     average_thickness = np.average(section_thicknesses, weights = weighted_accuracy[1:])
     return average_thickness
 
 
+
+
+
+
 def ideal_spacing(
     section_numbers: List[Union[int, float]],
     section_depth: List[Union[int, float]],
     average_thickness: Union[int, float],
+    bad_sections: List[bool] = None,
     method = "weighted",
     species = "mouse"
 ) -> float:
@@ -82,17 +77,8 @@ def ideal_spacing(
     # unaligned voxel position of section numbers (evenly spaced depths)
     index_spaced_depth = section_numbers * average_thickness
     # average distance between the depths and the evenly spaced depths
-    if species == "mouse":
-        min, max = 0, 528
-    elif species == "rat":
-        min, max = 0, 1024
-    if method == "weighted":
-        weighted_accuracy = plane_alignment.make_gaussian_weights(max + 1)
-        weighted_accuracy = [weighted_accuracy[int(y)] for y in section_numbers]
-    elif method == None:
-        weighted_accuracy = [1 for y in section_numbers]
-    if len(section_numbers) <= 2:
-        weighted_accuracy = [0.5, 0.5]
+    
+    weighted_accuracy = calculate_weighted_accuracy(section_numbers, section_depth, species, bad_sections, method)
     distance_to_ideal = np.average(section_depth - index_spaced_depth, weights = weighted_accuracy)
     # adjust the evenly spaced depths to minimise their distance to the predicted depths
     ideal_index_spaced_depth = index_spaced_depth + distance_to_ideal
@@ -138,16 +124,26 @@ def enforce_section_ordering(predictions):
         depths = np.array(depths)
         direction = determine_direction_of_indexing(depths)
         predictions["depths"] = depths
-
+        
         temp = predictions.copy()
         if direction == "caudal-rostro":
             ascending = False
         if direction == "rostro-caudal":
             ascending = True
-        temp = temp.sort_values(by=["depths"], ascending=ascending).reset_index(
-            drop=True
-        )
-        predictions["oy"] = temp["oy"]
+        if "bad_section" in temp:
+            temp_good = temp[temp["bad_section"] == False].copy().reset_index(drop=True)
+            temp_good_copy = temp_good.copy()
+            temp_good_copy = temp_good_copy.sort_values(by=["depths"], ascending=ascending).reset_index(
+                drop=True
+            )
+            temp_good["oy"] = temp_good_copy["oy"]
+            
+            predictions.loc[predictions["bad_section"] == False, "oy"] = temp_good["oy"].values
+        else:
+            temp = temp.sort_values(by=["depths"], ascending=ascending).reset_index(drop=True)
+
+        
+            predictions["oy"] = temp["oy"].values
     return predictions
 
 
@@ -164,7 +160,7 @@ def space_according_to_index(predictions, section_thickness = None, voxel_size =
     """
     if voxel_size == None:
         raise ValueError("voxel_size must be specified")
-    if section_thickness != None:
+    if section_thickness is not None:
         section_thickness/=voxel_size
     predictions["oy"] = predictions["oy"].astype(float)
     if len(predictions) == 1:
@@ -174,12 +170,16 @@ def space_according_to_index(predictions, section_thickness = None, voxel_size =
             "No section indexes found, cannot space according to a missing index. You likely did not run predict() with section_numbers=True"
         )
     else:
+        if 'bad_section' in predictions:
+            bad_sections = predictions['bad_section'].values
+        else:
+            bad_sections = None
         predictions = enforce_section_ordering(predictions)
         depths = calculate_brain_center_depths(predictions)
         depths = np.array(depths)
         if not section_thickness:
             section_thickness = calculate_average_section_thickness(
-                predictions["nr"], section_depth = depths, species=species
+                predictions["nr"], section_depth = depths, bad_sections=bad_sections, species=species
             )
             if not suppress:
                 print(f'predicted thickness is {section_thickness * voxel_size}µm')
@@ -187,7 +187,7 @@ def space_according_to_index(predictions, section_thickness = None, voxel_size =
             if not suppress:
                 print(f'specified thickness is {section_thickness * voxel_size}µm')
 
-        calculated_spacing = ideal_spacing(predictions["nr"], depths, section_thickness, None, species)
+        calculated_spacing = ideal_spacing(predictions["nr"], depths, section_thickness, bad_sections, species=species)
         distance_to_ideal = calculated_spacing - depths
         predictions["oy"] = predictions["oy"] + distance_to_ideal
     return predictions
@@ -223,21 +223,40 @@ def number_sections(filenames: List[str], legacy=False) -> List[int]:
     return section_numbers
 
 
-def set_bad_sections_util(df: pd.DataFrame, bad_sections: List[str]) -> pd.DataFrame:
+def set_bad_sections_util(df: pd.DataFrame, bad_sections: List[str], auto = False) -> pd.DataFrame:
     """
     Sets the damaged sections and sections which deepslice may not perform well on for a series of predictions
     
     :param bad_sections: List of bad sections
     :param df: dataframe of predictions
+    :param auto: automatically flag sections as bad if they are badly positioned relative to their section index
     :type bad_sections: List[int]
     :type df: pandas.DataFrame
+    :type auto: bool
     :return: the input dataframe with bad sections labeled as such
     :rtype: pandas.DataFrame
     """
+
     bad_section_indexes = [
-        df.Filenames.contains(bad_section) for bad_section in bad_sections
+        df.Filenames.str.contains(bad_section) for bad_section in bad_sections
     ]
+    if np.any([np.sum(x)>1 for x in bad_section_indexes]):
+        raise ValueError("Multiple sections match the same bad section string, make sure each bad section string is unique")
+    bad_section_indexes = [np.where(x)[0] for x in bad_section_indexes]
+    bad_section_indexes = np.concatenate(bad_section_indexes)
+    df.loc[~df.index.isin(bad_section_indexes), "bad_section"] = False
+    if auto:
+        df['depths'] = calculate_brain_center_depths(df)
+        x = df['nr'].values
+        y = df['depths'].values
+        m,b = np.polyfit(x,y,1)
+        residuals = y - (m*x + b)
+        outliers = np.abs(residuals) > 1.5*np.std(residuals)
+        df.loc[outliers, 'bad_section'] = True
+        
     df.loc[bad_section_indexes, "bad_section"] = True
+    # (sections not matched by name were already set to False above)
+    
     bad_sections_found = np.sum(bad_section_indexes)
     # Tell the user which sections were identified as bad
     if bad_sections_found > 0:
@@ -246,3 +265,39 @@ def set_bad_sections_util(df: pd.DataFrame, bad_sections: List[str]) -> pd.DataF
         They are:\n {df.Filenames[bad_section_indexes]}"
         )
     return df
+
+
+def calculate_weighted_accuracy(section_numbers: List[int], depths: List[float], species: str, bad_sections: List[Optional[bool]]  = None, method: str = "weighted") -> List[float]:
+    """
+    Calculates the weight given to each section when averaging depths or thicknesses
+
+    :param section_numbers: List of section numbers (used only for its length)
+    :param depths: Depth of each section, clipped to the species range before the weight lookup
+    :param species: Species to calculate accuracy for, either "mouse" or "rat"
+    :param bad_sections: Optional per-section flags, only sections flagged False keep their weight
+    :param method: "weighted" looks weights up by integer depth, None gives every section weight 1
+    :type section_numbers: List[int]
+    :type depths: List[float]
+    :type species: str
+    :type bad_sections: List[Optional[bool]]
+    :type method: str
+    :return: one weight per section ([0.5, 0.5] whenever there are two or fewer sections)
+    :rtype: List[float]
+    :raises ValueError: if species or method is not one of the supported values
+    """
+    depth_limits = {"mouse": (0, 528), "rat": (0, 1024)}
+    if species not in depth_limits:
+        raise ValueError(f"species must be 'mouse' or 'rat', got {species!r}")
+    min_depth, max_depth = depth_limits[species]
+    if method == "weighted":
+        gaussian_weights = plane_alignment.make_gaussian_weights(max_depth + 1)
+        weighted_accuracy = [gaussian_weights[int(y)] for y in np.clip(np.asarray(depths), min_depth, max_depth)]
+    elif method is None:
+        weighted_accuracy = [1 for _ in section_numbers]
+    else:
+        raise ValueError(f"method must be 'weighted' or None, got {method!r}")
+    if len(section_numbers) <= 2:
+        weighted_accuracy = [0.5, 0.5]
+    if bad_sections is not None:
+        weighted_accuracy = [w if flag == False else 0 for w, flag in zip(weighted_accuracy, bad_sections)]  # "== False" (not "is") so numpy bools match
+    return weighted_accuracy
\ No newline at end of file
diff --git a/DeepSlice/main.py b/DeepSlice/main.py
index 581b4ae..d04b6b8 100644
--- a/DeepSlice/main.py
+++ b/DeepSlice/main.py
@@ -101,7 +101,7 @@ def predict(
         self.predictions = predictions
         self.image_directory = image_directory
 
-    def set_bad_sections(self, bad_sections: list):
+    def set_bad_sections(self, bad_sections: list, auto = False):
         """
         sets the bad sections for a given brain. Must be run after predict()
 
@@ -109,7 +109,7 @@ def set_bad_sections(self, bad_sections: list):
         :type bad_sections: list
         """
         self.predictions = spacing_and_indexing.set_bad_sections_util(
-            self.predictions, bad_sections
+            self.predictions, bad_sections, auto
         )
 
     def enforce_index_order(self):