From 09f5aa1d7b178447a936e28df5251952395609b6 Mon Sep 17 00:00:00 2001
From: Hendrik <hendrik.moeller@tum.de>
Date: Fri, 10 Nov 2023 08:59:51 +0100
Subject: [PATCH 1/6] overhaul done, untested

---
 __setup.py                                    |  44 +++
 panoptica/__init__.py                         |   1 +
 panoptica/assd_test.py                        |  70 +++++
 panoptica/evaluator.py                        | 259 +++++++++---------
 panoptica/instance_approximator.py            |  48 ++++
 panoptica/instance_evaluator.py               |  79 ++++++
 panoptica/instance_matcher.py                 | 150 ++++++++++
 panoptica/seg_test.ipynb                      |  73 +++++
 .../semantic_evaluation/semantic_evaluator.py |  20 +-
 panoptica/utils/__init__.py                   |   6 +
 panoptica/utils/assd.py                       | 102 +++++++
 .../connected_component_backends.py           |   0
 panoptica/utils/constants.py                  |  31 +++
 panoptica/utils/datatypes.py                  | 155 +++++++++++
 panoptica/{utils.py => utils/metrics.py}      |  73 ++---
 panoptica/utils/numpy.py                      |  40 +++
 16 files changed, 983 insertions(+), 168 deletions(-)
 create mode 100644 __setup.py
 create mode 100644 panoptica/assd_test.py
 create mode 100644 panoptica/instance_approximator.py
 create mode 100644 panoptica/instance_evaluator.py
 create mode 100644 panoptica/instance_matcher.py
 create mode 100644 panoptica/seg_test.ipynb
 create mode 100644 panoptica/utils/__init__.py
 create mode 100644 panoptica/utils/assd.py
 rename panoptica/{semantic_evaluation => utils}/connected_component_backends.py (100%)
 create mode 100644 panoptica/utils/constants.py
 create mode 100644 panoptica/utils/datatypes.py
 rename panoptica/{utils.py => utils/metrics.py} (59%)
 create mode 100644 panoptica/utils/numpy.py

diff --git a/__setup.py b/__setup.py
new file mode 100644
index 0000000..06273eb
--- /dev/null
+++ b/__setup.py
@@ -0,0 +1,44 @@
+from setuptools import setup
+
+setup(
+    name="BIDS",
+    version="0.0.2",
+    author="Robert Graf",
+    author_email="robert.graf@tum.de",
+    packages=["BIDS", "BIDS.test"],
+    # scripts=["bin/script1", "bin/script2"],
+    # url="http://pypi.python.org/pypi/PackageName/",
+    license="LICENSE.txt",
+    description="A collection of tools, that work with files in a (weak) BIDS standard",
+    long_description=open("README.md").read(),
+    install_requires=[
+        "pathlib",
+        "pytest",
+        "nibabel",
+        "numpy",
+        "antspyx",
+        "typing_extensions",
+        "scipy",
+        "dataclasses",
+        "SimpleITK",
+        "matplotlib",
+        "dicom2nifti",
+        "func_timeout",
+        "dill",
+    ],
+)
+
+# Build from source:
+# python setup.py build
+# And install:
+# python setup.py install
+# Under Development
+# Develop mode is really, really nice:
+# $ python setup.py develop
+# sudo python3 setup.py develop
+# or:
+# $ pip install -e ./
+
+# which python
+#
+# sudo /home/robert/anaconda3/envs/py3.10/bin/python setup.py develop
diff --git a/panoptica/__init__.py b/panoptica/__init__.py
index 5dc773e..f89fd6c 100644
--- a/panoptica/__init__.py
+++ b/panoptica/__init__.py
@@ -5,3 +5,4 @@
 from panoptica.semantic_evaluation.semantic_evaluator import (
     SemanticSegmentationEvaluator,
 )
+from repo.panoptica.utils.assd import my_assd
diff --git a/panoptica/assd_test.py b/panoptica/assd_test.py
new file mode 100644
index 0000000..df861c2
--- /dev/null
+++ b/panoptica/assd_test.py
@@ -0,0 +1,70 @@
+from medpy import metric
+from BIDS import NII
+from BIDS.core.np_utils import np_extract_label
+
+# from panoptica.assd import my_assd
+from repo.panoptica.utils.assd import my_assd
+from time import perf_counter
+import numpy as np
+from multiprocessing import Pool
+
+gt = "/media/hendrik/be5e95dd-27c8-4c31-adc5-7b75f8ebd5c5/data/hendrik/panoptica/data/gt/verse012_seg.nii.gz"
+
+pred = "/media/hendrik/be5e95dd-27c8-4c31-adc5-7b75f8ebd5c5/data/hendrik/panoptica/data/submissions/christian_payer/docker_phase2/results/verse012_seg.nii.gz"
+
+
+def extract_both(pred_arr, gt_arr, label: int):
+    pred_l = np_extract_label(pred_arr, label, inplace=False)
+    gt_l = np_extract_label(gt_arr, label, inplace=False)
+    return pred_l, gt_l
+
+
+pred_nii = NII.load(pred, seg=True)
+pred_nii.map_labels_({l: idx + 1 for idx, l in enumerate(pred_nii.unique())}, verbose=False)
+gt_nii = NII.load(gt, seg=True)
+gt_nii.map_labels_({l: idx + 1 for idx, l in enumerate(gt_nii.unique())}, verbose=False)
+
+pred_arr = pred_nii.get_seg_array()
+gt_arr = gt_nii.get_seg_array()
+
+iterations = 3
+
+medpy_result = 1.5266468819541414
+
+time_medpy = []
+time_my = []
+
+labels = pred_nii.unique()
+
+for i in range(iterations):
+    start = perf_counter()
+    # label_list = [l for l in labels if l in gt_arr]
+    pairs = (extract_both(pred_arr, gt_arr, l) for l in labels if l in gt_arr)
+    # for l in label_list:
+    #    pred_l = np_extract_label(pred_arr, l, inplace=False)
+    #    gt_l = np_extract_label(gt_arr, l, inplace=False)
+    #    result = metric.assd(result=pred_l, reference=gt_l)
+    result = [metric.assd(p[0], p[1]) for p in pairs]
+    time = perf_counter() - start
+    time_medpy.append(time)
+#
+# mine is faster, speedup my_assd even more?
+# TODO try this pooling with my vertebra segmentation, make all pairs for dice calculation
+#
+for i in range(iterations):
+    start = perf_counter()
+    with Pool() as pool:
+        pairs = (extract_both(pred_arr, gt_arr, l) for l in labels if l in gt_arr)
+
+        assd_values = pool.starmap(my_assd, pairs)
+    # result2 = my_assd(result=pred_arr, reference=gt_arr)
+    time = perf_counter() - start
+    time_my.append(time)
+    # assert result2 == medpy_result
+
+print(np.average(time_medpy))
+print(np.average(time_my))
+
+print(result)
+print()
+print(assd_values)
diff --git a/panoptica/evaluator.py b/panoptica/evaluator.py
index 9e19ac9..ea970c0 100644
--- a/panoptica/evaluator.py
+++ b/panoptica/evaluator.py
@@ -2,134 +2,141 @@
 
 import numpy as np
 
-from panoptica.result import PanopticaResult
+from result import PanopticaResult
+from utils.datatypes import SemanticPair, UnmatchedInstancePair, MatchedInstancePair, ProcessingPair, ProcessingPairInstanced
+from instance_approximator import InstanceApproximator
+from instance_matcher import InstanceMatchingAlgorithm
+from instance_evaluator import evaluate_matched_instance
+from timing import measure_time
 
 
-class Evaluator(ABC):
-    """
-    Abstract base class for evaluating instance segmentation results.
-
-    Subclasses of this Evaluator should implement the abstract methods 'evaluate' and 'count_number_of_instances'.
+class Panoptic_Evaluator:  # stores one pipeline configuration and runs panoptic_evaluate with it
+    def __init__(
+        self,
+        expected_input: type[SemanticPair] | type[UnmatchedInstancePair] | type[MatchedInstancePair],
+        instance_approximator: InstanceApproximator | None,
+        instance_matcher: InstanceMatchingAlgorithm | None,
+        iou_threshold: float = 0.5,
+    ) -> None:
+        self.__expected_input = expected_input  # the pair class itself; compared with type(input) in evaluate
+        self.__instance_approximator = instance_approximator
+        self.__instance_matcher = instance_matcher
+        self.__iou_threshold = iou_threshold
+
+    @measure_time
+    def evaluate(self, processing_pair: ProcessingPair) -> tuple[PanopticaResult, dict[str, ProcessingPair]]:
+        assert type(processing_pair) == self.__expected_input, f"input not of expected type {self.__expected_input}"
+        return panoptic_evaluate(
+            processing_pair=processing_pair,
+            instance_approximator=self.__instance_approximator,
+            instance_matcher=self.__instance_matcher,
+            iou_threshold=self.__iou_threshold,
+        )
+
+
+def panoptic_evaluate(
+    processing_pair: SemanticPair | UnmatchedInstancePair | MatchedInstancePair | PanopticaResult,
+    instance_approximator: InstanceApproximator | None,
+    instance_matcher: InstanceMatchingAlgorithm | None,
+    iou_threshold: float,
+    verbose: bool = False,  # not referenced in this function body
+    **kwargs,  # accepted but not referenced in this function body
+) -> tuple[PanopticaResult, dict[str, ProcessingPair]]:
+    """Run the pipeline phases the input still needs and return (result, debug_data)."""
+    debug_data: dict[str, ProcessingPair] = {}
+    if isinstance(processing_pair, PanopticaResult):  # already a result: nothing left to do
+        return processing_pair, debug_data
+    # First Phase: Instance Approximation
+    if isinstance(processing_pair, SemanticPair):
+        assert instance_approximator is not None, "Got SemanticPair but not InstanceApproximator"
+        processing_pair = instance_approximator.approximate_instances(processing_pair)
+        debug_data["UnmatchedInstanceMap"] = processing_pair.copy()  # snapshot of the intermediate pair
+
+    # Second Phase: Instance Matching
+    if isinstance(processing_pair, UnmatchedInstancePair):
+        processing_pair = _handle_zero_instances_cases(processing_pair)  # may already yield a PanopticaResult
+
+    if isinstance(processing_pair, UnmatchedInstancePair):
+        assert instance_matcher is not None, "Got UnmatchedInstancePair but not InstanceMatchingAlgorithm"
+        processing_pair = instance_matcher.match_instances(processing_pair)
+        debug_data["MatchedInstanceMap"] = processing_pair.copy()  # snapshot of the intermediate pair
+
+    # Third Phase: Instance Evaluation
+    if isinstance(processing_pair, MatchedInstancePair):
+        processing_pair = _handle_zero_instances_cases(processing_pair)  # may already yield a PanopticaResult
+
+    if isinstance(processing_pair, MatchedInstancePair):
+        processing_pair = evaluate_matched_instance(processing_pair, iou_threshold=iou_threshold)
+
+    if isinstance(processing_pair, PanopticaResult):
+        return processing_pair, debug_data
+
+    raise RuntimeError("End of panoptic pipeline reached without results")  # no phase produced a result
+
+
+def _handle_zero_instances_cases(
+    processing_pair: UnmatchedInstancePair | MatchedInstancePair,
+) -> UnmatchedInstancePair | MatchedInstancePair | PanopticaResult:
     """
+    Handle edge cases when comparing reference and prediction masks.
 
-    def __init__(self):
-        pass
+    Args:
+        processing_pair (UnmatchedInstancePair | MatchedInstancePair): Pair whose
+            n_reference_instance and n_prediction_instance counts are checked for zero.
 
-    @abstractmethod
-    def evaluate(
-        self,
-        reference_mask: np.ndarray,
-        prediction_mask: np.ndarray,
-        iou_threshold: float,
-    ) -> PanopticaResult:
-        """
-        Evaluate the instance segmentation results based on the reference and prediction masks.
-
-        Args:
-            reference_mask (np.ndarray): Binary mask representing reference instances.
-            prediction_mask (np.ndarray): Binary mask representing prediction instances.
-            iou_threshold (float): IoU threshold for considering a prediction as a true positive.
-
-        Returns:
-            PanopticaResult: Result object with evaluation metrics.
-        """
-        pass
-
-    def _handle_edge_cases(
-        self, num_ref_instances: int, num_pred_instances: int
-    ) -> PanopticaResult:
-        """
-        Handle edge cases when comparing reference and prediction masks.
-
-        Args:
-            num_ref_instances (int): Number of instances in the reference mask.
-            num_pred_instances (int): Number of instances in the prediction mask.
-
-        Returns:
-            PanopticaResult: Result object with evaluation metrics.
-        """
-        # Handle cases where either the reference or the prediction is empty
-        if num_ref_instances == 0 and num_pred_instances == 0:
-            # Both references and predictions are empty, perfect match
-            return PanopticaResult(
-                num_ref_instances=0,
-                num_pred_instances=0,
-                tp=0,
-                dice_list=[],
-                iou_list=[],
-            )
-        if num_ref_instances == 0:
-            # All references are missing, only false positives
-            return PanopticaResult(
-                num_ref_instances=0,
-                num_pred_instances=num_pred_instances,
-                tp=0,
-                dice_list=[],
-                iou_list=[],
-            )
-        if num_pred_instances == 0:
-            # All predictions are missing, only false negatives
-            return PanopticaResult(
-                num_ref_instances=num_ref_instances,
-                num_pred_instances=0,
-                tp=0,
-                dice_list=[],
-                iou_list=[],
-            )
-
-    def _compute_iou(self, reference: np.ndarray, prediction: np.ndarray) -> float:
-        """
-        Compute Intersection over Union (IoU) between two masks.
-
-        Args:
-            reference (np.ndarray): Reference mask.
-            prediction (np.ndarray): Prediction mask.
-
-        Returns:
-            float: IoU between the two masks. A value between 0 and 1, where higher values
-            indicate better overlap and similarity between masks.
-        """
-        intersection = np.logical_and(reference, prediction)
-        union = np.logical_or(reference, prediction)
-
-        union_sum = np.sum(union)
-
-        # Handle division by zero
-        if union_sum == 0:
-            return 0.0
-
-        iou = np.sum(intersection) / union_sum
-        return iou
-
-    def _compute_dice_coefficient(
-        self,
-        reference: np.ndarray,
-        prediction: np.ndarray,
-    ) -> float:
-        """
-        Compute the Dice coefficient between two binary masks.
-
-        The Dice coefficient measures the similarity or overlap between two binary masks.
-        It is defined as:
-
-        Dice = (2 * intersection) / (area_mask1 + area_mask2)
-
-        Args:
-            reference (np.ndarray): Reference binary mask.
-            prediction (np.ndarray): Prediction binary mask.
-
-        Returns:
-            float: Dice coefficient between the two binary masks. A value between 0 and 1, where higher values
-            indicate better overlap and similarity between masks.
-        """
-        intersection = np.logical_and(reference, prediction)
-        reference_mask = np.sum(reference)
-        prediction_mask = np.sum(prediction)
-
-        # Handle division by zero
-        if reference_mask == 0 and prediction_mask == 0:
-            return 0.0
-
-        # Calculate Dice coefficient
-        dice = 2 * np.sum(intersection) / (reference_mask + prediction_mask)
-        return dice
+    Returns:
+        PanopticaResult when reference or prediction has no instances, else processing_pair unchanged.
+    """
+    n_reference_instance = processing_pair.n_reference_instance
+    n_prediction_instance = processing_pair.n_prediction_instance
+    # Handle cases where either the reference or the prediction is empty
+    if n_prediction_instance == 0 and n_reference_instance == 0:
+        # Both references and predictions are empty, perfect match
+        return PanopticaResult(
+            num_ref_instances=0,
+            num_pred_instances=0,
+            tp=0,
+            dice_list=[],
+            iou_list=[],
+        )
+    if n_reference_instance == 0:
+        # All references are missing, only false positives
+        return PanopticaResult(
+            num_ref_instances=0,
+            num_pred_instances=n_prediction_instance,
+            tp=0,
+            dice_list=[],
+            iou_list=[],
+        )
+    if n_prediction_instance == 0:
+        # All predictions are missing, only false negatives
+        return PanopticaResult(
+            num_ref_instances=n_reference_instance,
+            num_pred_instances=0,
+            tp=0,
+            dice_list=[],
+            iou_list=[],
+        )
+    return processing_pair  # both sides contain instances: the regular pipeline continues
+
+
+if __name__ == "__main__":
+    from instance_approximator import ConnectedComponentsInstanceApproximator, CCABackend
+    from instance_matcher import NaiveOneToOneMatching
+    from instance_evaluator import evaluate_matched_instance
+
+    a = np.zeros([50, 50], dtype=int)
+    b = a.copy()
+    a[20:40, 10:20] = 1
+    b[20:35, 10:20] = 2
+
+    sample = SemanticPair(b, a)
+
+    evaluator = Panoptic_Evaluator(
+        expected_input=SemanticPair,
+        instance_approximator=ConnectedComponentsInstanceApproximator(cca_backend=CCABackend.cc3d),
+        instance_matcher=NaiveOneToOneMatching(),
+    )
+
+    result, debug_data = evaluator.evaluate(sample)
+    print(result)
diff --git a/panoptica/instance_approximator.py b/panoptica/instance_approximator.py
new file mode 100644
index 0000000..96fa751
--- /dev/null
+++ b/panoptica/instance_approximator.py
@@ -0,0 +1,48 @@
+from abc import abstractmethod, ABC
+from utils.datatypes import SemanticPair, UnmatchedInstancePair, MatchedInstancePair
+from utils.connected_component_backends import CCABackend
+import numpy as np
+
+
+class InstanceApproximator(ABC):
+    @abstractmethod
+    def _approximate_instances(self, semantic_pair: SemanticPair, **kwargs) -> UnmatchedInstancePair | MatchedInstancePair:
+        pass  # hook implemented by concrete approximators
+
+    def approximate_instances(self, semantic_pair: SemanticPair, **kwargs) -> UnmatchedInstancePair | MatchedInstancePair:
+        """Turn a semantic pair into an instance pair by delegating to the subclass hook."""
+        # TODO: reduce the resulting arrays to the smallest unsigned integer dtype that fits
+        return self._approximate_instances(semantic_pair, **kwargs)
+
+
+class ConnectedComponentsInstanceApproximator(InstanceApproximator):  # instances = connected components
+    def __init__(self, cca_backend: CCABackend) -> None:
+        self.cca_backend = cca_backend  # backend handed to _connected_components for both arrays
+
+    def _approximate_instances(self, semantic_pair: SemanticPair, **kwargs) -> UnmatchedInstancePair:
+        prediction_arr, n_prediction_instance = _connected_components(semantic_pair.prediction_arr, self.cca_backend)
+        reference_arr, n_reference_instance = _connected_components(semantic_pair.reference_arr, self.cca_backend)
+        return UnmatchedInstancePair(  # prediction and reference are labeled independently of each other
+            prediction_arr=prediction_arr,
+            reference_arr=reference_arr,
+            n_prediction_instance=n_prediction_instance,
+            n_reference_instance=n_reference_instance,
+        )
+
+
+def _connected_components(
+    array: np.ndarray,
+    cca_backend: CCABackend,
+) -> tuple[np.ndarray, int]:
+    """Label the connected components of ``array`` with the selected backend and return (labeled array, count)."""
+    if cca_backend == CCABackend.cc3d:
+        import cc3d
+
+        cc_arr, n_instances = cc3d.connected_components(array, return_N=True)
+    elif cca_backend == CCABackend.scipy:
+        from scipy.ndimage import label
+
+        cc_arr, n_instances = label(array)
+    else:
+        raise NotImplementedError(cca_backend)  # any other backend has no implementation here
+    return cc_arr, n_instances
diff --git a/panoptica/instance_evaluator.py b/panoptica/instance_evaluator.py
new file mode 100644
index 0000000..e21a200
--- /dev/null
+++ b/panoptica/instance_evaluator.py
@@ -0,0 +1,79 @@
+from abc import abstractmethod, ABC
+import concurrent.futures
+from utils.datatypes import MatchedInstancePair
+from result import PanopticaResult
+from timing import measure_time
+import numpy as np
+from utils.metrics import _compute_iou, _compute_dice_coefficient
+
+
+def evaluate_matched_instance(semantic_pair: MatchedInstancePair, iou_threshold: float, **kwargs) -> PanopticaResult:
+    """Evaluate every reference label of a matched pair and collect tp, Dice and IoU into a PanopticaResult."""
+    tp, dice_list, iou_list = 0, [], []
+
+    reference_arr, prediction_arr = semantic_pair.reference_arr, semantic_pair.prediction_arr
+    ref_labels = semantic_pair.ref_labels
+
+    # Use concurrent.futures.ThreadPoolExecutor for parallelization
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        futures = [
+            executor.submit(
+                _evaluate_instance,
+                reference_arr,
+                prediction_arr,
+                ref_idx,  # the same label value is looked up in both arrays
+                iou_threshold,
+            )
+            for ref_idx in ref_labels
+        ]
+        # Results are consumed in completion order, so list order is not tied to ref_labels order
+        for future in concurrent.futures.as_completed(futures):
+            tp_i, dice_i, iou_i = future.result()
+            tp += tp_i
+            if dice_i is not None:  # dice is None when the instance is not a true positive
+                dice_list.append(dice_i)
+            if iou_i is not None:
+                iou_list.append(iou_i)
+    # Create and return the PanopticaResult object with computed metrics
+    return PanopticaResult(
+        num_ref_instances=semantic_pair.n_reference_instance,
+        num_pred_instances=semantic_pair.n_prediction_instance,
+        tp=tp,
+        dice_list=dice_list,
+        iou_list=iou_list,
+    )
+
+
+def _evaluate_instance(
+    ref_labels: np.ndarray,
+    pred_labels: np.ndarray,
+    ref_idx: int,
+    iou_threshold: float,
+) -> tuple[int, float | None, float]:
+    """
+    Evaluate a single instance.
+
+    Args:
+        ref_labels (np.ndarray): Reference instance segmentation mask.
+        pred_labels (np.ndarray): Predicted instance segmentation mask.
+        ref_idx (int): The label of the current instance (looked up in both masks).
+        iou_threshold (float): The IoU threshold for considering a match.
+
+    Returns:
+        tuple[int, float | None, float]: True-positive flag (0 or 1), Dice coefficient
+        (None unless the IoU exceeds the threshold), and IoU.
+    """
+    # Build both binary masks once; they are shared by the IoU and the Dice computation
+    ref_mask = ref_labels == ref_idx
+    pred_mask = pred_labels == ref_idx
+
+    iou = _compute_iou(reference=ref_mask, prediction=pred_mask)
+    # NOTE(review): strict ">" here, while NaiveOneToOneMatching accepts ">=" -- confirm intended
+    if iou > iou_threshold:
+        tp = 1
+        dice = _compute_dice_coefficient(reference=ref_mask, prediction=pred_mask)
+    else:
+        tp = 0
+        dice = None
+
+    return tp, dice, iou
diff --git a/panoptica/instance_matcher.py b/panoptica/instance_matcher.py
new file mode 100644
index 0000000..4226814
--- /dev/null
+++ b/panoptica/instance_matcher.py
@@ -0,0 +1,150 @@
+from abc import abstractmethod, ABC
+from utils.datatypes import UnmatchedInstancePair, MatchedInstancePair, Instance_Label_Map, uint_type
+from multiprocessing import Pool
+import numpy as np
+from utils import _unique_without_zeros
+from utils import _compute_instance_iou
+from scipy.optimize import linear_sum_assignment
+
+
+class InstanceMatchingAlgorithm(ABC):  # subclasses provide _match_instances, callers use match_instances
+    @abstractmethod
+    def _match_instances(self, unmatched_instance_pair: UnmatchedInstancePair, **kwargs) -> Instance_Label_Map:
+        pass  # hook implemented by concrete matchers
+
+    def match_instances(self, unmatched_instance_pair: UnmatchedInstancePair, **kwargs) -> MatchedInstancePair:
+        instance_labelmap = self._match_instances(unmatched_instance_pair, **kwargs)
+        print("instance_labelmap", instance_labelmap)  # NOTE(review): looks like leftover debug output
+        return map_instance_labels(unmatched_instance_pair.copy(), instance_labelmap)  # relabels a copy of the pair
+
+
+class NaiveOneToOneMatching(InstanceMatchingAlgorithm):
+    """One-to-one matcher based on an optimal assignment over the pairwise IoU matrix.
+
+    Assigned pairs are kept only if their IoU reaches ``iou_threshold``.
+    """
+
+    def __init__(self, iou_threshold: float = 0.5) -> None:
+        assert iou_threshold >= 0.5, "NaiveOneToOneMatching: iou_threshold lower than 0.5 doesnt work!"
+        assert iou_threshold < 1.0, "NaiveOneToOneMatching: iou_threshold greater than or equal to 1.0 doesnt work!"
+        self.iou_threshold = iou_threshold
+
+    def _match_instances(self, unmatched_instance_pair: UnmatchedInstancePair, **kwargs) -> Instance_Label_Map:
+        """Return one ([reference label], [prediction label]) entry per assigned pair with IoU >= threshold."""
+        ref_labels = unmatched_instance_pair.ref_labels
+        pred_labels = unmatched_instance_pair.pred_labels
+        iou_matrix = _calc_iou_matrix(
+            unmatched_instance_pair.prediction_arr,
+            unmatched_instance_pair.reference_arr,
+            ref_labels,
+            pred_labels,
+        )
+        # Use linear_sum_assignment to find the best matches (negated: the solver minimizes cost)
+        ref_indices, pred_indices = linear_sum_assignment(-iou_matrix)
+
+        labelmap: Instance_Label_Map = []
+        # Keep only those assigned pairs whose IoU reaches the threshold
+        for ref_idx, pred_idx in zip(ref_indices, pred_indices):
+            iou = iou_matrix[ref_idx][pred_idx]
+            if iou >= self.iou_threshold:
+                # ref_idx / pred_idx are matrix positions; translate them back to label values
+                labelmap.append(([ref_labels[ref_idx]], [pred_labels[pred_idx]]))
+        return labelmap
+
+
+def _calc_iou_matrix(prediction_arr: np.ndarray, reference_arr: np.ndarray, ref_labels, pred_labels):
+    """Return the IoU matrix whose entry [i, j] compares ref_labels[i] with pred_labels[j].
+
+    Rows and columns follow the order of the given label sequences, so positions in the
+    matrix can be translated back to labels by indexing ref_labels / pred_labels.
+    """
+    # Pair up the actual label values (not 1..N), so non-contiguous labels are handled too
+    instance_pairs = [
+        (reference_arr, prediction_arr, ref_label, pred_label)
+        for ref_label in ref_labels
+        for pred_label in pred_labels
+    ]
+
+    # Create a pool of worker processes and compute the IoU of every pair in parallel
+    with Pool() as pool:
+        iou_values = pool.starmap(_compute_instance_iou, instance_pairs)
+
+    # Reshape the flat, row-major list of IoU values into a (n_ref, n_pred) matrix
+    return np.array(iou_values).reshape((len(ref_labels), len(pred_labels)))
+
+
+def map_instance_labels(processing_pair: UnmatchedInstancePair, labelmap: Instance_Label_Map) -> MatchedInstancePair:
+    """Relabel both arrays so that matched instances share one label.
+
+    Matched groups get 1..n_matched in labelmap order; unmatched reference, then prediction labels follow.
+    """
+    prediction_arr, reference_arr = processing_pair.prediction_arr, processing_pair.reference_arr
+    ref_labels, pred_labels = processing_pair.ref_labels, processing_pair.pred_labels
+
+    missed_ref_labels = []
+    missed_pred_labels = []
+    pred_labelmap = {}
+    ref_labelmap = {}
+    label_counter = 1
+    # Go over instance labelmap and assign the matched instance sequentially
+    for refs, preds in labelmap:
+        # Map every member of the group; refs and preds may differ in length (many-to-one matches)
+        ref_labelmap.update(dict.fromkeys(refs, label_counter))
+        pred_labelmap.update(dict.fromkeys(preds, label_counter))
+        label_counter += 1
+    n_matched_instances = label_counter - 1
+    # assign missed instances to next unused labels sequentially
+    for r in ref_labels:
+        if r not in ref_labelmap:
+            ref_labelmap[r] = label_counter
+            label_counter += 1
+            missed_ref_labels.append(r)
+    for p in pred_labels:
+        if p not in pred_labelmap:
+            pred_labelmap[p] = label_counter
+            label_counter += 1
+            missed_pred_labels.append(p)
+
+    # Using the labelmap, actually change the labels in the array here
+    prediction_arr_relabeled = _map_labels(prediction_arr, pred_labelmap)
+    reference_arr_relabeled = _map_labels(reference_arr, ref_labelmap)
+
+    # Build a MatchedInstancePair out of the newly derived data
+    return MatchedInstancePair(
+        prediction_arr=prediction_arr_relabeled,
+        reference_arr=reference_arr_relabeled,
+        missed_reference_labels=missed_ref_labels,
+        missed_prediction_labels=missed_pred_labels,
+        n_prediction_instance=processing_pair.n_prediction_instance,
+        n_reference_instance=processing_pair.n_reference_instance,
+        n_matched_instances=n_matched_instances,
+    )
+
+
+def _map_labels(arr: np.ndarray, label_map: dict[np.integer, np.integer]) -> np.ndarray:
+    """Return a copy of ``arr`` in which every value that is a key of ``label_map`` is replaced.
+    Args:
+        arr (np.ndarray): Array holding the labels to translate; it is left unmodified.
+        label_map (dict): Maps an original label value to its new label value.
+    Returns:
+        np.ndarray: The relabeled copy; values without an entry in label_map are kept.
+    """
+    remapped = arr.copy()
+    present = (old for old in np.unique(arr) if old in label_map)
+    for old in present:
+        remapped[arr == old] = label_map[old]  # mask comes from arr, so new values are never remapped again
+    return remapped
+
+
+if __name__ == "__main__":
+    a = np.array([0, 1, 2, 3, 4, 5], dtype=np.uint16)
+    b = np.array([0, 1, 2, 6, 3, 7], dtype=np.uint16)
+    MatchedInstancePair(
+        reference_arr=a,
+        prediction_arr=b,
+        missed_reference_labels=[4, 5],
+        missed_prediction_labels=[6, 7],
+        n_matched_instances=3,
+        n_prediction_instance=5,
+        n_reference_instance=5,
+    )
diff --git a/panoptica/seg_test.ipynb b/panoptica/seg_test.ipynb
new file mode 100644
index 0000000..79c59d5
--- /dev/null
+++ b/panoptica/seg_test.ipynb
@@ -0,0 +1,73 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{1: 1, 2: 2, 3: 2, 4: 3}\n",
+      "{1: 1, 3: 2, 5: 3}\n"
+     ]
+    }
+   ],
+   "source": [
+    "from instance_matcher import _map_instance_labels\n",
+    "import numpy as np\n",
+    "reference_arr = np.ndarray([\n",
+    "    0,1,2,3,4\n",
+    "])\n",
+    "pred_arr = np.ndarray([\n",
+    "    0,1,0,3,5\n",
+    "])\n",
+    "\n",
+    "#prediction 2 -> 3,4 ref\n",
+    "\n",
+    "labelmap = [([1],[1]),([2,3],[3]),([4],[5])]\n",
+    "#_map_instance_labels(pred_arr, reference_arr, labelmap=labelmap)\n",
+    "ref_labelmap = {}\n",
+    "pred_labelmap = {}\n",
+    "\n",
+    "for idx, (refs, preds) in enumerate(labelmap):\n",
+    "    for r in refs:\n",
+    "        ref_labelmap[r] = idx + 1\n",
+    "    for p in preds:\n",
+    "        pred_labelmap[p] = idx + 1\n",
+    "\n",
+    "print(ref_labelmap)\n",
+    "print(pred_labelmap)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "nakoseg",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/panoptica/semantic_evaluation/semantic_evaluator.py b/panoptica/semantic_evaluation/semantic_evaluator.py
index dd2a171..781a28d 100644
--- a/panoptica/semantic_evaluation/semantic_evaluator.py
+++ b/panoptica/semantic_evaluation/semantic_evaluator.py
@@ -79,9 +79,7 @@ def evaluate(
             iou_values = pool.starmap(self._compute_instance_iou, instance_pairs)
 
         # Reshape the resulting IoU values into a matrix
-        iou_matrix = np.array(iou_values).reshape(
-            (num_ref_instances, num_pred_instances)
-        )
+        iou_matrix = np.array(iou_values).reshape((num_ref_instances, num_pred_instances))
 
         # Use linear_sum_assignment to find the best matches
         ref_indices, pred_indices = linear_sum_assignment(-iou_matrix)
@@ -101,8 +99,8 @@ def evaluate(
                 dice = self._compute_instance_dice_coefficient(
                     ref_labels=ref_labels,
                     pred_labels=pred_labels,
-                    ref_instance_idx=ref_idx + 1,
-                    pred_instance_idx=pred_idx + 1,
+                    ref_instance_label=ref_idx + 1,
+                    pred_instance_label=pred_idx + 1,
                 )
                 dice_list.append(dice)
 
@@ -168,8 +166,8 @@ def _compute_instance_dice_coefficient(
         self,
         ref_labels: np.ndarray,
         pred_labels: np.ndarray,
-        ref_instance_idx: int,
-        pred_instance_idx: int,
+        ref_instance_label: int,
+        pred_instance_label: int,
     ) -> float:
         """
         Compute the Dice coefficient between a specific pair of instances.
@@ -182,15 +180,15 @@ def _compute_instance_dice_coefficient(
         Args:
             ref_labels (np.ndarray): Reference instance labels.
             pred_labels (np.ndarray): Prediction instance labels.
-            ref_instance_idx (int): Index of the reference instance.
-            pred_instance_idx (int): Index of the prediction instance.
+            ref_instance_label (int): Label of the reference instance.
+            pred_instance_label (int): Label of the prediction instance.
 
         Returns:
             float: Dice coefficient between the specified instances. A value between 0 and 1, where higher values
             indicate better overlap and similarity between instances.
         """
-        ref_instance_mask = ref_labels == ref_instance_idx
-        pred_instance_mask = pred_labels == pred_instance_idx
+        ref_instance_mask = ref_labels == ref_instance_label
+        pred_instance_mask = pred_labels == pred_instance_label
 
         return self._compute_dice_coefficient(
             reference=ref_instance_mask,
diff --git a/panoptica/utils/__init__.py b/panoptica/utils/__init__.py
new file mode 100644
index 0000000..f4ab7c0
--- /dev/null
+++ b/panoptica/utils/__init__.py
@@ -0,0 +1,6 @@
+from utils.numpy import _count_unique_without_zeros, _unique_without_zeros
+from utils.connected_component_backends import CCABackend
+from utils.datatypes import *
+from utils.metrics import _compute_instance_volumetric_dice, _compute_instance_iou, _compute_dice_coefficient, _compute_iou
+
+# from utils.constants import
diff --git a/panoptica/utils/assd.py b/panoptica/utils/assd.py
new file mode 100644
index 0000000..37e20fd
--- /dev/null
+++ b/panoptica/utils/assd.py
@@ -0,0 +1,102 @@
+import numpy as np
+from scipy.ndimage import _ni_support
+from scipy.ndimage.morphology import distance_transform_edt, generate_binary_structure, binary_erosion
+from scipy.ndimage._nd_image import euclidean_feature_transform
+
+
+def my_assd(result, reference, voxelspacing=None, connectivity=1):
+    directed = [asd(a, b, voxelspacing, connectivity) for a, b in ((result, reference), (reference, result))]
+    return np.mean(directed)  # symmetric distance: mean of the result->reference and reference->result averages
+
+
+def asd(result, reference, voxelspacing=None, connectivity=1):
+    # Directed average: mean over the distances measured from result's surface to reference's surface.
+    surface_distances = __surface_distances(result, reference, voxelspacing, connectivity)
+    return surface_distances.mean()
+
+
+def __surface_distances(result, reference, voxelspacing=None, connectivity=1):
+    """
+    Return, for every surface voxel of `result`, the distance to the nearest surface voxel of `reference`.
+    `voxelspacing` (scalar, per-axis sequence, or None for unit spacing) scales the distances; `connectivity` selects the erosion footprint.
+    """
+    result = np.atleast_1d(result.astype(bool))
+    reference = np.atleast_1d(reference.astype(bool))
+    if voxelspacing is not None:
+        voxelspacing = _ni_support._normalize_sequence(voxelspacing, result.ndim)
+        voxelspacing = np.asarray(voxelspacing, dtype=np.float64)
+        if not voxelspacing.flags.contiguous:
+            voxelspacing = voxelspacing.copy()
+
+    # binary structure
+    footprint = generate_binary_structure(result.ndim, connectivity)
+
+    # NOTE: emptiness is not tested here (the former RuntimeError checks are disabled).
+    # An empty `result` has no border, so the returned array is empty and a caller
+    # taking `.mean()` of it gets NaN.
+    # TODO(review): an empty `reference` leaves no border to measure against; confirm
+    # what callers should receive in that case.
+
+    # extract only 1-pixel border line of objects
+    result_border = result ^ binary_erosion(result, structure=footprint, iterations=1)
+    reference_border = reference ^ binary_erosion(reference, structure=footprint, iterations=1)
+
+    # compute the surface distances, honouring the requested voxel spacing
+    # Note: the distance transform measures the distance of non-zero elements to the
+    #       nearest zero element, therefore the border mask has to be inverted
+    dt = _distance_transform_edt(~reference_border, sampling=voxelspacing)
+    sds = dt[result_border]
+
+    return sds
+
+
+def _distance_transform_edt(input: np.ndarray, sampling=None, return_distances=True, return_indices=False):
+    """Trimmed copy of scipy's distance_transform_edt; returns distances, indices, both as a tuple, or None."""
+    # `input` is handed to the C routine unconverted - presumably it must be a 1-byte (bool/int8) array.
+    if sampling is not None:
+        sampling = _ni_support._normalize_sequence(sampling, input.ndim)
+        sampling = np.asarray(sampling, dtype=np.float64)
+        if not sampling.flags.contiguous:
+            sampling = sampling.copy()
+
+    ft = np.zeros((input.ndim,) + input.shape, dtype=np.int32)
+    # fills ft[:, p] with the index of the zero element closest to p
+    euclidean_feature_transform(input, sampling, ft)
+    # if requested, calculate the distance transform
+    if return_distances:
+        dt = ft - np.indices(input.shape, dtype=ft.dtype)
+        dt = dt.astype(np.float64)
+        if sampling is not None:
+            # turn per-axis index offsets into physical offsets before squaring
+            dt *= sampling.reshape((-1,) + (1,) * input.ndim)
+        np.multiply(dt, dt, dt)
+
+        dt = np.add.reduce(dt, axis=0)
+        dt = np.sqrt(dt)
+
+    # construct and return the result
+    result = []
+    if return_distances:
+        result.append(dt)
+    if return_indices:
+        result.append(ft)
+
+    if len(result) == 2:
+        return tuple(result)
+    elif len(result) == 1:
+        return result[0]
+    else:
+        return None
+
+
+def _distance_tranform_arg_check(distances_out, indices_out, return_distances, return_indices):
+    """Reject inconsistent output/return flag combinations with one RuntimeError naming all of them."""
+    # each entry pairs a "this rule is violated" condition with the message reported for it
+    rules = (
+        (not (return_distances or return_indices), "at least one of return_distances/return_indices must be True"),
+        (distances_out and not return_distances, "return_distances must be True if distances is supplied"),
+        (indices_out and not return_indices, "return_indices must be True if indices is supplied"),
+    )
+    violations = [message for violated, message in rules if violated]
+    if violations:
+        raise RuntimeError(", ".join(violations))
diff --git a/panoptica/semantic_evaluation/connected_component_backends.py b/panoptica/utils/connected_component_backends.py
similarity index 100%
rename from panoptica/semantic_evaluation/connected_component_backends.py
rename to panoptica/utils/connected_component_backends.py
diff --git a/panoptica/utils/constants.py b/panoptica/utils/constants.py
new file mode 100644
index 0000000..7342375
--- /dev/null
+++ b/panoptica/utils/constants.py
@@ -0,0 +1,31 @@
+from enum import Enum, auto
+from typing_extensions import Self
+
+
+class Enum_Compare(Enum):
+    """Enum base whose members also compare equal to their own name given as a plain string."""
+
+    def __eq__(self, __value: object) -> bool:
+        if isinstance(__value, Enum):
+            return self.name == __value.name and self.value == __value.value
+        # a plain string matches when it spells this member's name; anything else never matches
+        return isinstance(__value, str) and self.name == __value
+
+    def __str__(self) -> str:
+        return f"{type(self).__name__}.{self.name}"
+
+    def __repr__(self) -> str:
+        return str(self)
+
+    def __hash__(self) -> int:
+        return hash(self.name)  # name-based: always an int, and equal objects (incl. the name string) hash alike
+
+
+# class Datatype_Status(Enum_Compare):
+#    Semantic_Map = auto()
+#    Unmatched_Instance_Map = auto()
+#    Matched_Instance_Map = auto()
+
+
+if __name__ == "__main__":
+    pass  # print(Datatype_Status.Semantic_Map) - disabled together with the Datatype_Status class above
diff --git a/panoptica/utils/datatypes.py b/panoptica/utils/datatypes.py
new file mode 100644
index 0000000..4379a4a
--- /dev/null
+++ b/panoptica/utils/datatypes.py
@@ -0,0 +1,155 @@
+from typing import Any, Self
+import numpy as np
+from numpy import dtype
+from abc import ABC
+import warnings
+from utils import _count_unique_without_zeros, _unique_without_zeros
+
+uint_type: type = np.unsignedinteger
+int_type: type = np.integer
+
+
+class ProcessingPair(ABC):
+    prediction_arr: np.ndarray
+    reference_arr: np.ndarray
+    # unique labels without zero
+    ref_labels: tuple[int, ...]
+    pred_labels: tuple[int, ...]
+
+    def __init__(self, prediction_arr: np.ndarray, reference_arr: np.ndarray, dtype: type | None) -> None:
+        _check_array_integrity(prediction_arr, reference_arr, dtype=dtype)  # asserts matching shape and dtype
+        self.prediction_arr = prediction_arr
+        self.reference_arr = reference_arr
+        self.ref_labels = tuple(_unique_without_zeros(reference_arr))
+        self.pred_labels = tuple(_unique_without_zeros(prediction_arr))
+
+    # Write-once attributes: the first assignment of a name is stored, any later assignment to it raises.
+    def __setattr__(self, attr: str, value: Any) -> None:
+        if hasattr(self, attr):
+            raise Exception("Attempting to alter read-only value")
+
+        self.__dict__[attr] = value
+
+
+class ProcessingPairInstanced(ProcessingPair):
+    n_prediction_instance: int
+    n_reference_instance: int
+
+    def __init__(
+        self,
+        prediction_arr: np.ndarray,
+        reference_arr: np.ndarray,
+        dtype: type | None,
+        n_prediction_instance: int | None = None,
+        n_reference_instance: int | None = None,
+    ) -> None:
+        # validate both arrays and collect their label tuples in the base class
+        super().__init__(prediction_arr, reference_arr, dtype)
+        if n_prediction_instance is None:  # no count supplied: derive it from the array
+            self.n_prediction_instance = _count_unique_without_zeros(prediction_arr)
+
+        else:
+            self.n_prediction_instance = n_prediction_instance
+        if n_reference_instance is None:  # no count supplied: derive it from the array
+            self.n_reference_instance = _count_unique_without_zeros(reference_arr)
+        else:
+            self.n_reference_instance = n_reference_instance
+
+    def copy(self):  # NOTE(review): passes no dtype, so it only fits subclasses whose __init__ takes none; arrays are shared
+        return type(self)(
+            prediction_arr=self.prediction_arr,
+            reference_arr=self.reference_arr,
+            n_prediction_instance=self.n_prediction_instance,
+            n_reference_instance=self.n_reference_instance,
+        )
+
+
+def _check_array_integrity(prediction_arr: np.ndarray, reference_arr: np.ndarray, dtype: type | None = None) -> None:
+    assert isinstance(prediction_arr, np.ndarray) and isinstance(
+        reference_arr, np.ndarray
+    ), "prediction and/or reference are not numpy arrays"
+    assert prediction_arr.shape == reference_arr.shape, f"shape mismatch, got {prediction_arr.shape},{reference_arr.shape}"
+    assert prediction_arr.dtype == reference_arr.dtype, f"dtype mismatch, got {prediction_arr.dtype},{reference_arr.dtype}"
+    if dtype is not None:  # dtype=None skips the dtype-family check
+        assert (
+            np.issubdtype(prediction_arr.dtype, dtype)
+            and np.issubdtype(reference_arr.dtype, dtype)
+            # issubdtype rather than ==, so an abstract type such as np.integer accepts every concrete width
+        ), f"prediction and/or reference are not dtype {dtype}, got {prediction_arr.dtype} and {reference_arr.dtype}"
+
+
+class SemanticPair(ProcessingPair):
+    """A processing pair of semantic maps with a matching integer dtype.
+
+    Both arrays are handed to the base class together with ``int_type``, so they are
+    checked for equal shape, equal dtype and an integer (signed or unsigned) dtype.
+    """
+
+    def __init__(self, prediction_arr: np.ndarray, reference_arr: np.ndarray) -> None:
+        super().__init__(prediction_arr, reference_arr, dtype=int_type)
+
+
+class UnmatchedInstancePair(ProcessingPairInstanced):
+    """A processing pair of instance maps of any unsigned (but matching) integer type.
+
+    The instance counts are optional: when one is None, the base class counts the
+    unique non-zero labels of the corresponding array instead.
+    """
+
+    def __init__(
+        self,
+        prediction_arr: np.ndarray,
+        reference_arr: np.ndarray,
+        n_prediction_instance: int | None = None,
+        n_reference_instance: int | None = None,
+    ) -> None:
+        super().__init__(prediction_arr, reference_arr, uint_type, n_prediction_instance, n_reference_instance)  # type:ignore
+
+
+class MatchedInstancePair(ProcessingPairInstanced):
+    """A processing pair of any unsigned (but matching) integer type consisting of only matched instance labels, as well as a list of missed labels from both.
+
+    Besides the two arrays it stores the reference and prediction labels that were
+    missed and the number of matched instances, exactly as passed to the constructor.
+    """
+
+    missed_reference_labels: list[int]
+    missed_prediction_labels: list[int]
+    n_matched_instances: int
+
+    def __init__(
+        self,
+        prediction_arr: np.ndarray,
+        reference_arr: np.ndarray,
+        missed_reference_labels: list[int],
+        missed_prediction_labels: list[int],
+        n_matched_instances: int,
+        n_prediction_instance: int | None = None,
+        n_reference_instance: int | None = None,
+    ) -> None:
+        self.missed_reference_labels = missed_reference_labels
+        self.missed_prediction_labels = missed_prediction_labels
+        self.n_matched_instances = n_matched_instances
+        super().__init__(prediction_arr, reference_arr, uint_type, n_prediction_instance, n_reference_instance)  # type:ignore
+
+    def copy(self):  # builds a new pair from the same array and list objects (nothing is duplicated)
+        return type(self)(
+            prediction_arr=self.prediction_arr,
+            reference_arr=self.reference_arr,
+            n_prediction_instance=self.n_prediction_instance,
+            n_reference_instance=self.n_reference_instance,
+            missed_reference_labels=self.missed_reference_labels,
+            missed_prediction_labels=self.missed_prediction_labels,
+            n_matched_instances=self.n_matched_instances,
+        )
+
+
+# Mapping ((prediction_label, ...), (reference_label, ...))
+Instance_Label_Map = list[tuple[list[uint_type], list[uint_type]]]
+
+
+if __name__ == "__main__":
+    n = np.zeros([50, 50], dtype=np.int32)
+    a = SemanticPair(n, n)
+    print(a)
+    # print(a.prediction_arr)
diff --git a/panoptica/utils.py b/panoptica/utils/metrics.py
similarity index 59%
rename from panoptica/utils.py
rename to panoptica/utils/metrics.py
index 7c4f475..c6b861a 100644
--- a/panoptica/utils.py
+++ b/panoptica/utils/metrics.py
@@ -1,9 +1,4 @@
-import warnings
-from typing import Tuple
-
-import cc3d
 import numpy as np
-from scipy import ndimage
 
 
 def _compute_instance_volumetric_dice(
@@ -74,43 +69,59 @@ def _compute_instance_iou(
     return iou
 
 
-def _label_instances(mask: np.ndarray, cca_backend: str) -> Tuple[np.ndarray, int]:
+def _compute_iou(reference: np.ndarray, prediction: np.ndarray) -> float:
     """
-    Label connected components in a segmentation mask.
+    Compute Intersection over Union (IoU) between two masks.
 
     Args:
-        mask (np.ndarray): segmentation mask (2D or 3D array).
-        cca_backend (str): Backend for connected components labeling. Should be "cc3d" or "scipy".
+        reference (np.ndarray): Reference mask.
+        prediction (np.ndarray): Prediction mask.
 
     Returns:
-        Tuple[np.ndarray, int]:
-            - Labeled mask with instances
-            - Number of instances found
+        float: IoU between the two masks. A value between 0 and 1, where higher values
+        indicate better overlap and similarity between masks.
     """
-    if cca_backend == "cc3d":
-        labeled, num_instances = cc3d.connected_components(mask, return_N=True)
-    elif cca_backend == "scipy":
-        labeled, num_instances = ndimage.label(mask)
-    else:
-        raise NotImplementedError(f"Unsupported cca_backend: {cca_backend}")
-    return labeled, num_instances
+    intersection = np.logical_and(reference, prediction)
+    union = np.logical_or(reference, prediction)
+
+    union_sum = np.sum(union)
+
+    # Handle division by zero
+    if union_sum == 0:
+        return 0.0
+
+    iou = np.sum(intersection) / union_sum
+    return iou
 
 
-def _count_unique_without_zeros(arr: np.ndarray) -> int:
+def _compute_dice_coefficient(
+    reference: np.ndarray,
+    prediction: np.ndarray,
+) -> float:
     """
-    Count the number of unique elements in the input NumPy array, excluding zeros.
+    Compute the Dice coefficient between two binary masks.
+
+    The Dice coefficient measures the similarity or overlap between two binary masks.
+    It is defined as:
+
+    Dice = (2 * intersection) / (area_mask1 + area_mask2)
 
     Args:
-        arr (np.ndarray): Input array.
+        reference (np.ndarray): Reference binary mask.
+        prediction (np.ndarray): Prediction binary mask.
 
     Returns:
-        int: Number of unique elements excluding zeros.
+        float: Dice coefficient between the two binary masks. A value between 0 and 1, where higher values
+        indicate better overlap and similarity between masks.
     """
-    if np.any(arr < 0):
-        warnings.warn("Negative values are present in the input array.")
-
-    unique_elements = np.unique(arr)
-    if 0 in unique_elements:
-        return len(unique_elements) - 1
-    else:
-        return len(unique_elements)
+    intersection = np.logical_and(reference, prediction)
+    reference_mask = np.sum(reference)
+    prediction_mask = np.sum(prediction)
+
+    # Handle division by zero
+    if reference_mask == 0 and prediction_mask == 0:
+        return 0.0
+
+    # Calculate Dice coefficient
+    dice = 2 * np.sum(intersection) / (reference_mask + prediction_mask)
+    return dice
diff --git a/panoptica/utils/numpy.py b/panoptica/utils/numpy.py
new file mode 100644
index 0000000..1410988
--- /dev/null
+++ b/panoptica/utils/numpy.py
@@ -0,0 +1,40 @@
+import warnings
+from typing import Tuple
+
+import cc3d
+import numpy as np
+from scipy import ndimage
+
+
+def _unique_without_zeros(arr: np.ndarray) -> np.ndarray:
+    """
+    Return the sorted distinct values of an array, leaving out zero.
+
+    Parameters:
+        arr (np.ndarray): Array to inspect; it is not modified.
+
+    Returns:
+        np.ndarray: One-dimensional array of the distinct non-zero values.
+
+    A warning is emitted when the array holds any negative value.
+    """
+    has_negative = np.any(arr < 0)
+    if has_negative:
+        warnings.warn("Negative values are present in the input array.")
+    return np.unique(arr[np.not_equal(arr, 0)])
+
+
+def _count_unique_without_zeros(arr: np.ndarray) -> int:
+    """
+    Count the number of unique elements in the input NumPy array, excluding zeros.
+
+    Args:
+        arr (np.ndarray): Input array.
+
+    Returns:
+        int: Number of unique elements excluding zeros.
+    """
+    # No negative-value check here: _unique_without_zeros already scans `arr` and
+    # issues that warning, so repeating it would warn twice and scan the array twice.
+
+    return len(_unique_without_zeros(arr))

From 0e8490c2744cdc33655587abd6b171f8496d42e7 Mon Sep 17 00:00:00 2001
From: Hendrik <hendrik.moeller@tum.de>
Date: Fri, 10 Nov 2023 10:28:26 +0100
Subject: [PATCH 2/6] rough cleaning, adapted examples to new code framework

---
 __setup.py                                    |  44 ----
 examples/example_cfos_3d.py                   |  15 +-
 examples/example_spine_instance.py            |  22 +-
 panoptica/__init__.py                         |  13 +-
 panoptica/_functionals.py                     |  58 ++++++
 panoptica/assd_test.py                        |  70 -------
 panoptica/evaluator.py                        |   8 +-
 panoptica/instance_approximator.py            |  35 ++--
 .../instance_evaluation/instance_evaluator.py | 145 -------------
 panoptica/instance_evaluator.py               |   2 -
 panoptica/instance_matcher.py                 |  40 +---
 panoptica/seg_test.ipynb                      |  73 -------
 .../semantic_evaluation/semantic_evaluator.py | 196 ------------------
 panoptica/utils/__init__.py                   |   3 +-
 .../utils/connected_component_backends.py     |  18 --
 panoptica/utils/constants.py                  |  27 ++-
 panoptica/utils/datatypes.py                  |  10 +-
 panoptica/utils/numpy.py                      |  12 ++
 18 files changed, 139 insertions(+), 652 deletions(-)
 delete mode 100644 __setup.py
 create mode 100644 panoptica/_functionals.py
 delete mode 100644 panoptica/assd_test.py
 delete mode 100644 panoptica/instance_evaluation/instance_evaluator.py
 delete mode 100644 panoptica/seg_test.ipynb
 delete mode 100644 panoptica/semantic_evaluation/semantic_evaluator.py
 delete mode 100644 panoptica/utils/connected_component_backends.py

diff --git a/__setup.py b/__setup.py
deleted file mode 100644
index 06273eb..0000000
--- a/__setup.py
+++ /dev/null
@@ -1,44 +0,0 @@
-from setuptools import setup
-
-setup(
-    name="BIDS",
-    version="0.0.2",
-    author="Robert Graf",
-    author_email="robert.graf@tum.de",
-    packages=["BIDS", "BIDS.test"],
-    # scripts=["bin/script1", "bin/script2"],
-    # url="http://pypi.python.org/pypi/PackageName/",
-    license="LICENSE.txt",
-    description="A collection of tools, that work with files in a (weak) BIDS standard",
-    long_description=open("README.md").read(),
-    install_requires=[
-        "pathlib",
-        "pytest",
-        "nibabel",
-        "numpy",
-        "antspyx",
-        "typing_extensions",
-        "scipy",
-        "dataclasses",
-        "SimpleITK",
-        "matplotlib",
-        "dicom2nifti",
-        "func_timeout",
-        "dill",
-    ],
-)
-
-# Build from source:
-# python setup.py build
-# And install:
-# python setup.py install
-# Under Development
-# Develop mode is really, really nice:
-# $ python setup.py develop
-# sudo python3 setup.py develop
-# or:
-# $ pip install -e ./
-
-# which python
-#
-# sudo /home/robert/anaconda3/envs/py3.10/bin/python setup.py develop
diff --git a/examples/example_cfos_3d.py b/examples/example_cfos_3d.py
index 7f1424a..c91f06f 100644
--- a/examples/example_cfos_3d.py
+++ b/examples/example_cfos_3d.py
@@ -1,6 +1,6 @@
 from auxiliary.nifti.io import read_nifti
 
-from panoptica import CCABackend, SemanticSegmentationEvaluator
+from panoptica import *
 
 pred_masks = read_nifti(
     input_nifti_path="/home/florian/flow/cfos_analysis/data/ablation/2021-11-25_23-50-56_2021-10-25_19-38-31_tr_dice_bce_11/patchvolume_695_2.nii.gz"
@@ -9,11 +9,14 @@
     input_nifti_path="/home/florian/flow/cfos_analysis/data/reference/patchvolume_695_2/patchvolume_695_2_binary.nii.gz",
 )
 
-eva = SemanticSegmentationEvaluator(cca_backend=CCABackend.cc3d)
-res = eva.evaluate(
-    reference_mask=ref_masks,
-    prediction_mask=pred_masks,
+sample = SemanticPair(pred_masks, ref_masks)
+
+evaluator = Panoptic_Evaluator(
+    expected_input=SemanticPair,
+    instance_approximator=ConnectedComponentsInstanceApproximator(cca_backend=CCABackend.cc3d),
+    instance_matcher=NaiveOneToOneMatching(),
     iou_threshold=0.5,
 )
 
-print(res)
+result, debug_data = evaluator.evaluate(sample)
+print(result)
diff --git a/examples/example_spine_instance.py b/examples/example_spine_instance.py
index 4131e20..cedaf18 100644
--- a/examples/example_spine_instance.py
+++ b/examples/example_spine_instance.py
@@ -1,21 +1,19 @@
 from auxiliary.nifti.io import read_nifti
 
-from panoptica import InstanceSegmentationEvaluator
+from panoptica import *
 
-ref_masks = read_nifti(
-    "examples/spine_seg/instance/sub-0007_mod-T2w_seg-vert_msk.nii.gz"
-)
-pred_masks = read_nifti(
-    "examples/spine_seg/instance/sub-0007_mod-T2w_seg-vert_msk_new.nii.gz"
-)
+ref_masks = read_nifti("examples/spine_seg/instance/sub-0007_mod-T2w_seg-vert_msk.nii.gz")
+pred_masks = read_nifti("examples/spine_seg/instance/sub-0007_mod-T2w_seg-vert_msk_new.nii.gz")
 
 
-eva = InstanceSegmentationEvaluator()
+sample = SemanticPair(pred_masks, ref_masks)
 
-res = eva.evaluate(
-    reference_mask=ref_masks,
-    prediction_mask=pred_masks,
+evaluator = Panoptic_Evaluator(
+    expected_input=UnmatchedInstancePair,
+    instance_approximator=None,
+    instance_matcher=NaiveOneToOneMatching(),
     iou_threshold=0.5,
 )
 
-print(res)
+result, debug_data = evaluator.evaluate(sample)
+print(result)
diff --git a/panoptica/__init__.py b/panoptica/__init__.py
index f89fd6c..a39a116 100644
--- a/panoptica/__init__.py
+++ b/panoptica/__init__.py
@@ -1,8 +1,5 @@
-from panoptica.instance_evaluation.instance_evaluator import (
-    InstanceSegmentationEvaluator,
-)
-from panoptica.semantic_evaluation.connected_component_backends import CCABackend
-from panoptica.semantic_evaluation.semantic_evaluator import (
-    SemanticSegmentationEvaluator,
-)
-from repo.panoptica.utils.assd import my_assd
+from instance_approximator import ConnectedComponentsInstanceApproximator, CCABackend
+from instance_matcher import NaiveOneToOneMatching
+from evaluator import Panoptic_Evaluator
+from result import PanopticaResult
+from utils.datatypes import SemanticPair, UnmatchedInstancePair, MatchedInstancePair
diff --git a/panoptica/_functionals.py b/panoptica/_functionals.py
new file mode 100644
index 0000000..aa4aa4c
--- /dev/null
+++ b/panoptica/_functionals.py
@@ -0,0 +1,58 @@
+import numpy as np
+from multiprocessing import Pool
+from utils.metrics import _compute_instance_iou
+from utils.constants import CCABackend
+
+
+def _calc_iou_matrix(prediction_arr: np.ndarray, reference_arr: np.ndarray, ref_labels, pred_labels):
+    """Return the (len(ref_labels), len(pred_labels)) IoU matrix; entry [i, j] pairs ref_labels[i] with pred_labels[j]."""
+    num_ref_instances = len(ref_labels)
+    num_pred_instances = len(pred_labels)
+    # Create a pool of worker processes to parallelize the computation
+    with Pool() as pool:
+        # Pair up the actual label values: labels need not be the consecutive range 1..N,
+        # so enumerating 1..N instead would compare the wrong (or non-existent) instances.
+        instance_pairs = [
+            (reference_arr, prediction_arr, ref_label, pred_label)
+            for ref_label in ref_labels
+            for pred_label in pred_labels
+        ]
+        # Calculate IoU for all instance pairs in parallel using starmap
+        iou_values = pool.starmap(_compute_instance_iou, instance_pairs)
+
+    # Reshape the resulting IoU values into a matrix
+    iou_matrix = np.array(iou_values).reshape((num_ref_instances, num_pred_instances))
+    return iou_matrix
+
+
+def _map_labels(arr: np.ndarray, label_map: dict[np.integer, np.integer]) -> np.ndarray:
+    """Return a relabelled copy of ``arr``; values that are not keys of ``label_map`` stay as they are.
+    Args:
+        arr (np.ndarray): Label array to translate. It is left untouched.
+        label_map (dict): Lookup from a present label value to the value that replaces it.
+    Returns:
+        np.ndarray: A new array of the same shape and dtype as ``arr``.
+    """
+    remapped = arr.copy()
+    present_values = np.unique(arr)
+    for old_value in (v for v in present_values if v in label_map):
+        remapped[arr == old_value] = label_map[old_value]  # mask comes from the input, so swaps such as {1: 2, 2: 1} work
+    return remapped
+
+
+def _connected_components(
+    array: np.ndarray,
+    cca_backend: CCABackend,
+) -> tuple[np.ndarray, int]:  # (component-labelled array, number of components found)
+    if cca_backend == CCABackend.cc3d:
+        import cc3d  # imported lazily, only when this backend is selected
+
+        cc_arr, n_instances = cc3d.connected_components(array, return_N=True)
+    elif cca_backend == CCABackend.scipy:
+        from scipy.ndimage import label  # imported lazily, only when this backend is selected
+
+        cc_arr, n_instances = label(array)
+    else:
+        raise NotImplementedError(cca_backend)  # any other backend value is unsupported
+
+    return cc_arr, n_instances
diff --git a/panoptica/assd_test.py b/panoptica/assd_test.py
deleted file mode 100644
index df861c2..0000000
--- a/panoptica/assd_test.py
+++ /dev/null
@@ -1,70 +0,0 @@
-from medpy import metric
-from BIDS import NII
-from BIDS.core.np_utils import np_extract_label
-
-# from panoptica.assd import my_assd
-from repo.panoptica.utils.assd import my_assd
-from time import perf_counter
-import numpy as np
-from multiprocessing import Pool
-
-gt = "/media/hendrik/be5e95dd-27c8-4c31-adc5-7b75f8ebd5c5/data/hendrik/panoptica/data/gt/verse012_seg.nii.gz"
-
-pred = "/media/hendrik/be5e95dd-27c8-4c31-adc5-7b75f8ebd5c5/data/hendrik/panoptica/data/submissions/christian_payer/docker_phase2/results/verse012_seg.nii.gz"
-
-
-def extract_both(pred_arr, gt_arr, label: int):
-    pred_l = np_extract_label(pred_arr, label, inplace=False)
-    gt_l = np_extract_label(gt_arr, label, inplace=False)
-    return pred_l, gt_l
-
-
-pred_nii = NII.load(pred, seg=True)
-pred_nii.map_labels_({l: idx + 1 for idx, l in enumerate(pred_nii.unique())}, verbose=False)
-gt_nii = NII.load(gt, seg=True)
-gt_nii.map_labels_({l: idx + 1 for idx, l in enumerate(gt_nii.unique())}, verbose=False)
-
-pred_arr = pred_nii.get_seg_array()
-gt_arr = gt_nii.get_seg_array()
-
-iterations = 3
-
-medpy_result = 1.5266468819541414
-
-time_medpy = []
-time_my = []
-
-labels = pred_nii.unique()
-
-for i in range(iterations):
-    start = perf_counter()
-    # label_list = [l for l in labels if l in gt_arr]
-    pairs = (extract_both(pred_arr, gt_arr, l) for l in labels if l in gt_arr)
-    # for l in label_list:
-    #    pred_l = np_extract_label(pred_arr, l, inplace=False)
-    #    gt_l = np_extract_label(gt_arr, l, inplace=False)
-    #    result = metric.assd(result=pred_l, reference=gt_l)
-    result = [metric.assd(p[0], p[1]) for p in pairs]
-    time = perf_counter() - start
-    time_medpy.append(time)
-#
-# mine is faster, speedup my_assd even more?
-# TODO try this pooling with my vertebra segmentation, make all pairs for dice calculation
-#
-for i in range(iterations):
-    start = perf_counter()
-    with Pool() as pool:
-        pairs = (extract_both(pred_arr, gt_arr, l) for l in labels if l in gt_arr)
-
-        assd_values = pool.starmap(my_assd, pairs)
-    # result2 = my_assd(result=pred_arr, reference=gt_arr)
-    time = perf_counter() - start
-    time_my.append(time)
-    # assert result2 == medpy_result
-
-print(np.average(time_medpy))
-print(np.average(time_my))
-
-print(result)
-print()
-print(assd_values)
diff --git a/panoptica/evaluator.py b/panoptica/evaluator.py
index ea970c0..98225cc 100644
--- a/panoptica/evaluator.py
+++ b/panoptica/evaluator.py
@@ -3,7 +3,7 @@
 import numpy as np
 
 from result import PanopticaResult
-from utils.datatypes import SemanticPair, UnmatchedInstancePair, MatchedInstancePair, ProcessingPair, ProcessingPairInstanced
+from utils.datatypes import SemanticPair, UnmatchedInstancePair, MatchedInstancePair, _ProcessingPair, _ProcessingPairInstanced
 from instance_approximator import InstanceApproximator
 from instance_matcher import InstanceMatchingAlgorithm
 from instance_evaluator import evaluate_matched_instance
@@ -24,7 +24,7 @@ def __init__(
         self.__iou_threshold = iou_threshold
 
     @measure_time
-    def evaluate(self, processing_pair: ProcessingPair) -> tuple[PanopticaResult, dict[str, ProcessingPair]]:
+    def evaluate(self, processing_pair: _ProcessingPair) -> tuple[PanopticaResult, dict[str, _ProcessingPair]]:
         assert type(processing_pair) == self.__expected_input, f"input not of expected type {self.__expected_input}"
         return panoptic_evaluate(
             processing_pair=processing_pair,
@@ -41,8 +41,8 @@ def panoptic_evaluate(
     iou_threshold: float,
     verbose: bool = False,
     **kwargs,
-) -> tuple[PanopticaResult, dict[str, ProcessingPair]]:
-    debug_data: dict[str, ProcessingPair] = {}
+) -> tuple[PanopticaResult, dict[str, _ProcessingPair]]:
+    debug_data: dict[str, _ProcessingPair] = {}
     # First Phase: Instance Approximation
     if isinstance(processing_pair, PanopticaResult):
         return processing_pair, debug_data
diff --git a/panoptica/instance_approximator.py b/panoptica/instance_approximator.py
index 96fa751..f6a5318 100644
--- a/panoptica/instance_approximator.py
+++ b/panoptica/instance_approximator.py
@@ -1,6 +1,7 @@
 from abc import abstractmethod, ABC
 from utils.datatypes import SemanticPair, UnmatchedInstancePair, MatchedInstancePair
-from utils.connected_component_backends import CCABackend
+from _functionals import _connected_components, CCABackend
+from utils.numpy import _get_smallest_fitting_uint
 import numpy as np
 
 
@@ -10,9 +11,17 @@ def _approximate_instances(self, semantic_pair: SemanticPair, **kwargs) -> Unmat
         pass
 
     def approximate_instances(self, semantic_pair: SemanticPair, **kwargs) -> UnmatchedInstancePair | MatchedInstancePair:
-        # TODO call _approx
-        max_value = max(np.max(prediction_arr), np.max(reference_arr))
-        # reduce to smallest uint
+        # Call algorithm
+        instance_pair = self._approximate_instances(semantic_pair, **kwargs)
+        # Check validity
+        min_value = min(np.min(instance_pair.pred_labels), np.min(instance_pair.ref_labels))
+        assert min_value >= 0, "There are negative values in the semantic maps. This is not allowed!"
+        # Set dtype to smalles fitting uint
+        max_value = max(np.max(instance_pair.pred_labels), np.max(instance_pair.ref_labels))
+        dtype = _get_smallest_fitting_uint(max_value)
+        instance_pair.prediction_arr.astype(dtype)
+        instance_pair.reference_arr.astype(dtype)
+        return instance_pair
 
 
 class ConnectedComponentsInstanceApproximator(InstanceApproximator):
@@ -28,21 +37,3 @@ def _approximate_instances(self, semantic_pair: SemanticPair, **kwargs) -> Unmat
             n_prediction_instance=n_prediction_instance,
             n_reference_instance=n_reference_instance,
         )
-
-
-def _connected_components(
-    array: np.ndarray,
-    cca_backend: CCABackend,
-) -> tuple[np.ndarray, int]:
-    if cca_backend == CCABackend.cc3d:
-        import cc3d
-
-        cc_arr, n_instances = cc3d.connected_components(array, return_N=True)
-    elif cca_backend == CCABackend.scipy:
-        from scipy.ndimage import label
-
-        cc_arr, n_instances = label(array)
-    else:
-        raise NotImplementedError(cca_backend)
-
-    return cc_arr, n_instances
diff --git a/panoptica/instance_evaluation/instance_evaluator.py b/panoptica/instance_evaluation/instance_evaluator.py
deleted file mode 100644
index 11740af..0000000
--- a/panoptica/instance_evaluation/instance_evaluator.py
+++ /dev/null
@@ -1,145 +0,0 @@
-from __future__ import annotations
-
-import concurrent.futures
-import warnings
-from typing import Tuple
-
-import numpy as np
-
-from panoptica.evaluator import Evaluator
-from panoptica.result import PanopticaResult
-from panoptica.timing import measure_time
-
-
-class InstanceSegmentationEvaluator(Evaluator):
-    """
-    Evaluator for instance segmentation results.
-
-    This class extends the Evaluator class and provides methods for evaluating instance segmentation masks
-    using metrics such as Intersection over Union (IoU) and Dice coefficient.
-
-    Methods:
-        evaluate(reference_mask, prediction_mask, iou_threshold): Evaluate the instance segmentation masks.
-        _unique_without_zeros(arr): Get unique non-zero values from a NumPy array.
-
-    """
-
-    def __init__(self):
-        # TODO consider initializing evaluator with metrics it should compute
-        pass
-
-    @measure_time
-    def evaluate(
-        self,
-        reference_mask: np.ndarray,
-        prediction_mask: np.ndarray,
-        iou_threshold: float,
-    ) -> PanopticaResult:
-        """
-        Evaluate the intersection over union (IoU) and Dice coefficient for instance segmentation masks.
-
-        Args:
-            reference_mask (np.ndarray): The reference instance segmentation mask.
-            prediction_mask (np.ndarray): The predicted instance segmentation mask.
-            iou_threshold (float): The IoU threshold for considering a match.
-
-        Returns:
-            PanopticaResult: A named tuple containing evaluation results.
-        """
-        ref_labels = reference_mask
-        ref_nonzero_unique_labels = self._unique_without_zeros(arr=ref_labels)
-        num_ref_instances = len(ref_nonzero_unique_labels)
-
-        pred_labels = prediction_mask
-        pred_nonzero_unique_labels = self._unique_without_zeros(arr=pred_labels)
-        num_pred_instances = len(pred_nonzero_unique_labels)
-
-        self._handle_edge_cases(
-            num_ref_instances=num_ref_instances,
-            num_pred_instances=num_pred_instances,
-        )
-
-        # Initialize variables for True Positives (tp)
-        tp, dice_list, iou_list = 0, [], []
-
-        # Use concurrent.futures.ThreadPoolExecutor for parallelization
-        with concurrent.futures.ThreadPoolExecutor() as executor:
-            futures = [
-                executor.submit(
-                    self._evaluate_instance,
-                    ref_labels,
-                    pred_labels,
-                    ref_idx,
-                    iou_threshold,
-                )
-                for ref_idx in ref_nonzero_unique_labels
-            ]
-
-            for future in concurrent.futures.as_completed(futures):
-                tp_i, dice_i, iou_i = future.result()
-                tp += tp_i
-                if dice_i is not None:
-                    dice_list.append(dice_i)
-                if iou_i is not None:
-                    iou_list.append(iou_i)
-
-        # Create and return the PanopticaResult object with computed metrics
-        return PanopticaResult(
-            num_ref_instances=num_ref_instances,
-            num_pred_instances=num_pred_instances,
-            tp=tp,
-            dice_list=dice_list,
-            iou_list=iou_list,
-        )
-
-    def _evaluate_instance(
-        self,
-        ref_labels: np.ndarray,
-        pred_labels: np.ndarray,
-        ref_idx: int,
-        iou_threshold: float,
-    ) -> Tuple[int, float, float]:
-        """
-        Evaluate a single instance.
-
-        Args:
-            ref_labels (np.ndarray): Reference instance segmentation mask.
-            pred_labels (np.ndarray): Predicted instance segmentation mask.
-            ref_idx (int): The label of the current instance.
-            iou_threshold (float): The IoU threshold for considering a match.
-
-        Returns:
-            Tuple[int, float, float]: Tuple containing True Positives (int), Dice coefficient (float), and IoU (float).
-        """
-        iou = self._compute_iou(
-            reference=ref_labels == ref_idx,
-            prediction=pred_labels == ref_idx,
-        )
-        if iou > iou_threshold:
-            tp = 1
-            dice = self._compute_dice_coefficient(
-                reference=ref_labels == ref_idx,
-                prediction=pred_labels == ref_idx,
-            )
-        else:
-            tp = 0
-            dice = None
-
-        return tp, dice, iou
-
-    def _unique_without_zeros(self, arr: np.ndarray) -> np.ndarray:
-        """
-        Get unique non-zero values from a NumPy array.
-
-        Parameters:
-            arr (np.ndarray): Input NumPy array.
-
-        Returns:
-            np.ndarray: Unique non-zero values from the input array.
-
-        Issues a warning if negative values are present.
-        """
-        if np.any(arr < 0):
-            warnings.warn("Negative values are present in the input array.")
-
-        return np.unique(arr[arr != 0])
diff --git a/panoptica/instance_evaluator.py b/panoptica/instance_evaluator.py
index e21a200..f530ced 100644
--- a/panoptica/instance_evaluator.py
+++ b/panoptica/instance_evaluator.py
@@ -1,8 +1,6 @@
-from abc import abstractmethod, ABC
 import concurrent.futures
 from utils.datatypes import MatchedInstancePair
 from result import PanopticaResult
-from timing import measure_time
 import numpy as np
 from utils.metrics import _compute_iou, _compute_dice_coefficient
 
diff --git a/panoptica/instance_matcher.py b/panoptica/instance_matcher.py
index 4226814..ce85866 100644
--- a/panoptica/instance_matcher.py
+++ b/panoptica/instance_matcher.py
@@ -2,8 +2,8 @@
 from utils.datatypes import UnmatchedInstancePair, MatchedInstancePair, Instance_Label_Map, uint_type
 from multiprocessing import Pool
 import numpy as np
-from utils import _unique_without_zeros
-from utils import _compute_instance_iou
+from utils import _unique_without_zeros, _compute_instance_iou
+from _functionals import _map_labels, _calc_iou_matrix
 from scipy.optimize import linear_sum_assignment
 
 
@@ -52,27 +52,6 @@ def _match_instances(self, unmatched_instance_pair: UnmatchedInstancePair, **kwa
         return labelmap
 
 
-def _calc_iou_matrix(prediction_arr: np.ndarray, reference_arr: np.ndarray, ref_labels, pred_labels):
-    num_ref_instances = len(ref_labels)
-    num_pred_instances = len(pred_labels)
-
-    # Create a pool of worker processes to parallelize the computation
-    with Pool() as pool:
-        # Generate all possible pairs of instance indices for IoU computation
-        instance_pairs = [
-            (reference_arr, prediction_arr, ref_idx, pred_idx)
-            for ref_idx in range(1, num_ref_instances + 1)
-            for pred_idx in range(1, num_pred_instances + 1)
-        ]
-
-        # Calculate IoU for all instance pairs in parallel using starmap
-        iou_values = pool.starmap(_compute_instance_iou, instance_pairs)
-
-    # Reshape the resulting IoU values into a matrix
-    iou_matrix = np.array(iou_values).reshape((num_ref_instances, num_pred_instances))
-    return iou_matrix
-
-
 def map_instance_labels(processing_pair: UnmatchedInstancePair, labelmap: Instance_Label_Map) -> MatchedInstancePair:
     prediction_arr, reference_arr = processing_pair.prediction_arr, processing_pair.reference_arr
 
@@ -121,21 +100,6 @@ def map_instance_labels(processing_pair: UnmatchedInstancePair, labelmap: Instan
     return matched_instance_pair
 
 
-def _map_labels(arr: np.ndarray, label_map: dict[np.integer, np.integer]) -> np.ndarray:
-    """Maps labels in the given array according to the label_map dictionary.
-    Args:
-        label_map (dict): A dictionary that maps the original label values (str or int) to the new label values (int).
-
-    Returns:
-        np.ndarray: Returns a copy of the remapped array
-    """
-    data = arr.copy()
-    for v in np.unique(data):
-        if v in label_map:  # int needed to match non-integer data-types
-            data[arr == v] = label_map[v]
-    return data
-
-
 if __name__ == "__main__":
     a = np.array([0, 1, 2, 3, 4, 5], dtype=np.uint16)
     b = np.array([0, 1, 2, 6, 3, 7], dtype=np.uint16)
diff --git a/panoptica/seg_test.ipynb b/panoptica/seg_test.ipynb
deleted file mode 100644
index 79c59d5..0000000
--- a/panoptica/seg_test.ipynb
+++ /dev/null
@@ -1,73 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "{1: 1, 2: 2, 3: 2, 4: 3}\n",
-      "{1: 1, 3: 2, 5: 3}\n"
-     ]
-    }
-   ],
-   "source": [
-    "from instance_matcher import _map_instance_labels\n",
-    "import numpy as np\n",
-    "reference_arr = np.ndarray([\n",
-    "    0,1,2,3,4\n",
-    "])\n",
-    "pred_arr = np.ndarray([\n",
-    "    0,1,0,3,5\n",
-    "])\n",
-    "\n",
-    "#prediction 2 -> 3,4 ref\n",
-    "\n",
-    "labelmap = [([1],[1]),([2,3],[3]),([4],[5])]\n",
-    "#_map_instance_labels(pred_arr, reference_arr, labelmap=labelmap)\n",
-    "ref_labelmap = {}\n",
-    "pred_labelmap = {}\n",
-    "\n",
-    "for idx, (refs, preds) in enumerate(labelmap):\n",
-    "    for r in refs:\n",
-    "        ref_labelmap[r] = idx + 1\n",
-    "    for p in preds:\n",
-    "        pred_labelmap[p] = idx + 1\n",
-    "\n",
-    "print(ref_labelmap)\n",
-    "print(pred_labelmap)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "nakoseg",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.5"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/panoptica/semantic_evaluation/semantic_evaluator.py b/panoptica/semantic_evaluation/semantic_evaluator.py
deleted file mode 100644
index 781a28d..0000000
--- a/panoptica/semantic_evaluation/semantic_evaluator.py
+++ /dev/null
@@ -1,196 +0,0 @@
-from __future__ import annotations
-
-from multiprocessing import Pool
-from typing import Tuple
-
-import cc3d
-import numpy as np
-from scipy import ndimage
-from scipy.optimize import linear_sum_assignment
-
-from panoptica.evaluator import Evaluator
-from panoptica.result import PanopticaResult
-from panoptica.semantic_evaluation.connected_component_backends import CCABackend
-from panoptica.timing import measure_time
-
-
-class SemanticSegmentationEvaluator(Evaluator):
-    """
-    Evaluator for semantic segmentation results.
-
-    This class extends the Evaluator class and provides methods for evaluating semantic segmentation masks
-    using metrics such as Intersection over Union (IoU) and Dice coefficient.
-
-    Args:
-        cca_backend (CCABackend): The backend for connected components labeling (Enum: CCABackend.cc3d or CCABackend.scipy).
-
-    Methods:
-        evaluate(reference_mask, prediction_mask, iou_threshold): Evaluate the segmentation masks.
-        _label_instances(mask): Label connected components in a segmentation mask.
-        _compute_instance_iou(ref_labels, pred_labels, ref_instance_idx, pred_instance_idx): Compute IoU for instances.
-        _compute_instance_dice_coefficient(ref_labels, pred_labels, ref_instance_idx, pred_instance_idx): Compute Dice coefficient for instances.
-    """
-
-    def __init__(self, cca_backend: CCABackend):
-        self.cca_backend = cca_backend
-        # TODO consider initializing evaluator with metrics it should compute
-
-    @measure_time
-    def evaluate(
-        self,
-        reference_mask: np.ndarray,
-        prediction_mask: np.ndarray,
-        iou_threshold: float,
-    ) -> PanopticaResult:
-        """
-        Evaluate the intersection over union (IoU) and Dice coefficient for semantic segmentation masks.
-
-        Args:
-            reference_mask (np.ndarray): The reference segmentation mask.
-            prediction_mask (np.ndarray): The predicted segmentation mask.
-            iou_threshold (float): The IoU threshold for considering a match.
-
-        Returns:
-            PanopticaResult: A named tuple containing evaluation results.
-        """
-        ref_labels, num_ref_instances = self._label_instances(
-            mask=reference_mask,
-        )
-
-        pred_labels, num_pred_instances = self._label_instances(
-            mask=prediction_mask,
-        )
-
-        self._handle_edge_cases(
-            num_ref_instances=num_ref_instances,
-            num_pred_instances=num_pred_instances,
-        )
-
-        # Create a pool of worker processes to parallelize the computation
-        with Pool() as pool:
-            # Generate all possible pairs of instance indices for IoU computation
-            instance_pairs = [
-                (ref_labels, pred_labels, ref_idx, pred_idx)
-                for ref_idx in range(1, num_ref_instances + 1)
-                for pred_idx in range(1, num_pred_instances + 1)
-            ]
-
-            # Calculate IoU for all instance pairs in parallel using starmap
-            iou_values = pool.starmap(self._compute_instance_iou, instance_pairs)
-
-        # Reshape the resulting IoU values into a matrix
-        iou_matrix = np.array(iou_values).reshape((num_ref_instances, num_pred_instances))
-
-        # Use linear_sum_assignment to find the best matches
-        ref_indices, pred_indices = linear_sum_assignment(-iou_matrix)
-
-        # Initialize variables for True Positives (tp) and False Positives (fp)
-        tp, dice_list, iou_list = 0, [], []
-
-        # Loop through matched instances to compute PQ components
-        for ref_idx, pred_idx in zip(ref_indices, pred_indices):
-            iou = iou_matrix[ref_idx][pred_idx]
-            if iou >= iou_threshold:
-                # Match found, increment true positive count and collect IoU and Dice values
-                tp += 1
-                iou_list.append(iou)
-
-                # Compute Dice for matched instances
-                dice = self._compute_instance_dice_coefficient(
-                    ref_labels=ref_labels,
-                    pred_labels=pred_labels,
-                    ref_instance_label=ref_idx + 1,
-                    pred_instance_label=pred_idx + 1,
-                )
-                dice_list.append(dice)
-
-        # Create and return the PanopticaResult object with computed metrics
-        return PanopticaResult(
-            num_ref_instances=num_ref_instances,
-            num_pred_instances=num_pred_instances,
-            tp=tp,
-            dice_list=dice_list,
-            iou_list=iou_list,
-        )
-
-    def _label_instances(
-        self,
-        mask: np.ndarray,
-    ) -> Tuple[np.ndarray, int]:
-        """
-        Label connected components in a segmentation mask.
-
-        Args:
-            mask (np.ndarray): Segmentation mask (2D or 3D array).
-
-        Returns:
-            Tuple[np.ndarray, int]:
-                - Labeled mask with instances
-                - Number of instances found
-        """
-        if self.cca_backend == CCABackend.cc3d:
-            labeled, num_instances = cc3d.connected_components(mask, return_N=True)
-        elif self.cca_backend == CCABackend.scipy:
-            labeled, num_instances = ndimage.label(mask)
-
-        return labeled, num_instances
-
-    def _compute_instance_iou(
-        self,
-        ref_labels: np.ndarray,
-        pred_labels: np.ndarray,
-        ref_instance_idx: int,
-        pred_instance_idx: int,
-    ) -> float:
-        """
-        Compute Intersection over Union (IoU) between a specific pair of reference and prediction instances.
-
-        Args:
-            ref_labels (np.ndarray): Reference instance labels.
-            pred_labels (np.ndarray): Prediction instance labels.
-            ref_instance_idx (int): Index of the reference instance.
-            pred_instance_idx (int): Index of the prediction instance.
-
-        Returns:
-            float: IoU between the specified instances.
-        """
-        ref_instance_mask = ref_labels == ref_instance_idx
-        pred_instance_mask = pred_labels == pred_instance_idx
-
-        return self._compute_iou(
-            reference=ref_instance_mask,
-            prediction=pred_instance_mask,
-        )
-
-    def _compute_instance_dice_coefficient(
-        self,
-        ref_labels: np.ndarray,
-        pred_labels: np.ndarray,
-        ref_instance_label: int,
-        pred_instance_label: int,
-    ) -> float:
-        """
-        Compute the Dice coefficient between a specific pair of instances.
-
-        The Dice coefficient measures the similarity or overlap between two binary masks representing instances.
-        It is defined as:
-
-        Dice = (2 * intersection) / (ref_area + pred_area)
-
-        Args:
-            ref_labels (np.ndarray): Reference instance labels.
-            pred_labels (np.ndarray): Prediction instance labels.
-            ref_instance_label (int): Label of the reference instance.
-            pred_instance_label (int): Label of the prediction instance.
-
-        Returns:
-            float: Dice coefficient between the specified instances. A value between 0 and 1, where higher values
-            indicate better overlap and similarity between instances.
-        """
-        ref_instance_mask = ref_labels == ref_instance_label
-        pred_instance_mask = pred_labels == pred_instance_label
-
-        return self._compute_dice_coefficient(
-            reference=ref_instance_mask,
-            prediction=pred_instance_mask,
-        )
diff --git a/panoptica/utils/__init__.py b/panoptica/utils/__init__.py
index f4ab7c0..aafd1d1 100644
--- a/panoptica/utils/__init__.py
+++ b/panoptica/utils/__init__.py
@@ -1,6 +1,5 @@
 from utils.numpy import _count_unique_without_zeros, _unique_without_zeros
-from utils.connected_component_backends import CCABackend
-from utils.datatypes import *
+from utils.datatypes import SemanticPair, UnmatchedInstancePair, MatchedInstancePair, Instance_Label_Map
 from utils.metrics import _compute_instance_volumetric_dice, _compute_instance_iou, _compute_dice_coefficient, _compute_iou
 
 # from utils.constants import
diff --git a/panoptica/utils/connected_component_backends.py b/panoptica/utils/connected_component_backends.py
deleted file mode 100644
index a03debf..0000000
--- a/panoptica/utils/connected_component_backends.py
+++ /dev/null
@@ -1,18 +0,0 @@
-from enum import Enum, auto
-
-
-class CCABackend(Enum):
-    """
-    Enumeration representing different connected component analysis (CCA) backends.
-
-    This enumeration defines options for CCA backends, which are used for labeling connected components in segmentation masks.
-
-    Members:
-        - cc3d: Represents the Connected Components in 3D (CC3D) backend for CCA.
-          [CC3D Website](https://github.com/seung-lab/connected-components-3d)
-        - scipy: Represents the SciPy backend for CCA.
-          [SciPy Website](https://www.scipy.org/)
-    """
-
-    cc3d = auto()
-    scipy = auto()
diff --git a/panoptica/utils/constants.py b/panoptica/utils/constants.py
index 7342375..e76fd8b 100644
--- a/panoptica/utils/constants.py
+++ b/panoptica/utils/constants.py
@@ -1,8 +1,7 @@
 from enum import Enum, auto
-from typing_extensions import Self
 
 
-class Enum_Compare(Enum):
+class _Enum_Compare(Enum):
     def __eq__(self, __value: object) -> bool:
         if isinstance(__value, Enum):
             return self.name == __value.name and self.value == __value.value
@@ -21,11 +20,25 @@ def __hash__(self) -> int:
         return self.value
 
 
-# class Datatype_Status(Enum_Compare):
-#    Semantic_Map = auto()
-#    Unmatched_Instance_Map = auto()
-#    Matched_Instance_Map = auto()
+from enum import Enum, auto
+
+
+class CCABackend(_Enum_Compare):
+    """
+    Enumeration representing different connected component analysis (CCA) backends.
+
+    This enumeration defines options for CCA backends, which are used for labeling connected components in segmentation masks.
+
+    Members:
+        - cc3d: Represents the Connected Components in 3D (CC3D) backend for CCA.
+          [CC3D Website](https://github.com/seung-lab/connected-components-3d)
+        - scipy: Represents the SciPy backend for CCA.
+          [SciPy Website](https://www.scipy.org/)
+    """
+    # Member values are generated by auto(); the __hash__ defined above in this file returns that value.
+    cc3d = auto()
+    scipy = auto()
 
 
 if __name__ == "__main__":
-    #print(Datatype_Status.Semantic_Map)
+    print(CCABackend.cc3d)
diff --git a/panoptica/utils/datatypes.py b/panoptica/utils/datatypes.py
index 4379a4a..77274c9 100644
--- a/panoptica/utils/datatypes.py
+++ b/panoptica/utils/datatypes.py
@@ -9,7 +9,7 @@
 int_type: type = np.integer
 
 
-class ProcessingPair(ABC):
+class _ProcessingPair(ABC):
     prediction_arr: np.ndarray
     reference_arr: np.ndarray
     # unique labels without zero
@@ -31,7 +31,7 @@ def __setattr__(self, attr, value):
         self.__dict__[attr] = value
 
 
-class ProcessingPairInstanced(ProcessingPair):
+class _ProcessingPairInstanced(_ProcessingPair):
     n_prediction_instance: int
     n_reference_instance: int
 
@@ -78,7 +78,7 @@ def _check_array_integrity(prediction_arr: np.ndarray, reference_arr: np.ndarray
         ), f"prediction and/or reference are not dtype {dtype}, got {prediction_arr.dtype} and {reference_arr.dtype}"
 
 
-class SemanticPair(ProcessingPair):
+class SemanticPair(_ProcessingPair):
     """A Processing pair of any dtype
 
     Args:
@@ -89,7 +89,7 @@ def __init__(self, prediction_arr: np.ndarray, reference_arr: np.ndarray) -> Non
         super().__init__(prediction_arr, reference_arr, dtype=int_type)
 
 
-class UnmatchedInstancePair(ProcessingPairInstanced):
+class UnmatchedInstancePair(_ProcessingPairInstanced):
     """A Processing pair of any unsigned (but matching) integer type
 
     Args:
@@ -106,7 +106,7 @@ def __init__(
         super().__init__(prediction_arr, reference_arr, uint_type, n_prediction_instance, n_reference_instance)  # type:ignore
 
 
-class MatchedInstancePair(ProcessingPairInstanced):
+class MatchedInstancePair(_ProcessingPairInstanced):
     """A Processing pair of any unsigned (but matching) integer type consisting of only matched instance labels, as well as a list of missed labels from both
 
     Args:
diff --git a/panoptica/utils/numpy.py b/panoptica/utils/numpy.py
index 1410988..7f627b2 100644
--- a/panoptica/utils/numpy.py
+++ b/panoptica/utils/numpy.py
@@ -38,3 +38,15 @@ def _count_unique_without_zeros(arr: np.ndarray) -> int:
         warnings.warn("Negative values are present in the input array.")
 
     return len(_unique_without_zeros(arr))
+
+
+def _get_smallest_fitting_uint(max_value: int) -> type:
+    """Return the narrowest NumPy unsigned integer type (uint8/16/32/64) that can hold ``max_value``."""
+    # Each bound is exclusive and equals 2**bits, so 2**bits - 1 still maps to the narrower type.
+    if max_value < 256:
+        return np.uint8
+    if max_value < 65536:
+        return np.uint16
+    if max_value < 4294967296:
+        return np.uint32
+    return np.uint64

From a6331b0e4bab5b534e42c63998b02899e4fbb093 Mon Sep 17 00:00:00 2001
From: Hendrik <hendrik.moeller@tum.de>
Date: Fri, 10 Nov 2023 10:57:38 +0100
Subject: [PATCH 3/6] added first unittest

---
 .../unit_tests/test_panoptic_evaluator.py     | 35 +++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 panoptica/unit_tests/test_panoptic_evaluator.py

diff --git a/panoptica/unit_tests/test_panoptic_evaluator.py b/panoptica/unit_tests/test_panoptic_evaluator.py
new file mode 100644
index 0000000..1fe1f96
--- /dev/null
+++ b/panoptica/unit_tests/test_panoptic_evaluator.py
@@ -0,0 +1,35 @@
+# Call 'python -m unittest' on this folder
+# coverage run -m unittest
+# coverage report
+# coverage html
+import unittest
+
+from instance_approximator import ConnectedComponentsInstanceApproximator, CCABackend
+from instance_matcher import NaiveOneToOneMatching
+from instance_evaluator import evaluate_matched_instance
+import numpy as np
+from result import PanopticaResult
+from utils.datatypes import SemanticPair, UnmatchedInstancePair, MatchedInstancePair, _ProcessingPair
+from evaluator import Panoptic_Evaluator
+
+
+class Test_Panoptic_Evaluator(unittest.TestCase):
+    def test_simple_evaluation(self):
+        a = np.zeros([50, 50], dtype=int)
+        b = a.copy()
+        a[20:40, 10:20] = 1  # reference: 20 x 10 block (200 pixels)
+        b[20:35, 10:20] = 2  # prediction: first 15 rows of the same block (150 pixels)
+        # The prediction lies fully inside the reference, so intersection / union = 150 / 200 = 0.75.
+        sample = SemanticPair(b, a)  # argument order: prediction first, then reference
+
+        evaluator = Panoptic_Evaluator(
+            expected_input=SemanticPair,
+            instance_approximator=ConnectedComponentsInstanceApproximator(cca_backend=CCABackend.cc3d),
+            instance_matcher=NaiveOneToOneMatching(),
+        )
+        # Expect one matched pair and no false positive; presumably sq is the mean IoU of matches and pq = sq here -- confirm.
+        result, debug_data = evaluator.evaluate(sample)
+        self.assertEqual(result.tp, 1)
+        self.assertEqual(result.fp, 0)
+        self.assertEqual(result.sq, 0.75)
+        self.assertEqual(result.pq, 0.75)

From 8a3dbe44314618f80e9e719a41cc0802ca79f5ff Mon Sep 17 00:00:00 2001
From: Hendrik <hendrik.moeller@tum.de>
Date: Fri, 10 Nov 2023 10:57:44 +0100
Subject: [PATCH 4/6] fixed examples and imports

---
 examples/example_cfos_3d.py        |  2 +-
 examples/example_spine_instance.py | 10 ++++++++--
 examples/example_spine_semantic.py | 23 +++++++++++------------
 panoptica/__init__.py              | 10 +++++-----
 panoptica/_functionals.py          |  4 ++--
 panoptica/evaluator.py             | 12 ++++++------
 panoptica/instance_approximator.py |  6 +++---
 panoptica/instance_evaluator.py    |  6 +++---
 panoptica/instance_matcher.py      |  6 ++----
 panoptica/utils/__init__.py        |  6 +++---
 panoptica/utils/datatypes.py       |  2 +-
 panoptica/utils/numpy.py           |  4 ----
 12 files changed, 45 insertions(+), 46 deletions(-)

diff --git a/examples/example_cfos_3d.py b/examples/example_cfos_3d.py
index c91f06f..502a2b9 100644
--- a/examples/example_cfos_3d.py
+++ b/examples/example_cfos_3d.py
@@ -1,6 +1,6 @@
 from auxiliary.nifti.io import read_nifti
 
-from panoptica import *
+from panoptica import SemanticPair, Panoptic_Evaluator, ConnectedComponentsInstanceApproximator, CCABackend, NaiveOneToOneMatching
 
 pred_masks = read_nifti(
     input_nifti_path="/home/florian/flow/cfos_analysis/data/ablation/2021-11-25_23-50-56_2021-10-25_19-38-31_tr_dice_bce_11/patchvolume_695_2.nii.gz"
diff --git a/examples/example_spine_instance.py b/examples/example_spine_instance.py
index cedaf18..763ee02 100644
--- a/examples/example_spine_instance.py
+++ b/examples/example_spine_instance.py
@@ -1,12 +1,18 @@
 from auxiliary.nifti.io import read_nifti
 
-from panoptica import *
+from panoptica import (
+    UnmatchedInstancePair,
+    Panoptic_Evaluator,
+    ConnectedComponentsInstanceApproximator,
+    CCABackend,
+    NaiveOneToOneMatching,
+)
 
 ref_masks = read_nifti("examples/spine_seg/instance/sub-0007_mod-T2w_seg-vert_msk.nii.gz")
 pred_masks = read_nifti("examples/spine_seg/instance/sub-0007_mod-T2w_seg-vert_msk_new.nii.gz")
 
 
-sample = SemanticPair(pred_masks, ref_masks)
+sample = UnmatchedInstancePair(pred_masks, ref_masks)
 
 evaluator = Panoptic_Evaluator(
     expected_input=UnmatchedInstancePair,
diff --git a/examples/example_spine_semantic.py b/examples/example_spine_semantic.py
index 85406dc..5466e4b 100644
--- a/examples/example_spine_semantic.py
+++ b/examples/example_spine_semantic.py
@@ -1,20 +1,19 @@
 from auxiliary.nifti.io import read_nifti
 
-from panoptica import CCABackend, SemanticSegmentationEvaluator
+from panoptica import SemanticPair, Panoptic_Evaluator, ConnectedComponentsInstanceApproximator, CCABackend, NaiveOneToOneMatching
+
+ref_masks = read_nifti("examples/spine_seg/semantic/sub-0007_mod-T2w_seg-spine_msk.nii.gz")
+pred_masks = read_nifti("examples/spine_seg/semantic/sub-0007_mod-T2w_seg-spine_msk_new.nii.gz")
 
-ref_masks = read_nifti(
-    "examples/spine_seg/semantic/sub-0007_mod-T2w_seg-spine_msk.nii.gz"
-)
-pred_masks = read_nifti(
-    "examples/spine_seg/semantic/sub-0007_mod-T2w_seg-spine_msk_new.nii.gz"
-)
 
+sample = SemanticPair(pred_masks, ref_masks)
 
-eva = SemanticSegmentationEvaluator(cca_backend=CCABackend.cc3d)
-res = eva.evaluate(
-    reference_mask=ref_masks,
-    prediction_mask=pred_masks,
+evaluator = Panoptic_Evaluator(
+    expected_input=SemanticPair,
+    instance_approximator=ConnectedComponentsInstanceApproximator(cca_backend=CCABackend.cc3d),
+    instance_matcher=NaiveOneToOneMatching(),
     iou_threshold=0.5,
 )
 
-print(res)
+result, debug_data = evaluator.evaluate(sample)
+print(result)
diff --git a/panoptica/__init__.py b/panoptica/__init__.py
index a39a116..24edcf4 100644
--- a/panoptica/__init__.py
+++ b/panoptica/__init__.py
@@ -1,5 +1,5 @@
-from instance_approximator import ConnectedComponentsInstanceApproximator, CCABackend
-from instance_matcher import NaiveOneToOneMatching
-from evaluator import Panoptic_Evaluator
-from result import PanopticaResult
-from utils.datatypes import SemanticPair, UnmatchedInstancePair, MatchedInstancePair
+from panoptica.instance_approximator import ConnectedComponentsInstanceApproximator, CCABackend
+from panoptica.instance_matcher import NaiveOneToOneMatching
+from panoptica.evaluator import Panoptic_Evaluator
+from panoptica.result import PanopticaResult
+from panoptica.utils.datatypes import SemanticPair, UnmatchedInstancePair, MatchedInstancePair
diff --git a/panoptica/_functionals.py b/panoptica/_functionals.py
index aa4aa4c..0f6637a 100644
--- a/panoptica/_functionals.py
+++ b/panoptica/_functionals.py
@@ -1,7 +1,7 @@
 import numpy as np
 from multiprocessing import Pool
-from utils.metrics import _compute_instance_iou
-from utils.constants import CCABackend
+from panoptica.utils.metrics import _compute_instance_iou
+from panoptica.utils.constants import CCABackend
 
 
 def _calc_iou_matrix(prediction_arr: np.ndarray, reference_arr: np.ndarray, ref_labels, pred_labels):
diff --git a/panoptica/evaluator.py b/panoptica/evaluator.py
index 98225cc..ecae4dd 100644
--- a/panoptica/evaluator.py
+++ b/panoptica/evaluator.py
@@ -2,12 +2,12 @@
 
 import numpy as np
 
-from result import PanopticaResult
-from utils.datatypes import SemanticPair, UnmatchedInstancePair, MatchedInstancePair, _ProcessingPair, _ProcessingPairInstanced
-from instance_approximator import InstanceApproximator
-from instance_matcher import InstanceMatchingAlgorithm
-from instance_evaluator import evaluate_matched_instance
-from timing import measure_time
+from panoptica.result import PanopticaResult
+from panoptica.utils.datatypes import SemanticPair, UnmatchedInstancePair, MatchedInstancePair, _ProcessingPair
+from panoptica.instance_approximator import InstanceApproximator
+from panoptica.instance_matcher import InstanceMatchingAlgorithm
+from panoptica.instance_evaluator import evaluate_matched_instance
+from panoptica.timing import measure_time
 
 
 class Panoptic_Evaluator:
diff --git a/panoptica/instance_approximator.py b/panoptica/instance_approximator.py
index f6a5318..57c57e1 100644
--- a/panoptica/instance_approximator.py
+++ b/panoptica/instance_approximator.py
@@ -1,7 +1,7 @@
 from abc import abstractmethod, ABC
-from utils.datatypes import SemanticPair, UnmatchedInstancePair, MatchedInstancePair
-from _functionals import _connected_components, CCABackend
-from utils.numpy import _get_smallest_fitting_uint
+from panoptica.utils.datatypes import SemanticPair, UnmatchedInstancePair, MatchedInstancePair
+from panoptica._functionals import _connected_components, CCABackend
+from panoptica.utils.numpy import _get_smallest_fitting_uint
 import numpy as np
 
 
diff --git a/panoptica/instance_evaluator.py b/panoptica/instance_evaluator.py
index f530ced..6c37dcf 100644
--- a/panoptica/instance_evaluator.py
+++ b/panoptica/instance_evaluator.py
@@ -1,8 +1,8 @@
 import concurrent.futures
-from utils.datatypes import MatchedInstancePair
-from result import PanopticaResult
+from panoptica.utils.datatypes import MatchedInstancePair
+from panoptica.result import PanopticaResult
 import numpy as np
-from utils.metrics import _compute_iou, _compute_dice_coefficient
+from panoptica.utils.metrics import _compute_iou, _compute_dice_coefficient
 
 
 def evaluate_matched_instance(semantic_pair: MatchedInstancePair, iou_threshold: float, **kwargs) -> PanopticaResult:
diff --git a/panoptica/instance_matcher.py b/panoptica/instance_matcher.py
index ce85866..217c772 100644
--- a/panoptica/instance_matcher.py
+++ b/panoptica/instance_matcher.py
@@ -1,9 +1,7 @@
 from abc import abstractmethod, ABC
-from utils.datatypes import UnmatchedInstancePair, MatchedInstancePair, Instance_Label_Map, uint_type
-from multiprocessing import Pool
+from panoptica.utils.datatypes import UnmatchedInstancePair, MatchedInstancePair, Instance_Label_Map
 import numpy as np
-from utils import _unique_without_zeros, _compute_instance_iou
-from _functionals import _map_labels, _calc_iou_matrix
+from panoptica._functionals import _map_labels, _calc_iou_matrix
 from scipy.optimize import linear_sum_assignment
 
 
diff --git a/panoptica/utils/__init__.py b/panoptica/utils/__init__.py
index aafd1d1..4710da3 100644
--- a/panoptica/utils/__init__.py
+++ b/panoptica/utils/__init__.py
@@ -1,5 +1,5 @@
-from utils.numpy import _count_unique_without_zeros, _unique_without_zeros
-from utils.datatypes import SemanticPair, UnmatchedInstancePair, MatchedInstancePair, Instance_Label_Map
-from utils.metrics import _compute_instance_volumetric_dice, _compute_instance_iou, _compute_dice_coefficient, _compute_iou
+from panoptica.utils.numpy import _count_unique_without_zeros, _unique_without_zeros
+from panoptica.utils.datatypes import SemanticPair, UnmatchedInstancePair, MatchedInstancePair, Instance_Label_Map
+from panoptica.utils.metrics import _compute_instance_volumetric_dice, _compute_instance_iou, _compute_dice_coefficient, _compute_iou
 
 # from utils.constants import
diff --git a/panoptica/utils/datatypes.py b/panoptica/utils/datatypes.py
index 77274c9..4010804 100644
--- a/panoptica/utils/datatypes.py
+++ b/panoptica/utils/datatypes.py
@@ -3,7 +3,7 @@
 from numpy import dtype
 from abc import ABC
 import warnings
-from utils import _count_unique_without_zeros, _unique_without_zeros
+from panoptica.utils import _count_unique_without_zeros, _unique_without_zeros
 
 uint_type: type = np.unsignedinteger
 int_type: type = np.integer
diff --git a/panoptica/utils/numpy.py b/panoptica/utils/numpy.py
index 7f627b2..4e91604 100644
--- a/panoptica/utils/numpy.py
+++ b/panoptica/utils/numpy.py
@@ -1,9 +1,5 @@
 import warnings
-from typing import Tuple
-
-import cc3d
 import numpy as np
-from scipy import ndimage
 
 
 def _unique_without_zeros(arr: np.ndarray) -> np.ndarray:

From d5c49eaafd2d581f174c038c3b38dd4c2d23a56d Mon Sep 17 00:00:00 2001
From: Hendrik <hendrik.moeller@tum.de>
Date: Fri, 10 Nov 2023 12:27:56 +0100
Subject: [PATCH 5/6] fixed small bug in evaluation, where iou below threshold
 were used in metrics. set default values for Panoptic_Evaluator

---
 examples/example_spine_instance.py           | 11 +++++-----
 panoptica/_functionals.py                    |  8 ++-----
 panoptica/evaluator.py                       | 13 ++++++------
 panoptica/instance_approximator.py           |  2 +-
 panoptica/instance_evaluator.py              | 20 +++++++++---------
 panoptica/instance_matcher.py                |  6 +++---
 panoptica/result.py                          |  9 ++++----
 panoptica/utils/__init__.py                  |  2 +-
 panoptica/utils/datatypes.py                 | 22 +++++++++++++-------
 panoptica/utils/{numpy.py => numpy_utils.py} |  0
 10 files changed, 48 insertions(+), 45 deletions(-)
 rename panoptica/utils/{numpy.py => numpy_utils.py} (100%)

diff --git a/examples/example_spine_instance.py b/examples/example_spine_instance.py
index 9e0044e..3e883f1 100644
--- a/examples/example_spine_instance.py
+++ b/examples/example_spine_instance.py
@@ -1,14 +1,15 @@
 from auxiliary.nifti.io import read_nifti
+import numpy as np
 
-from panoptica import UnmatchedInstancePair, Panoptic_Evaluator, NaiveOneToOneMatching
+from panoptica import MatchedInstancePair, Panoptic_Evaluator, NaiveOneToOneMatching
 
-ref_masks = read_nifti("examples/spine_seg/instance_example/sub-0007_mod-T2w_seg-vert_msk.nii.gz")
-pred_masks = read_nifti("examples/spine_seg/instance_example/sub-0007_mod-T2w_seg-vert_msk_new.nii.gz")
+ref_masks = read_nifti("repo/examples/spine_seg/instance_example/sub-0007_mod-T2w_seg-vert_msk.nii.gz")
+pred_masks = read_nifti("repo/examples/spine_seg/instance_example/sub-0007_mod-T2w_seg-vert_msk_new.nii.gz")
 
-sample = UnmatchedInstancePair(pred_masks, ref_masks)
+sample = MatchedInstancePair(prediction_arr=pred_masks, reference_arr=ref_masks)
 
 evaluator = Panoptic_Evaluator(
-    expected_input=UnmatchedInstancePair,
+    expected_input=MatchedInstancePair,
     instance_approximator=None,
     instance_matcher=NaiveOneToOneMatching(),
     iou_threshold=0.5,
diff --git a/panoptica/_functionals.py b/panoptica/_functionals.py
index 0f6637a..8b4ef3a 100644
--- a/panoptica/_functionals.py
+++ b/panoptica/_functionals.py
@@ -4,18 +4,14 @@
 from panoptica.utils.constants import CCABackend
 
 
-def _calc_iou_matrix(prediction_arr: np.ndarray, reference_arr: np.ndarray, ref_labels, pred_labels):
+def _calc_iou_matrix(prediction_arr: np.ndarray, reference_arr: np.ndarray, ref_labels: list[int], pred_labels: list[int]):
     num_ref_instances = len(ref_labels)
     num_pred_instances = len(pred_labels)
 
     # Create a pool of worker processes to parallelize the computation
     with Pool() as pool:
         # Generate all possible pairs of instance indices for IoU computation
-        instance_pairs = [
-            (reference_arr, prediction_arr, ref_idx, pred_idx)
-            for ref_idx in range(1, num_ref_instances + 1)
-            for pred_idx in range(1, num_pred_instances + 1)
-        ]
+        instance_pairs = [(reference_arr, prediction_arr, ref_idx, pred_idx) for ref_idx in ref_labels for pred_idx in pred_labels]
 
         # Calculate IoU for all instance pairs in parallel using starmap
         iou_values = pool.starmap(_compute_instance_iou, instance_pairs)
diff --git a/panoptica/evaluator.py b/panoptica/evaluator.py
index ecae4dd..828ccd4 100644
--- a/panoptica/evaluator.py
+++ b/panoptica/evaluator.py
@@ -13,9 +13,9 @@
 class Panoptic_Evaluator:
     def __init__(
         self,
-        expected_input: type(SemanticPair) | type(UnmatchedInstancePair) | type(MatchedInstancePair),
-        instance_approximator: InstanceApproximator | None,
-        instance_matcher: InstanceMatchingAlgorithm | None,
+        expected_input: type(SemanticPair) | type(UnmatchedInstancePair) | type(MatchedInstancePair) = type(MatchedInstancePair),
+        instance_approximator: InstanceApproximator | None = None,
+        instance_matcher: InstanceMatchingAlgorithm | None = None,
         iou_threshold: float = 0.5,
     ) -> None:
         self.__expected_input = expected_input
@@ -36,10 +36,9 @@ def evaluate(self, processing_pair: _ProcessingPair) -> tuple[PanopticaResult, d
 
 def panoptic_evaluate(
     processing_pair: SemanticPair | UnmatchedInstancePair | MatchedInstancePair | PanopticaResult,
-    instance_approximator: InstanceApproximator | None,
-    instance_matcher: InstanceMatchingAlgorithm | None,
-    iou_threshold: float,
-    verbose: bool = False,
+    instance_approximator: InstanceApproximator | None = None,
+    instance_matcher: InstanceMatchingAlgorithm | None = None,
+    iou_threshold: float = 0.5,
     **kwargs,
 ) -> tuple[PanopticaResult, dict[str, _ProcessingPair]]:
     debug_data: dict[str, _ProcessingPair] = {}
diff --git a/panoptica/instance_approximator.py b/panoptica/instance_approximator.py
index 57c57e1..4ce9497 100644
--- a/panoptica/instance_approximator.py
+++ b/panoptica/instance_approximator.py
@@ -1,7 +1,7 @@
 from abc import abstractmethod, ABC
 from panoptica.utils.datatypes import SemanticPair, UnmatchedInstancePair, MatchedInstancePair
 from panoptica._functionals import _connected_components, CCABackend
-from panoptica.utils.numpy import _get_smallest_fitting_uint
+from panoptica.utils.numpy_utils import _get_smallest_fitting_uint
 import numpy as np
 
 
diff --git a/panoptica/instance_evaluator.py b/panoptica/instance_evaluator.py
index 6c37dcf..c06ac71 100644
--- a/panoptica/instance_evaluator.py
+++ b/panoptica/instance_evaluator.py
@@ -28,9 +28,8 @@ def evaluate_matched_instance(semantic_pair: MatchedInstancePair, iou_threshold:
         for future in concurrent.futures.as_completed(futures):
             tp_i, dice_i, iou_i = future.result()
             tp += tp_i
-            if dice_i is not None:
+            if dice_i is not None and iou_i is not None:
                 dice_list.append(dice_i)
-            if iou_i is not None:
                 iou_list.append(iou_i)
     # Create and return the PanopticaResult object with computed metrics
     return PanopticaResult(
@@ -43,11 +42,11 @@ def evaluate_matched_instance(semantic_pair: MatchedInstancePair, iou_threshold:
 
 
 def _evaluate_instance(
-    ref_labels: np.ndarray,
-    pred_labels: np.ndarray,
+    reference_arr: np.ndarray,
+    prediction_arr: np.ndarray,
     ref_idx: int,
     iou_threshold: float,
-) -> tuple[int, float, float]:
+) -> tuple[int, float | None, float | None]:
     """
     Evaluate a single instance.
 
@@ -60,18 +59,19 @@ def _evaluate_instance(
     Returns:
         Tuple[int, float, float]: Tuple containing True Positives (int), Dice coefficient (float), and IoU (float).
     """
-    iou = _compute_iou(
-        reference=ref_labels == ref_idx,
-        prediction=pred_labels == ref_idx,
+    iou: float | None = _compute_iou(
+        reference=reference_arr == ref_idx,
+        prediction=prediction_arr == ref_idx,
     )
     if iou > iou_threshold:
         tp = 1
         dice = _compute_dice_coefficient(
-            reference=ref_labels == ref_idx,
-            prediction=pred_labels == ref_idx,
+            reference=reference_arr == ref_idx,
+            prediction=prediction_arr == ref_idx,
         )
     else:
         tp = 0
         dice = None
+        iou = None
 
     return tp, dice, iou
diff --git a/panoptica/instance_matcher.py b/panoptica/instance_matcher.py
index 217c772..f88edb0 100644
--- a/panoptica/instance_matcher.py
+++ b/panoptica/instance_matcher.py
@@ -35,16 +35,14 @@ def _match_instances(self, unmatched_instance_pair: UnmatchedInstancePair, **kwa
         ref_indices, pred_indices = linear_sum_assignment(-iou_matrix)
 
         # Initialize variables for True Positives (tp) and False Positives (fp)
-        tp, iou_list = 0, []
         labelmap: Instance_Label_Map = []
 
         # Loop through matched instances to compute PQ components
         for ref_idx, pred_idx in zip(ref_indices, pred_indices):
+            # TODO skip indices that have been matched already
             iou = iou_matrix[ref_idx][pred_idx]
             if iou >= self.iou_threshold:
                 # Match found, increment true positive count and collect IoU and Dice values
-                tp += 1
-                iou_list.append(iou)
                 labelmap.append(([ref_labels[ref_idx]], [pred_labels[pred_idx]]))
                 # map label ref_idx to pred_idx
         return labelmap
@@ -62,6 +60,8 @@ def map_instance_labels(processing_pair: UnmatchedInstancePair, labelmap: Instan
     pred_labelmap = {}
     ref_labelmap = {}
     label_counter = 1
+    # TODO map only predictions onto reference, but vice versa (leave reference untouched, unmatched predictions get next best labels)
+
     # Go over instance labelmap and assign the matched instance sequentially
     for refs, preds in labelmap:
         for r, p in zip(refs, preds):
diff --git a/panoptica/result.py b/panoptica/result.py
index 705f0c9..794874d 100644
--- a/panoptica/result.py
+++ b/panoptica/result.py
@@ -54,7 +54,7 @@ def __str__(self):
         )
 
     def to_dict(self):
-        return ({
+        return {
             "num_pred_instances": self.num_pred_instances,
             "num_ref_instances": self.num_ref_instances,
             "tp": self.tp,
@@ -65,9 +65,8 @@ def to_dict(self):
             "sq_sd": self.sq_sd,
             "pq": self.pq,
             "instance_dice": self.instance_dice,
-            "instance_dice_sd": self.instance_dice_sd
-            }
-        )
+            "instance_dice_sd": self.instance_dice_sd,
+        }
 
     @property
     def num_ref_instances(self) -> int:
@@ -171,7 +170,7 @@ def instance_dice(self) -> float:
         Returns:
             float: Average Dice coefficient.
         """
-        if self.tp == 0: 
+        if self.tp == 0:
             return 0.0
         return np.sum(self._dice_list) / self.tp
 
diff --git a/panoptica/utils/__init__.py b/panoptica/utils/__init__.py
index 4710da3..4eaee7f 100644
--- a/panoptica/utils/__init__.py
+++ b/panoptica/utils/__init__.py
@@ -1,4 +1,4 @@
-from panoptica.utils.numpy import _count_unique_without_zeros, _unique_without_zeros
+from panoptica.utils.numpy_utils import _count_unique_without_zeros, _unique_without_zeros
 from panoptica.utils.datatypes import SemanticPair, UnmatchedInstancePair, MatchedInstancePair, Instance_Label_Map
 from panoptica.utils.metrics import _compute_instance_volumetric_dice, _compute_instance_iou, _compute_dice_coefficient, _compute_iou
 
diff --git a/panoptica/utils/datatypes.py b/panoptica/utils/datatypes.py
index 4010804..8c0cd69 100644
--- a/panoptica/utils/datatypes.py
+++ b/panoptica/utils/datatypes.py
@@ -20,8 +20,8 @@ def __init__(self, prediction_arr: np.ndarray, reference_arr: np.ndarray, dtype:
         _check_array_integrity(prediction_arr, reference_arr, dtype=dtype)
         self.prediction_arr = prediction_arr
         self.reference_arr = reference_arr
-        self.ref_labels = tuple(_unique_without_zeros(reference_arr))
-        self.pred_labels = tuple(_unique_without_zeros(prediction_arr))
+        self.ref_labels: tuple[int] = tuple(_unique_without_zeros(reference_arr))  # type:ignore
+        self.pred_labels: tuple[int] = tuple(_unique_without_zeros(prediction_arr))  # type:ignore
 
     # Make all variables read-only!
     def __setattr__(self, attr, value):
@@ -121,16 +121,24 @@ def __init__(
         self,
         prediction_arr: np.ndarray,
         reference_arr: np.ndarray,
-        missed_reference_labels: list[int],
-        missed_prediction_labels: list[int],
-        n_matched_instances: int,
+        missed_reference_labels: list[int] | None = None,
+        missed_prediction_labels: list[int] | None = None,
+        n_matched_instances: int | None = None,
         n_prediction_instance: int | None = None,
         n_reference_instance: int | None = None,
     ) -> None:
+        super().__init__(prediction_arr, reference_arr, uint_type, n_prediction_instance, n_reference_instance)  # type:ignore
+        if n_matched_instances is None:
+            n_matched_instances = len([i for i in self.pred_labels if i in self.ref_labels])
+        self.n_matched_instances = n_matched_instances
+
+        if missed_reference_labels is None:
+            missed_reference_labels = list([i for i in self.ref_labels if i not in self.pred_labels])
         self.missed_reference_labels = missed_reference_labels
+
+        if missed_prediction_labels is None:
+            missed_prediction_labels = list([i for i in self.pred_labels if i not in self.ref_labels])
         self.missed_prediction_labels = missed_prediction_labels
-        self.n_matched_instances = n_matched_instances
-        super().__init__(prediction_arr, reference_arr, uint_type, n_prediction_instance, n_reference_instance)  # type:ignore
 
     def copy(self):
         return type(self)(
diff --git a/panoptica/utils/numpy.py b/panoptica/utils/numpy_utils.py
similarity index 100%
rename from panoptica/utils/numpy.py
rename to panoptica/utils/numpy_utils.py

From 05bc463772f18433b4a39be31f56714f30e839af Mon Sep 17 00:00:00 2001
From: Hendrik <hendrik.moeller@tum.de>
Date: Fri, 10 Nov 2023 13:52:23 +0100
Subject: [PATCH 6/6] added docstrings for all functions

---
 panoptica/_functionals.py          |  39 ++++++++++-
 panoptica/evaluator.py             |  35 ++++++++++
 panoptica/instance_approximator.py |  84 ++++++++++++++++++++++++
 panoptica/instance_evaluator.py    |  15 +++++
 panoptica/instance_matcher.py      | 100 +++++++++++++++++++++++++++++
 panoptica/utils/datatypes.py       |  73 +++++++++++++++++----
 panoptica/utils/numpy_utils.py     |  13 ++++
 7 files changed, 345 insertions(+), 14 deletions(-)

diff --git a/panoptica/_functionals.py b/panoptica/_functionals.py
index 8b4ef3a..158d1b8 100644
--- a/panoptica/_functionals.py
+++ b/panoptica/_functionals.py
@@ -5,6 +5,24 @@
 
 
 def _calc_iou_matrix(prediction_arr: np.ndarray, reference_arr: np.ndarray, ref_labels: list[int], pred_labels: list[int]):
+    """
+    Calculate the Intersection over Union (IoU) matrix between reference and prediction arrays.
+
+    Args:
+        prediction_arr (np.ndarray): Numpy array containing the prediction labels.
+        reference_arr (np.ndarray): Numpy array containing the reference labels.
+        ref_labels (list[int]): List of unique reference labels.
+        pred_labels (list[int]): List of unique prediction labels.
+
+    Returns:
+        np.ndarray: IoU matrix where each element represents the IoU between a reference and prediction instance.
+
+    Example:
+    >>> _calc_iou_matrix(np.array([1, 2, 3]), np.array([4, 5, 6]), [1, 2, 3], [4, 5, 6])
+    array([[0. , 0. , 0. ],
+           [0. , 0. , 0. ],
+           [0. , 0. , 0. ]])
+    """
     num_ref_instances = len(ref_labels)
     num_pred_instances = len(pred_labels)
 
@@ -22,7 +40,9 @@ def _calc_iou_matrix(prediction_arr: np.ndarray, reference_arr: np.ndarray, ref_
 
 
 def _map_labels(arr: np.ndarray, label_map: dict[np.integer, np.integer]) -> np.ndarray:
-    """Maps labels in the given array according to the label_map dictionary.
+    """
+    Maps labels in the given array according to the label_map dictionary.
+
     Args:
         label_map (dict): A dictionary that maps the original label values (str or int) to the new label values (int).
 
@@ -40,6 +60,23 @@ def _connected_components(
     array: np.ndarray,
     cca_backend: CCABackend,
 ) -> tuple[np.ndarray, int]:
+    """
+    Label connected components in a binary array using a specified connected components algorithm.
+
+    Args:
+        array (np.ndarray): Binary array containing connected components.
+        cca_backend (CCABackend): Enum indicating the connected components algorithm backend (CCABackend.cc3d or CCABackend.scipy).
+
+    Returns:
+        tuple[np.ndarray, int]: A tuple containing the labeled array and the number of connected components.
+
+    Raises:
+        NotImplementedError: If the specified connected components algorithm backend is not implemented.
+
+    Example:
+    >>> _connected_components(np.array([[1, 0, 1], [0, 1, 1], [1, 0, 0]]), CCABackend.scipy)
+    (array([[1, 0, 2], [0, 3, 3], [4, 0, 0]]), 4)
+    """
     if cca_backend == CCABackend.cc3d:
         import cc3d
 
diff --git a/panoptica/evaluator.py b/panoptica/evaluator.py
index 828ccd4..8865663 100644
--- a/panoptica/evaluator.py
+++ b/panoptica/evaluator.py
@@ -18,6 +18,14 @@ def __init__(
         instance_matcher: InstanceMatchingAlgorithm | None = None,
         iou_threshold: float = 0.5,
     ) -> None:
+        """Creates a Panoptic_Evaluator that stores the parameters used for all subsequent evaluations.
+
+        Args:
+            expected_input (type, optional): Expected DataPair Input. Defaults to type(MatchedInstancePair).
+            instance_approximator (InstanceApproximator | None, optional): Determines which instance approximator is used if necessary. Defaults to None.
+            instance_matcher (InstanceMatchingAlgorithm | None, optional): Determines which instance matching algorithm is used if necessary. Defaults to None.
+            iou_threshold (float, optional): Iou Threshold for evaluation. Defaults to 0.5.
+        """
         self.__expected_input = expected_input
         self.__instance_approximator = instance_approximator
         self.__instance_matcher = instance_matcher
@@ -41,6 +49,33 @@ def panoptic_evaluate(
     iou_threshold: float = 0.5,
     **kwargs,
 ) -> tuple[PanopticaResult, dict[str, _ProcessingPair]]:
+    """
+    Perform panoptic evaluation on the given processing pair.
+
+    Args:
+        processing_pair (SemanticPair | UnmatchedInstancePair | MatchedInstancePair | PanopticaResult):
+            The processing pair to be evaluated.
+        instance_approximator (InstanceApproximator | None, optional):
+            The instance approximator used for approximating instances in the SemanticPair.
+        instance_matcher (InstanceMatchingAlgorithm | None, optional):
+            The instance matcher used for matching instances in the UnmatchedInstancePair.
+        iou_threshold (float, optional):
+            The IoU threshold for evaluating matched instances. Defaults to 0.5.
+        **kwargs:
+            Additional keyword arguments.
+
+    Returns:
+        tuple[PanopticaResult, dict[str, _ProcessingPair]]:
+            A tuple containing the panoptic result and a dictionary of debug data.
+
+    Raises:
+        AssertionError: If the input processing pair does not match the expected types.
+        RuntimeError: If the end of the panoptic pipeline is reached without producing results.
+
+    Example:
+    >>> panoptic_evaluate(SemanticPair(...), instance_approximator=ConnectedComponentsInstanceApproximator(CCABackend.cc3d), iou_threshold=0.6)
+    (PanopticaResult(...), {'UnmatchedInstanceMap': _ProcessingPair(...), 'MatchedInstanceMap': _ProcessingPair(...)})
+    """
     debug_data: dict[str, _ProcessingPair] = {}
     # First Phase: Instance Approximation
     if isinstance(processing_pair, PanopticaResult):
diff --git a/panoptica/instance_approximator.py b/panoptica/instance_approximator.py
index 4ce9497..d98fca2 100644
--- a/panoptica/instance_approximator.py
+++ b/panoptica/instance_approximator.py
@@ -6,11 +6,61 @@
 
 
 class InstanceApproximator(ABC):
+    """
+    Abstract base class for instance approximation algorithms in panoptic segmentation evaluation.
+
+    Attributes:
+        None
+
+    Methods:
+        _approximate_instances(self, semantic_pair: SemanticPair, **kwargs) -> UnmatchedInstancePair | MatchedInstancePair:
+            Abstract method to be implemented by subclasses for instance approximation.
+
+        approximate_instances(self, semantic_pair: SemanticPair, **kwargs) -> UnmatchedInstancePair | MatchedInstancePair:
+            Perform instance approximation on the given SemanticPair.
+
+    Raises:
+        AssertionError: If there are negative values in the semantic maps, which is not allowed.
+
+    Example:
+    >>> class CustomInstanceApproximator(InstanceApproximator):
+    ...     def _approximate_instances(self, semantic_pair: SemanticPair, **kwargs) -> UnmatchedInstancePair | MatchedInstancePair:
+    ...         # Implementation of instance approximation algorithm
+    ...         pass
+    ...
+    >>> approximator = CustomInstanceApproximator()
+    >>> semantic_pair = SemanticPair(...)
+    >>> result = approximator.approximate_instances(semantic_pair)
+    """
+
     @abstractmethod
     def _approximate_instances(self, semantic_pair: SemanticPair, **kwargs) -> UnmatchedInstancePair | MatchedInstancePair:
+        """
+        Abstract method to be implemented by subclasses for instance approximation.
+
+        Args:
+            semantic_pair (SemanticPair): The semantic pair to be approximated.
+            **kwargs: Additional keyword arguments.
+
+        Returns:
+            UnmatchedInstancePair | MatchedInstancePair: The result of the instance approximation.
+        """
         pass
 
     def approximate_instances(self, semantic_pair: SemanticPair, **kwargs) -> UnmatchedInstancePair | MatchedInstancePair:
+        """
+        Perform instance approximation on the given SemanticPair.
+
+        Args:
+            semantic_pair (SemanticPair): The semantic pair to be approximated.
+            **kwargs: Additional keyword arguments.
+
+        Returns:
+            UnmatchedInstancePair | MatchedInstancePair: The result of the instance approximation.
+
+        Raises:
+            AssertionError: If there are negative values in the semantic maps, which is not allowed.
+        """
         # Call algorithm
         instance_pair = self._approximate_instances(semantic_pair, **kwargs)
         # Check validity
@@ -25,10 +75,44 @@ def approximate_instances(self, semantic_pair: SemanticPair, **kwargs) -> Unmatc
 
 
 class ConnectedComponentsInstanceApproximator(InstanceApproximator):
+    """
+    Instance approximator using connected components algorithm for panoptic segmentation evaluation.
+
+    Attributes:
+        cca_backend (CCABackend): The connected components algorithm backend.
+
+    Methods:
+        __init__(self, cca_backend: CCABackend) -> None:
+            Initialize the ConnectedComponentsInstanceApproximator.
+        _approximate_instances(self, semantic_pair: SemanticPair, **kwargs) -> UnmatchedInstancePair:
+            Approximate instances using the connected components algorithm.
+
+    Example:
+    >>> cca_approximator = ConnectedComponentsInstanceApproximator(cca_backend=CCABackend.cc3d)
+    >>> semantic_pair = SemanticPair(...)
+    >>> result = cca_approximator.approximate_instances(semantic_pair)
+    """
+
     def __init__(self, cca_backend: CCABackend) -> None:
+        """
+        Initialize the ConnectedComponentsInstanceApproximator.
+
+        Args:
+            cca_backend (CCABackend): The connected components algorithm backend.
+        """
         self.cca_backend = cca_backend
 
     def _approximate_instances(self, semantic_pair: SemanticPair, **kwargs) -> UnmatchedInstancePair:
+        """
+        Approximate instances using the connected components algorithm.
+
+        Args:
+            semantic_pair (SemanticPair): The semantic pair to be approximated.
+            **kwargs: Additional keyword arguments.
+
+        Returns:
+            UnmatchedInstancePair: The result of the instance approximation.
+        """
         prediction_arr, n_prediction_instance = _connected_components(semantic_pair.prediction_arr, self.cca_backend)
         reference_arr, n_reference_instance = _connected_components(semantic_pair.reference_arr, self.cca_backend)
         return UnmatchedInstancePair(
diff --git a/panoptica/instance_evaluator.py b/panoptica/instance_evaluator.py
index c06ac71..eb68157 100644
--- a/panoptica/instance_evaluator.py
+++ b/panoptica/instance_evaluator.py
@@ -6,6 +6,21 @@
 
 
 def evaluate_matched_instance(semantic_pair: MatchedInstancePair, iou_threshold: float, **kwargs) -> PanopticaResult:
+    """
+    Evaluate a MatchedInstancePair and collect the computed metrics in a PanopticaResult.
+
+    Args:
+        semantic_pair (MatchedInstancePair): The matched instance pair to be evaluated.
+        iou_threshold (float): The IoU an instance must exceed to be counted as a true positive.
+        **kwargs: Additional keyword arguments.
+
+    Returns:
+        PanopticaResult: The result object created from the computed metrics.
+
+    Example:
+    >>> matched_instance_pair = MatchedInstancePair(...)
+    >>> result = evaluate_matched_instance(matched_instance_pair, iou_threshold=0.5)
+    """
     # Initialize variables for True Positives (tp)
     tp, dice_list, iou_list = 0, [], []
 
diff --git a/panoptica/instance_matcher.py b/panoptica/instance_matcher.py
index f88edb0..0bf317b 100644
--- a/panoptica/instance_matcher.py
+++ b/panoptica/instance_matcher.py
@@ -6,23 +6,107 @@
 
 
 class InstanceMatchingAlgorithm(ABC):
+    """
+    Abstract base class for instance matching algorithms in panoptic segmentation evaluation.
+
+    Attributes:
+        None
+
+    Methods:
+        _match_instances(self, unmatched_instance_pair: UnmatchedInstancePair, **kwargs) -> Instance_Label_Map:
+            Abstract method to be implemented by subclasses for instance matching.
+
+        match_instances(self, unmatched_instance_pair: UnmatchedInstancePair, **kwargs) -> MatchedInstancePair:
+            Perform instance matching on the given UnmatchedInstancePair.
+
+    Example:
+    >>> class CustomInstanceMatcher(InstanceMatchingAlgorithm):
+    ...     def _match_instances(self, unmatched_instance_pair: UnmatchedInstancePair, **kwargs) -> Instance_Label_Map:
+    ...         # Implementation of instance matching algorithm
+    ...         pass
+    ...
+    >>> matcher = CustomInstanceMatcher()
+    >>> unmatched_instance_pair = UnmatchedInstancePair(...)
+    >>> result = matcher.match_instances(unmatched_instance_pair)
+    """
+
     @abstractmethod
     def _match_instances(self, unmatched_instance_pair: UnmatchedInstancePair, **kwargs) -> Instance_Label_Map:
+        """
+        Abstract method to be implemented by subclasses for instance matching.
+
+        Args:
+            unmatched_instance_pair (UnmatchedInstancePair): The unmatched instance pair to be matched.
+            **kwargs: Additional keyword arguments.
+
+        Returns:
+            Instance_Label_Map: The result of the instance matching.
+        """
         pass
 
     def match_instances(self, unmatched_instance_pair: UnmatchedInstancePair, **kwargs) -> MatchedInstancePair:
+        """
+        Perform instance matching on the given UnmatchedInstancePair.
+
+        Args:
+            unmatched_instance_pair (UnmatchedInstancePair): The unmatched instance pair to be matched.
+            **kwargs: Additional keyword arguments.
+
+        Returns:
+            MatchedInstancePair: The result of the instance matching.
+        """
         instance_labelmap = self._match_instances(unmatched_instance_pair, **kwargs)
         print("instance_labelmap", instance_labelmap)
         return map_instance_labels(unmatched_instance_pair.copy(), instance_labelmap)
 
 
 class NaiveOneToOneMatching(InstanceMatchingAlgorithm):
+    """
+    Instance matching algorithm that performs one-to-one matching based on IoU values.
+
+    Attributes:
+        iou_threshold (float): The IoU threshold for matching instances.
+
+    Methods:
+        __init__(self, iou_threshold: float = 0.5) -> None:
+            Initialize the NaiveOneToOneMatching instance.
+        _match_instances(self, unmatched_instance_pair: UnmatchedInstancePair, **kwargs) -> Instance_Label_Map:
+            Perform one-to-one instance matching based on IoU values.
+
+    Raises:
+        AssertionError: If the specified IoU threshold is not within the valid range.
+
+    Example:
+    >>> matcher = NaiveOneToOneMatching(iou_threshold=0.6)
+    >>> unmatched_instance_pair = UnmatchedInstancePair(...)
+    >>> result = matcher.match_instances(unmatched_instance_pair)
+    """
+
     def __init__(self, iou_threshold: float = 0.5) -> None:
+        """
+        Initialize the NaiveOneToOneMatching instance.
+
+        Args:
+            iou_threshold (float, optional): The IoU threshold for matching instances. Defaults to 0.5.
+
+        Raises:
+            AssertionError: If the specified IoU threshold is not within the valid range.
+        """
         assert iou_threshold >= 0.5, "NaiveOneToOneMatching: iou_threshold lower than 0.5 doesnt work!"
         assert iou_threshold < 1.0, "NaiveOneToOneMatching: iou_threshold greater than or equal to 1.0 doesnt work!"
         self.iou_threshold = iou_threshold
 
     def _match_instances(self, unmatched_instance_pair: UnmatchedInstancePair, **kwargs) -> Instance_Label_Map:
+        """
+        Perform one-to-one instance matching based on IoU values.
+
+        Args:
+            unmatched_instance_pair (UnmatchedInstancePair): The unmatched instance pair to be matched.
+            **kwargs: Additional keyword arguments.
+
+        Returns:
+            Instance_Label_Map: The result of the instance matching.
+        """
         ref_labels = unmatched_instance_pair.ref_labels
         pred_labels = unmatched_instance_pair.pred_labels
         iou_matrix = _calc_iou_matrix(
@@ -49,6 +133,21 @@ def _match_instances(self, unmatched_instance_pair: UnmatchedInstancePair, **kwa
 
 
 def map_instance_labels(processing_pair: UnmatchedInstancePair, labelmap: Instance_Label_Map) -> MatchedInstancePair:
+    """
+    Map instance labels based on the provided labelmap and create a MatchedInstancePair.
+
+    Args:
+        processing_pair (UnmatchedInstancePair): The unmatched instance pair containing original labels.
+        labelmap (Instance_Label_Map): The instance label map obtained from instance matching.
+
+    Returns:
+        MatchedInstancePair: The result of mapping instance labels.
+
+    Example:
+    >>> unmatched_instance_pair = UnmatchedInstancePair(...)
+    >>> labelmap = [([1, 2], [3, 4]), ([5], [6])]
+    >>> result = map_instance_labels(unmatched_instance_pair, labelmap)
+    """
     prediction_arr, reference_arr = processing_pair.prediction_arr, processing_pair.reference_arr
 
     ref_labels = processing_pair.ref_labels
@@ -61,6 +160,7 @@ def map_instance_labels(processing_pair: UnmatchedInstancePair, labelmap: Instan
     ref_labelmap = {}
     label_counter = 1
     # TODO map only predictions onto reference, but vice versa (leave reference untouched, unmatched predictions get next best labels)
+    # -> that would mean only many-to-one matching allowed
 
     # Go over instance labelmap and assign the matched instance sequentially
     for refs, preds in labelmap:
diff --git a/panoptica/utils/datatypes.py b/panoptica/utils/datatypes.py
index 8c0cd69..0b3bd38 100644
--- a/panoptica/utils/datatypes.py
+++ b/panoptica/utils/datatypes.py
@@ -10,6 +10,11 @@
 
 
 class _ProcessingPair(ABC):
+    """
+    Represents a general processing pair consisting of a reference array and a prediction array. Type of array can be arbitrary (integer recommended)
+    Every member is read-only!
+    """
+
     prediction_arr: np.ndarray
     reference_arr: np.ndarray
     # unique labels without zero
@@ -17,6 +22,13 @@ class _ProcessingPair(ABC):
     pred_labels: tuple[int]
 
     def __init__(self, prediction_arr: np.ndarray, reference_arr: np.ndarray, dtype: type | None) -> None:
+        """Initializes a general Processing Pair
+
+        Args:
+            prediction_arr (np.ndarray): Numpy array containing the prediction labels
+            reference_arr (np.ndarray): Numpy array containing the reference labels
+            dtype (type | None): Datatype that is asserted. None for no assertion
+        """
         _check_array_integrity(prediction_arr, reference_arr, dtype=dtype)
         self.prediction_arr = prediction_arr
         self.reference_arr = reference_arr
@@ -32,6 +44,10 @@ def __setattr__(self, attr, value):
 
 
 class _ProcessingPairInstanced(_ProcessingPair):
+    """
+    A ProcessingPair that contains instances; it additionally exposes the number of prediction and reference instances
+    """
+
     n_prediction_instance: int
     n_reference_instance: int
 
@@ -56,6 +72,9 @@ def __init__(
             self.n_reference_instance = n_reference_instance
 
     def copy(self):
+        """
+        Creates an exact copy of this object
+        """
         return type(self)(
             prediction_arr=self.prediction_arr,
             reference_arr=self.reference_arr,
@@ -65,6 +84,24 @@ def copy(self):
 
 
 def _check_array_integrity(prediction_arr: np.ndarray, reference_arr: np.ndarray, dtype: type | None = None):
+    """
+    Check the integrity of two numpy arrays.
+
+    Parameters:
+    - prediction_arr (np.ndarray): The array to be checked.
+    - reference_arr (np.ndarray): The reference array for comparison.
+    - dtype (type | None): The expected data type for both arrays. Defaults to None.
+
+    Raises:
+    - AssertionError: If prediction_arr or reference_arr are not numpy arrays.
+    - AssertionError: If the shapes of prediction_arr and reference_arr do not match.
+    - AssertionError: If the data types of prediction_arr and reference_arr do not match.
+    - AssertionError: If dtype is provided and the data types of prediction_arr and/or reference_arr
+                     do not match the specified dtype.
+
+    Example:
+    >>> _check_array_integrity(np.array([1, 2, 3]), np.array([4, 5, 6]), dtype=int)
+    """
     assert isinstance(prediction_arr, np.ndarray) and isinstance(
         reference_arr, np.ndarray
     ), "prediction and/or reference are not numpy arrays"
@@ -79,21 +116,16 @@ def _check_array_integrity(prediction_arr: np.ndarray, reference_arr: np.ndarray
 
 
 class SemanticPair(_ProcessingPair):
-    """A Processing pair of any dtype
-
-    Args:
-        ProcessingPair (_type_): _description_
-    """
+    """A Processing pair that contains Semantic Labels"""
 
     def __init__(self, prediction_arr: np.ndarray, reference_arr: np.ndarray) -> None:
         super().__init__(prediction_arr, reference_arr, dtype=int_type)
 
 
 class UnmatchedInstancePair(_ProcessingPairInstanced):
-    """A Processing pair of any unsigned (but matching) integer type
-
-    Args:
-        ProcessingPairInstanced (_type_): _description_
+    """
+    A Processing pair that contains Unmatched Instance Maps
+    Can be of any unsigned (but matching) integer type
     """
 
     def __init__(
@@ -107,10 +139,9 @@ def __init__(
 
 
 class MatchedInstancePair(_ProcessingPairInstanced):
-    """A Processing pair of any unsigned (but matching) integer type consisting of only matched instance labels, as well as a list of missed labels from both
-
-    Args:
-        ProcessingPairInstanced (_type_): _description_
+    """
+    A Processing pair that contains Matched Instance Maps, i.e. a label that appears in both maps denotes a match
+    Can be of any unsigned (but matching) integer type
     """
 
     missed_reference_labels: list[int]
@@ -127,6 +158,19 @@ def __init__(
         n_prediction_instance: int | None = None,
         n_reference_instance: int | None = None,
     ) -> None:
+        """Initializes a MatchedInstancePair
+
+        Args:
+            prediction_arr (np.ndarray): Numpy array containing the prediction matched instance labels
+            reference_arr (np.ndarray): Numpy array containing the reference matched instance labels
+            missed_reference_labels (list[int] | None, optional): List of unmatched reference labels. Defaults to None.
+            missed_prediction_labels (list[int] | None, optional): List of unmatched prediction labels. Defaults to None.
+            n_matched_instances (int | None, optional): Number of total matched instances, i.e. unique matched labels in both maps. Defaults to None.
+            n_prediction_instance (int | None, optional): Number of prediction instances. Defaults to None.
+            n_reference_instance (int | None, optional): Number of reference instances. Defaults to None.
+
+            For each optional argument: if None, the value is calculated on initialization.
+        """
         super().__init__(prediction_arr, reference_arr, uint_type, n_prediction_instance, n_reference_instance)  # type:ignore
         if n_matched_instances is None:
             n_matched_instances = len([i for i in self.pred_labels if i in self.ref_labels])
@@ -141,6 +185,9 @@ def __init__(
         self.missed_prediction_labels = missed_prediction_labels
 
     def copy(self):
+        """
+        Creates an exact copy of this object
+        """
         return type(self)(
             prediction_arr=self.prediction_arr,
             reference_arr=self.reference_arr,
diff --git a/panoptica/utils/numpy_utils.py b/panoptica/utils/numpy_utils.py
index 4e91604..196b761 100644
--- a/panoptica/utils/numpy_utils.py
+++ b/panoptica/utils/numpy_utils.py
@@ -37,6 +37,19 @@ def _count_unique_without_zeros(arr: np.ndarray) -> int:
 
 
 def _get_smallest_fitting_uint(max_value: int) -> type:
+    """
+    Determine the smallest unsigned integer type that can accommodate the given maximum value.
+
+    Args:
+        max_value (int): The maximum value to be accommodated.
+
+    Returns:
+        type: The NumPy data type (e.g., np.uint8, np.uint16, np.uint32, np.uint64).
+
+    Example:
+    >>> _get_smallest_fitting_uint(255)
+    <class 'numpy.uint8'>
+    """
     if max_value < 256:
         dtype = np.uint8
     elif max_value < 65536: