Merge branch 'confidence'

stefanklut · Jan 21, 2025 · 3ca0676 · 3ca0676
2 parents 5522a11 + 1b94215
commit 3ca0676
Show file tree

Hide file tree

Showing 2 changed files with 146 additions and 28 deletions.
diff --git a/page_xml/output_pageXML.py b/page_xml/output_pageXML.py
@@ -58,6 +58,7 @@ def __init__(
         whitelist: Optional[Iterable[str]] = None,
         rectangle_regions: Optional[Iterable[str]] = None,
         min_region_size: int = 10,
+        save_confidence_heatmap: bool = False,
     ) -> None:
         """
         Class for the generation of the pageXML from class predictions on images
@@ -91,6 +92,7 @@ def __init__(
         self.whitelist = set() if whitelist is None else set(whitelist)
         self.min_region_size = min_region_size
         self.rectangle_regions = set() if rectangle_regions is None else set(rectangle_regions)
+        self.save_confidence_heatmap = save_confidence_heatmap
 
     def set_output_dir(self, output_dir: str | Path):
         if isinstance(output_dir, str):
@@ -190,6 +192,81 @@ def generate_single_page_yolo(
 
         page.save_xml()
 
+    @staticmethod
+    def scale_to_range(
+        tensor: torch.Tensor,
+        min_value: float = 0.0,
+        max_value: float = 1.0,
+        tensor_min: Optional[float] = None,
+        tensor_max: Optional[float] = None,
+    ) -> torch.Tensor:
+        """
+        Linearly rescale a tensor so that [tensor_min, tensor_max] maps onto [min_value, max_value].
+
+        Values outside [tensor_min, tensor_max] are not clipped, so they end up outside the target range.
+
+        Args:
+            tensor (torch.Tensor): tensor to be scaled.
+            min_value (float, optional): minimum value of the range. Defaults to 0.0.
+            max_value (float, optional): maximum value of the range. Defaults to 1.0.
+            tensor_min (Optional[float], optional): value that is mapped to min_value.
+                If None, the minimum of the tensor is used. Defaults to None.
+            tensor_max (Optional[float], optional): value that is mapped to max_value.
+                If None, the maximum of the tensor is used. Defaults to None.
+
+        Returns:
+            torch.Tensor: scaled tensor. When tensor_min equals tensor_max every element is min_value.
+        """
+
+        if tensor_min is None:
+            tensor_min = torch.min(tensor).item()
+        if tensor_max is None:
+            tensor_max = torch.max(tensor).item()
+
+        value_range = tensor_max - tensor_min
+        if value_range == 0:
+            # A constant tensor (or identical bounds) has no spread to rescale; dividing by zero
+            # would fill the result with NaN/inf, so fall back to the lower end of the target range.
+            # Integer input is promoted to float to match the dtype of the regular (division) path.
+            out_dtype = tensor.dtype if tensor.is_floating_point() else torch.get_default_dtype()
+            return torch.full_like(tensor, min_value, dtype=out_dtype)
+
+        return (max_value - min_value) * (tensor - tensor_min) / value_range + min_value
+
+    @staticmethod
+    def save_heatmap(scaled_confidence: torch.Tensor, confidence_output_path: Path):
+        """
+        Render a confidence map with the plasma color map and write it to disk.
+
+        Args:
+            scaled_confidence (torch.Tensor): confidence as tensor. It is multiplied by 255 and cast
+                to uint8, so values are presumably expected to lie in [0, 1].
+            confidence_output_path (Path): path to save the heatmap.
+        """
+        # Move to 8-bit grayscale on the CPU, which is what the OpenCV color map works on
+        gray_levels = scaled_confidence.mul(255).cpu().numpy().astype(np.uint8)
+        color_mapped = cv2.applyColorMap(gray_levels, cv2.COLORMAP_PLASMA)
+        # Reverse the channel axis (OpenCV output is BGR ordered, so this should give RGB)
+        channels_flipped = color_mapped[..., ::-1]
+        # Write through AtomicFileName so the image is saved under the path that helper hands out
+        with AtomicFileName(file_path=confidence_output_path) as target_path:
+            save_image_array_to_path(str(target_path), channels_flipped)
+
+    def sem_seg_to_classes_and_confidence(
+        self,
+        sem_seg: torch.Tensor,
+        height: Optional[int] = None,
+        width: Optional[int] = None,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """
+        Convert a single prediction into classes and confidence.
+
+        The prediction is normalized with a softmax over dim -3, optionally resized to
+        (height, width), and then reduced over dim -3 to the winning class and its probability.
+
+        Args:
+            sem_seg (torch.Tensor): sem_seg as tensor, with the classes on dim -3.
+                Resizing assumes a single (classes, height, width) prediction.
+            height (Optional[int], optional): height to resize to. Defaults to None.
+            width (Optional[int], optional): width to resize to. Defaults to None.
+                Resizing only happens when both height and width are given.
+
+        Returns:
+            torch.Tensor, torch.Tensor: classes and confidence, both with the same spatial size.
+                The confidence is rescaled so that 1 / number of regions maps to 0 and 1 maps to 1.
+        """
+        probabilities = torch.nn.functional.softmax(sem_seg, dim=-3)
+        if height is not None and width is not None:
+            probabilities = torch.nn.functional.interpolate(
+                probabilities[None], size=(height, width), mode="bilinear", align_corners=False
+            )[0]
+
+        # Take the winning class and its probability from the same (possibly resized) map, so the
+        # confidence lines up pixel for pixel with the returned classes
+        confidence, sem_seg_classes = torch.max(probabilities, dim=-3)
+
+        # 1 / number of classes is the lowest value the maximum of a probability distribution can
+        # take (assuming len(self.xml_regions.regions) equals the number of classes in sem_seg)
+        scaled_confidence = self.scale_to_range(confidence, tensor_min=1 / len(self.xml_regions.regions), tensor_max=1.0)
+
+        return sem_seg_classes, scaled_confidence
+
     def generate_single_page(
         self,
         sem_seg: torch.Tensor,
@@ -198,18 +275,18 @@ def generate_single_page(
         old_width: Optional[int] = None,
     ):
         """
-        Convert a single prediction into a page
+        Convert a single prediction into a page.
 
         Args:
-            sem_seg (torch.Tensor): sem_seg as tensor
-            image_path (Path): Image path, used for path name
+            sem_seg (torch.Tensor): sem_seg as tensor.
+            image_path (Path): Image path, used for path name.
             old_height (Optional[int], optional): height of the original image. Defaults to None.
             old_width (Optional[int], optional): width of the original image. Defaults to None.
 
         Raises:
-            TypeError: Output dir has not been set
-            TypeError: Page dir has not been set
-            NotImplementedError: mode is not known
+            TypeError: Output dir has not been set.
+            TypeError: Page dir has not been set.
+            NotImplementedError: mode is not known.
         """
         if self.output_dir is None:
             raise TypeError("Output dir is None")
@@ -232,7 +309,17 @@ def generate_single_page(
             page.add_processing_step(get_git_hash(), self.cfg.LAYPA_UUID, self.cfg, self.whitelist)
 
         if self.xml_regions.mode == "region":
-            sem_seg = torch.argmax(sem_seg, dim=-3).cpu().numpy()
+            confidence_output_path = self.page_dir.joinpath(image_path.stem + "_confidence.png")
+            sem_seg_classes, confidence = self.sem_seg_to_classes_and_confidence(sem_seg)
+
+            # Apply a color map
+            if self.save_confidence_heatmap:
+                self.save_heatmap(confidence, confidence_output_path)
+
+            sem_seg_classes = sem_seg_classes.cpu().numpy()
+            mean_confidence = torch.mean(confidence).cpu().numpy().item()
+
+            page.add_confidence(mean_confidence)
 
             region_id = 0
 
@@ -247,7 +334,7 @@ def generate_single_page(
                 contours, hierarchy = cv2.findContours(binary_region_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
 
                 for cnt in contours:
-                    # remove small objects
+                    # Remove small objects
                     if cnt.shape[0] < 4:
                         continue
                     if cv2.contourArea(cnt) < self.min_region_size:
@@ -257,11 +344,11 @@ def generate_single_page(
 
                     region_coords = ""
                     if region in self.rectangle_regions:
-                        # find bounding box
+                        # Find bounding box
                         rect = cv2.minAreaRect(cnt)
                         poly = cv2.boxPoints(rect) * scaling
                     else:
-                        # soft a bit the region to prevent spikes
+                        # Smooth the region a bit to prevent spikes
                         epsilon = 0.0005 * cv2.arcLength(cnt, True)
                         approx_poly = cv2.approxPolyDP(cnt, epsilon, True)
 
@@ -276,23 +363,41 @@ def generate_single_page(
 
                     _uuid = uuid.uuid4()
                     text_reg = page.add_element(region_type, f"region_{_uuid}_{region_id}", region, region_coords)
+
         elif self.xml_regions.mode in ["baseline", "start", "end", "separator"]:
-            # Push the calculation to outside of the python code <- mask is used by minion
             sem_seg_output_path = self.page_dir.joinpath(image_path.stem + ".png")
-            sem_seg = torch.nn.functional.interpolate(
-                sem_seg[None], size=(old_height, old_width), mode="bilinear", align_corners=False
-            )[0]
-            sem_seg_image = torch.argmax(sem_seg, dim=-3).cpu().numpy()
+            confidence_output_path = self.page_dir.joinpath(image_path.stem + "_confidence.png")
+            sem_seg_classes, confidence = self.sem_seg_to_classes_and_confidence(sem_seg, old_height, old_width)
+
+            # Apply a color map
+            if self.save_confidence_heatmap:
+                self.save_heatmap(confidence, confidence_output_path)
+
+            sem_seg_classes = sem_seg_classes.cpu().numpy()
+            mean_confidence = torch.mean(confidence).cpu().numpy().item()
+
+            page.add_confidence(mean_confidence)
+
+            # Save the mask
             with AtomicFileName(file_path=sem_seg_output_path) as path:
-                save_image_array_to_path(str(path), (sem_seg_image * 255).astype(np.uint8))
+                save_image_array_to_path(str(path), (sem_seg_classes * 255).astype(np.uint8))
+
         elif self.xml_regions.mode in ["baseline_separator", "top_bottom"]:
             sem_seg_output_path = self.page_dir.joinpath(image_path.stem + ".png")
-            sem_seg = torch.nn.functional.interpolate(
-                sem_seg[None], size=(old_height, old_width), mode="bilinear", align_corners=False
-            )[0]
-            sem_seg_image = torch.argmax(sem_seg, dim=-3).cpu().numpy()
+            confidence_output_path = self.page_dir.joinpath(image_path.stem + "_confidence.png")
+
+            sem_seg_classes, confidence = self.sem_seg_to_classes_and_confidence(sem_seg, old_height, old_width)
+
+            # Apply a color map
+            if self.save_confidence_heatmap:
+                self.save_heatmap(confidence, confidence_output_path)
+
+            sem_seg_classes = sem_seg_classes.cpu().numpy()
+            mean_confidence = torch.mean(confidence).cpu().numpy()
+
+            # Save the mask
             with AtomicFileName(file_path=sem_seg_output_path) as path:
-                save_image_array_to_path(str(path), (sem_seg_image * 128).clip(0, 255).astype(np.uint8))
+                save_image_array_to_path(str(path), (sem_seg_classes * 128).clip(0, 255).astype(np.uint8))
         else:
             raise NotImplementedError(f"Mode {self.xml_regions.mode} not implemented")
 
@@ -301,13 +406,13 @@ def generate_single_page(
 
     def generate_single_page_wrapper(self, info):
         """
-        Convert a single prediction into a page
+        Convert a single prediction into a page.
 
         Args:
             info (tuple[torch.Tensor | Path, Path]):
                 (tuple containing)
-                torch.Tensor | Path: mask as array or path to mask
-                Path: original image path
+                torch.Tensor | Path: mask as array or path to mask.
+                Path: original image path.
         """
         mask, image_path = info
         if isinstance(mask, Path):
@@ -321,14 +426,14 @@ def run(
         image_path_list: list[Path],
     ) -> None:
         """
-        Generate pageXML for all sem_seg-image pairs in the lists
+        Generate pageXML for all sem_seg-image pairs in the lists.
 
         Args:
-            sem_seg_list (list[np.ndarray] | list[Path]): all sem_seg as arrays or path to the sem_seg
-            image_path_list (list[Path]): path to the original image
+            sem_seg_list (list[np.ndarray] | list[Path]): all sem_seg as arrays or path to the sem_seg.
+            image_path_list (list[Path]): path to the original image.
 
         Raises:
-            ValueError: length of sem_seg list and image list do not match
+            ValueError: length of sem_seg list and image list do not match.
         """
 
         if len(sem_seg_list) != len(image_path_list):

diff --git a/page_xml/xmlPAGE.py b/page_xml/xmlPAGE.py
@@ -312,6 +312,19 @@ def add_processing_step(self, git_hash: str, uuid: str, cfg: CfgNode, whitelist:
                 "value": str(convert_to_dict(sub_node)),
             }
 
+    def add_confidence(self, confidence: float):
+        """
+        Add a confidence value to the metadata as a MetadataItem element.
+
+        The element gets the attributes type="confidence", name="layout-analysis" and
+        value=str(confidence).
+
+        Args:
+            confidence (float): confidence to store.
+
+        Raises:
+            TypeError: confidence is None.
+            TypeError: metadata is None.
+        """
+        if confidence is None:
+            raise TypeError("confidence is None")
+        if self.metadata is None:
+            raise TypeError("self.metadata is None")
+
+        confidence_element = ET.SubElement(self.metadata, "MetadataItem")
+        confidence_element.attrib = {
+            "type": "confidence",
+            "name": "layout-analysis",
+            "value": str(confidence),
+        }
+
     def add_element(self, region_class, region_id, region_type, region_coords, parent=None):
         """add element to parent node"""
         parent = self.page if parent == None else parent