Skip to content

Commit

Permalink
Merge branch 'confidence'
Browse files Browse the repository at this point in the history
  • Loading branch information
stefanklut committed Jan 21, 2025
2 parents 5522a11 + 1b94215 commit 3ca0676
Show file tree
Hide file tree
Showing 2 changed files with 146 additions and 28 deletions.
161 changes: 133 additions & 28 deletions page_xml/output_pageXML.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def __init__(
whitelist: Optional[Iterable[str]] = None,
rectangle_regions: Optional[Iterable[str]] = None,
min_region_size: int = 10,
save_confidence_heatmap: bool = False,
) -> None:
"""
Class for the generation of the pageXML from class predictions on images
Expand Down Expand Up @@ -91,6 +92,7 @@ def __init__(
self.whitelist = set() if whitelist is None else set(whitelist)
self.min_region_size = min_region_size
self.rectangle_regions = set() if rectangle_regions is None else set(rectangle_regions)
self.save_confidence_heatmap = save_confidence_heatmap

def set_output_dir(self, output_dir: str | Path):
if isinstance(output_dir, str):
Expand Down Expand Up @@ -190,6 +192,81 @@ def generate_single_page_yolo(

page.save_xml()

@staticmethod
def scale_to_range(
tensor: torch.Tensor,
min_value: float = 0.0,
max_value: float = 1.0,
tensor_min: Optional[float] = None,
tensor_max: Optional[float] = None,
) -> torch.Tensor:
"""
Scale tensor to a range
Args:
image (torch.Tensor): image to be scaled
min_value (float, optional): minimum value of the range. Defaults to 0.0.
max_value (float, optional): maximum value of the range. Defaults to 1.0.
tensor_min (Optional[float], optional): minimum value of the tensor. Defaults to None.
tensor_max (Optional[float], optional): maximum value of the tensor. Defaults to None.
Returns:
torch.Tensor: scaled image
"""

if tensor_min is None:
tensor_min = torch.min(tensor).item()
if tensor_max is None:
tensor_max = torch.max(tensor).item()

tensor = (max_value - min_value) * (tensor - tensor_min) / (tensor_max - tensor_min) + min_value

return tensor

@staticmethod
def save_heatmap(scaled_confidence: torch.Tensor, confidence_output_path: Path):
    """
    Render a confidence map as a PLASMA-coloured image and write it to disk.

    Args:
        scaled_confidence (torch.Tensor): confidence as tensor; it is multiplied by 255
            and cast to uint8, so values are presumably expected in [0, 1].
        confidence_output_path (Path): path to save the heatmap.
    """
    # Bring the tensor to the host and quantise it to 8 bit for the colour map
    heat_uint8 = (scaled_confidence * 255).cpu().numpy().astype(np.uint8)
    heat_colored = cv2.applyColorMap(heat_uint8, cv2.COLORMAP_PLASMA)
    # Reverse the channel axis (presumably BGR -> RGB for the image writer)
    heat_flipped = heat_colored[..., ::-1]

    with AtomicFileName(file_path=confidence_output_path) as target_path:
        save_image_array_to_path(str(target_path), heat_flipped)

def sem_seg_to_classes_and_confidence(
    self,
    sem_seg: torch.Tensor,
    height: Optional[int] = None,
    width: Optional[int] = None,
) -> tuple[torch.Tensor, torch.Tensor]:
    """
    Convert a single prediction into classes and confidence.

    Args:
        sem_seg (torch.Tensor): sem_seg as tensor; the class axis is the third from
            last dimension (softmax and max are taken over dim=-3).
        height (Optional[int], optional): target height. Defaults to None.
        width (Optional[int], optional): target width. Defaults to None.
            The probabilities are only resized (bilinear) when both height and
            width are given.

    Returns:
        torch.Tensor, torch.Tensor: classes and confidence, both at the same spatial
            resolution. The confidence is the highest class probability, rescaled so
            that 1 / len(self.xml_regions.regions) maps to 0 and 1.0 maps to 1.
    """
    probabilities = torch.nn.functional.softmax(sem_seg, dim=-3)
    if height is not None and width is not None:
        # interpolate works on batched input, so add and strip a leading axis
        probabilities = torch.nn.functional.interpolate(
            probabilities[None], size=(height, width), mode="bilinear", align_corners=False
        )[0]

    # Take the winning class and its probability from the same (possibly resized)
    # map, so that the confidence lines up pixel for pixel with the classes.
    confidence, sem_seg_classes = torch.max(probabilities, dim=-3)

    # 1 / number of regions is the lowest value the highest probability can take
    scaled_confidence = self.scale_to_range(confidence, tensor_min=1 / len(self.xml_regions.regions), tensor_max=1.0)

    return sem_seg_classes, scaled_confidence

def generate_single_page(
self,
sem_seg: torch.Tensor,
Expand All @@ -198,18 +275,18 @@ def generate_single_page(
old_width: Optional[int] = None,
):
"""
Convert a single prediction into a page
Convert a single prediction into a page.
Args:
sem_seg (torch.Tensor): sem_seg as tensor
image_path (Path): Image path, used for path name
sem_seg (torch.Tensor): sem_seg as tensor.
image_path (Path): Image path, used for path name.
old_height (Optional[int], optional): height of the original image. Defaults to None.
old_width (Optional[int], optional): width of the original image. Defaults to None.
Raises:
TypeError: Output dir has not been set
TypeError: Page dir has not been set
NotImplementedError: mode is not known
TypeError: Output dir has not been set.
TypeError: Page dir has not been set.
NotImplementedError: mode is not known.
"""
if self.output_dir is None:
raise TypeError("Output dir is None")
Expand All @@ -232,7 +309,17 @@ def generate_single_page(
page.add_processing_step(get_git_hash(), self.cfg.LAYPA_UUID, self.cfg, self.whitelist)

if self.xml_regions.mode == "region":
sem_seg = torch.argmax(sem_seg, dim=-3).cpu().numpy()
confidence_output_path = self.page_dir.joinpath(image_path.stem + "_confidence.png")
sem_seg_classes, confidence = self.sem_seg_to_classes_and_confidence(sem_seg)

# Apply a color map
if self.save_confidence_heatmap:
self.save_heatmap(confidence, confidence_output_path)

sem_seg_classes = sem_seg_classes.cpu().numpy()
mean_confidence = torch.mean(confidence).cpu().numpy().item()

page.add_confidence(mean_confidence)

region_id = 0

Expand All @@ -247,7 +334,7 @@ def generate_single_page(
contours, hierarchy = cv2.findContours(binary_region_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

for cnt in contours:
# remove small objects
# Remove small objects
if cnt.shape[0] < 4:
continue
if cv2.contourArea(cnt) < self.min_region_size:
Expand All @@ -257,11 +344,11 @@ def generate_single_page(

region_coords = ""
if region in self.rectangle_regions:
# find bounding box
# Find bounding box
rect = cv2.minAreaRect(cnt)
poly = cv2.boxPoints(rect) * scaling
else:
# soft a bit the region to prevent spikes
# Soft a bit the region to prevent spikes
epsilon = 0.0005 * cv2.arcLength(cnt, True)
approx_poly = cv2.approxPolyDP(cnt, epsilon, True)

Expand All @@ -276,23 +363,41 @@ def generate_single_page(

_uuid = uuid.uuid4()
text_reg = page.add_element(region_type, f"region_{_uuid}_{region_id}", region, region_coords)

elif self.xml_regions.mode in ["baseline", "start", "end", "separator"]:
# Push the calculation to outside of the python code <- mask is used by minion
sem_seg_output_path = self.page_dir.joinpath(image_path.stem + ".png")
sem_seg = torch.nn.functional.interpolate(
sem_seg[None], size=(old_height, old_width), mode="bilinear", align_corners=False
)[0]
sem_seg_image = torch.argmax(sem_seg, dim=-3).cpu().numpy()
confidence_output_path = self.page_dir.joinpath(image_path.stem + "_confidence.png")
sem_seg_classes, confidence = self.sem_seg_to_classes_and_confidence(sem_seg, old_height, old_width)

# Apply a color map
if self.save_confidence_heatmap:
self.save_heatmap(confidence, confidence_output_path)

sem_seg_classes = sem_seg_classes.cpu().numpy()
mean_confidence = torch.mean(confidence).cpu().numpy().item()

page.add_confidence(mean_confidence)

# Save the mask
with AtomicFileName(file_path=sem_seg_output_path) as path:
save_image_array_to_path(str(path), (sem_seg_image * 255).astype(np.uint8))
save_image_array_to_path(str(path), (sem_seg_classes * 255).astype(np.uint8))

elif self.xml_regions.mode in ["baseline_separator", "top_bottom"]:
sem_seg_output_path = self.page_dir.joinpath(image_path.stem + ".png")
sem_seg = torch.nn.functional.interpolate(
sem_seg[None], size=(old_height, old_width), mode="bilinear", align_corners=False
)[0]
sem_seg_image = torch.argmax(sem_seg, dim=-3).cpu().numpy()
confidence_output_path = self.page_dir.joinpath(image_path.stem + "_confidence.png")

sem_seg_classes, confidence = self.sem_seg_to_classes_and_confidence(sem_seg, old_height, old_width)

# Apply a color map
if self.save_confidence_heatmap:
self.save_heatmap(confidence, confidence_output_path)

sem_seg_classes = sem_seg_classes.cpu().numpy()
mean_confidence = torch.mean(confidence).cpu().numpy()

# Save the mask
with AtomicFileName(file_path=sem_seg_output_path) as path:
save_image_array_to_path(str(path), (sem_seg_image * 128).clip(0, 255).astype(np.uint8))
save_image_array_to_path(str(path), (sem_seg_classes * 128).clip(0, 255).astype(np.uint8))
else:
raise NotImplementedError(f"Mode {self.xml_regions.mode} not implemented")

Expand All @@ -301,13 +406,13 @@ def generate_single_page(

def generate_single_page_wrapper(self, info):
"""
Convert a single prediction into a page
Convert a single prediction into a page.
Args:
info (tuple[torch.Tensor | Path, Path]):
(tuple containing)
torch.Tensor | Path: mask as array or path to mask
Path: original image path
torch.Tensor | Path: mask as array or path to mask.
Path: original image path.
"""
mask, image_path = info
if isinstance(mask, Path):
Expand All @@ -321,14 +426,14 @@ def run(
image_path_list: list[Path],
) -> None:
"""
Generate pageXML for all sem_seg-image pairs in the lists
Generate pageXML for all sem_seg-image pairs in the lists.
Args:
sem_seg_list (list[np.ndarray] | list[Path]): all sem_seg as arrays or path to the sem_seg
image_path_list (list[Path]): path to the original image
sem_seg_list (list[np.ndarray] | list[Path]): all sem_seg as arrays or path to the sem_seg.
image_path_list (list[Path]): path to the original image.
Raises:
ValueError: length of sem_seg list and image list do not match
ValueError: length of sem_seg list and image list do not match.
"""

if len(sem_seg_list) != len(image_path_list):
Expand Down
13 changes: 13 additions & 0 deletions page_xml/xmlPAGE.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,19 @@ def add_processing_step(self, git_hash: str, uuid: str, cfg: CfgNode, whitelist:
"value": str(convert_to_dict(sub_node)),
}

def add_confidence(self, confidence: float):
    """
    Append a "MetadataItem" element holding the layout-analysis confidence to the metadata node.

    Args:
        confidence (float): confidence value, stored as its str() representation.

    Raises:
        TypeError: confidence is None.
        TypeError: self.metadata is None.
    """
    if confidence is None:
        raise TypeError("confidence is None")

    metadata_node = self.metadata
    if metadata_node is None:
        raise TypeError("self.metadata is None")

    # Create the child and set its attributes in a single call
    ET.SubElement(
        metadata_node,
        "MetadataItem",
        attrib={
            "type": "confidence",
            "name": "layout-analysis",
            "value": str(confidence),
        },
    )

def add_element(self, region_class, region_id, region_type, region_coords, parent=None):
"""add element to parent node"""
parent = self.page if parent == None else parent
Expand Down

0 comments on commit 3ca0676

Please sign in to comment.