From a8fee6544cf39273d78acf4c29748ceae48828c5 Mon Sep 17 00:00:00 2001 From: Tim Koornstra Date: Fri, 26 Jan 2024 14:09:53 +0100 Subject: [PATCH 1/9] Add support for half-precision inference --- configs/extra_defaults.py | 2 + run.py | 81 ++++++++++++++++++++++++++++----------- 2 files changed, 60 insertions(+), 23 deletions(-) diff --git a/configs/extra_defaults.py b/configs/extra_defaults.py index 5254d7c..b668fad 100644 --- a/configs/extra_defaults.py +++ b/configs/extra_defaults.py @@ -23,6 +23,8 @@ _C.MODEL.SEM_SEG_HEAD = CN() _C.MODEL.SEM_SEG_HEAD.WEIGHT = [1.0] +_C.MODEL.HALF_PRECISION = False + # Weights _C.TRAIN = CN() _C.TRAIN.WEIGHTS = "" diff --git a/run.py b/run.py index 2feb9fc..7427c27 100644 --- a/run.py +++ b/run.py @@ -27,18 +27,24 @@ def get_arguments() -> argparse.Namespace: - parser = argparse.ArgumentParser(description="Run file to inference using the model found in the config file") + parser = argparse.ArgumentParser( + description="Run file to inference using the model found in the config file") detectron2_args = parser.add_argument_group("detectron2") - detectron2_args.add_argument("-c", "--config", help="config file", required=True) - detectron2_args.add_argument("--opts", nargs="+", help="optional args to change", action="extend", default=[]) + detectron2_args.add_argument( + "-c", "--config", help="config file", required=True) + detectron2_args.add_argument( + "--opts", nargs="+", help="optional args to change", action="extend", default=[]) io_args = parser.add_argument_group("IO") - io_args.add_argument("-i", "--input", nargs="+", help="Input folder", type=str, action="extend", required=True) - io_args.add_argument("-o", "--output", help="Output folder", type=str, required=True) + io_args.add_argument("-i", "--input", nargs="+", + help="Input folder", type=str, action="extend", required=True) + io_args.add_argument( + "-o", "--output", help="Output folder", type=str, required=True) - parser.add_argument("-w", "--whitelist", nargs="+", 
help="Input folder", type=str, action="extend") + parser.add_argument("-w", "--whitelist", nargs="+", + help="Input folder", type=str, action="extend") args = parser.parse_args() @@ -58,8 +64,12 @@ def __init__(self, cfg): cfg (CfgNode): config """ self.cfg = cfg.clone() # cfg can be modified by model + self.model = build_model(self.cfg) self.model.eval() + if cfg.MODEL.HALF_PRECISION: + self.model.half() + if len(cfg.DATASETS.TEST): self.metadata = MetadataCatalog.get(cfg.DATASETS.TEST[0]) @@ -68,21 +78,27 @@ def __init__(self, cfg): checkpointer = DetectionCheckpointer(self.model) if not cfg.TEST.WEIGHTS: - raise FileNotFoundError("Cannot do inference without weights. Specify a checkpoint file to --opts TEST.WEIGHTS") + raise FileNotFoundError( + "Cannot do inference without weights. Specify a checkpoint file to --opts TEST.WEIGHTS") checkpointer.load(cfg.TEST.WEIGHTS) if cfg.INPUT.RESIZE_MODE == "none": - self.aug = ResizeScaling(scale=1) # HACK percentage of 1 is no scaling + # HACK percentage of 1 is no scaling + self.aug = ResizeScaling(scale=1) elif cfg.INPUT.RESIZE_MODE in ["shortest_edge", "longest_edge"]: if cfg.INPUT.RESIZE_MODE == "shortest_edge": - self.aug = ResizeShortestEdge(cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, "choice") + self.aug = ResizeShortestEdge( + cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, "choice") elif cfg.INPUT.RESIZE_MODE == "longest_edge": - self.aug = ResizeLongestEdge(cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, "choice") + self.aug = ResizeLongestEdge( + cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, "choice") elif cfg.INPUT.RESIZE_MODE == "scaling": - self.aug = ResizeScaling(cfg.INPUT.SCALING_TEST, cfg.INPUT.MAX_SIZE_TEST) + self.aug = ResizeScaling( + cfg.INPUT.SCALING_TEST, cfg.INPUT.MAX_SIZE_TEST) else: - raise NotImplementedError(f"{cfg.INPUT.RESIZE_MODE} is not a known resize mode") + raise NotImplementedError( + f"{cfg.INPUT.RESIZE_MODE} is not a known resize mode") def get_image_size(self, height: 
int, width: int) -> tuple[int, int]: """ @@ -102,14 +118,17 @@ def get_image_size(self, height: int, width: int) -> tuple[int, int]: new_height, new_width = height, width elif self.cfg.INPUT.RESIZE_MODE in ["shortest_edge", "longest_edge"]: new_height, new_width = self.aug.get_output_shape( - height, width, self.cfg.INPUT.MIN_SIZE_TEST, self.cfg.INPUT.MAX_SIZE_TEST + height, width, self.cfg.INPUT.MIN_SIZE_TEST, + self.cfg.INPUT.MAX_SIZE_TEST ) elif self.cfg.INPUT.RESIZE_MODE == "scaling": new_height, new_width = self.aug.get_output_shape( - height, width, self.cfg.INPUT.SCALING_TEST, self.cfg.INPUT.MAX_SIZE_TEST + height, width, self.cfg.INPUT.SCALING_TEST, + self.cfg.INPUT.MAX_SIZE_TEST ) else: - raise NotImplementedError(f"{self.cfg.INPUT.RESIZE_MODE} is not a known resize mode") + raise NotImplementedError( + f"{self.cfg.INPUT.RESIZE_MODE} is not a known resize mode") return new_height, new_width @@ -127,7 +146,12 @@ def gpu_call(self, original_image: torch.Tensor): # Apply pre-processing to image. 
channels, height, width = original_image.shape assert channels == 3, f"Must be a BGR image, found {channels} channels" - image = torch.as_tensor(original_image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE) + image = torch.as_tensor( + original_image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE) + + if self.cfg.MODEL.HALF_PRECISION: + image = image.half() + if self.input_format == "BGR": # whether the model expects BGR inputs or RGB image = image[[2, 1, 0], :, :] @@ -135,7 +159,8 @@ def gpu_call(self, original_image: torch.Tensor): new_height, new_width = self.get_image_size(height, width) if self.cfg.INPUT.RESIZE_MODE != "none": - image = torch.nn.functional.interpolate(image[None], mode="bilinear", size=(new_height, new_width))[0] + image = torch.nn.functional.interpolate( + image[None], mode="bilinear", size=(new_height, new_width))[0] inputs = {"image": image, "height": new_height, "width": new_width} predictions = self.model([inputs])[0] @@ -155,13 +180,20 @@ def cpu_call(self, original_image: np.ndarray): # Apply pre-processing to image. 
height, width, channels = original_image.shape assert channels == 3, f"Must be a RBG image, found {channels} channels" - image = self.aug.get_transform(original_image).apply_image(original_image) - image = torch.as_tensor(image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE).permute(2, 0, 1) + image = self.aug.get_transform( + original_image).apply_image(original_image) + image = torch.as_tensor( + image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE).permute(2, 0, 1) + + if self.cfg.MODEL.HALF_PRECISION: + image = image.half() + if self.input_format == "BGR": # whether the model expects BGR inputs or RGB image = image[[2, 1, 0], :, :] - inputs = {"image": image, "height": image.shape[1], "width": image.shape[2]} + inputs = {"image": image, + "height": image.shape[1], "width": image.shape[2]} predictions = self.model([inputs])[0] return predictions, height, width @@ -279,7 +311,8 @@ def set_output_dir(self, output_dir: str | Path) -> None: output_dir = Path(output_dir) if not output_dir.is_dir(): - self.logger.info(f"Could not find output dir ({output_dir}), creating one at specified location") + self.logger.info( + f"Could not find output dir ({output_dir}), creating one at specified location") output_dir.mkdir(parents=True) self.output_dir = output_dir.resolve() @@ -298,7 +331,8 @@ def save_prediction(self, image, input_path): if self.output_dir is None: raise TypeError("Cannot run when the output dir is None") if image is None: - self.logger.warning(f"Image at {input_path} has not loaded correctly, ignoring for now") + self.logger.warning( + f"Image at {input_path} has not loaded correctly, ignoring for now") return outputs = self.__call__(image) @@ -307,7 +341,8 @@ def save_prediction(self, image, input_path): # output_image = torch.argmax(output_image, dim=-3).cpu().numpy() self.output_page.link_image(input_path) - self.output_page.generate_single_page(output_image, input_path, old_height=outputs[1], old_width=outputs[2]) + 
self.output_page.generate_single_page( + output_image, input_path, old_height=outputs[1], old_width=outputs[2]) def process(self): """ From 6624fc1aaead763237f5877051455d8f2883c6aa Mon Sep 17 00:00:00 2001 From: TimKoornstra Date: Fri, 26 Jan 2024 13:14:38 +0000 Subject: [PATCH 2/9] :art: Format Python code with psf/black --- run.py | 66 ++++++++++++++++++++-------------------------------------- 1 file changed, 22 insertions(+), 44 deletions(-) diff --git a/run.py b/run.py index 7427c27..3b710e0 100644 --- a/run.py +++ b/run.py @@ -27,24 +27,18 @@ def get_arguments() -> argparse.Namespace: - parser = argparse.ArgumentParser( - description="Run file to inference using the model found in the config file") + parser = argparse.ArgumentParser(description="Run file to inference using the model found in the config file") detectron2_args = parser.add_argument_group("detectron2") - detectron2_args.add_argument( - "-c", "--config", help="config file", required=True) - detectron2_args.add_argument( - "--opts", nargs="+", help="optional args to change", action="extend", default=[]) + detectron2_args.add_argument("-c", "--config", help="config file", required=True) + detectron2_args.add_argument("--opts", nargs="+", help="optional args to change", action="extend", default=[]) io_args = parser.add_argument_group("IO") - io_args.add_argument("-i", "--input", nargs="+", - help="Input folder", type=str, action="extend", required=True) - io_args.add_argument( - "-o", "--output", help="Output folder", type=str, required=True) + io_args.add_argument("-i", "--input", nargs="+", help="Input folder", type=str, action="extend", required=True) + io_args.add_argument("-o", "--output", help="Output folder", type=str, required=True) - parser.add_argument("-w", "--whitelist", nargs="+", - help="Input folder", type=str, action="extend") + parser.add_argument("-w", "--whitelist", nargs="+", help="Input folder", type=str, action="extend") args = parser.parse_args() @@ -78,8 +72,7 @@ def 
__init__(self, cfg): checkpointer = DetectionCheckpointer(self.model) if not cfg.TEST.WEIGHTS: - raise FileNotFoundError( - "Cannot do inference without weights. Specify a checkpoint file to --opts TEST.WEIGHTS") + raise FileNotFoundError("Cannot do inference without weights. Specify a checkpoint file to --opts TEST.WEIGHTS") checkpointer.load(cfg.TEST.WEIGHTS) @@ -88,17 +81,13 @@ def __init__(self, cfg): self.aug = ResizeScaling(scale=1) elif cfg.INPUT.RESIZE_MODE in ["shortest_edge", "longest_edge"]: if cfg.INPUT.RESIZE_MODE == "shortest_edge": - self.aug = ResizeShortestEdge( - cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, "choice") + self.aug = ResizeShortestEdge(cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, "choice") elif cfg.INPUT.RESIZE_MODE == "longest_edge": - self.aug = ResizeLongestEdge( - cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, "choice") + self.aug = ResizeLongestEdge(cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, "choice") elif cfg.INPUT.RESIZE_MODE == "scaling": - self.aug = ResizeScaling( - cfg.INPUT.SCALING_TEST, cfg.INPUT.MAX_SIZE_TEST) + self.aug = ResizeScaling(cfg.INPUT.SCALING_TEST, cfg.INPUT.MAX_SIZE_TEST) else: - raise NotImplementedError( - f"{cfg.INPUT.RESIZE_MODE} is not a known resize mode") + raise NotImplementedError(f"{cfg.INPUT.RESIZE_MODE} is not a known resize mode") def get_image_size(self, height: int, width: int) -> tuple[int, int]: """ @@ -118,17 +107,14 @@ def get_image_size(self, height: int, width: int) -> tuple[int, int]: new_height, new_width = height, width elif self.cfg.INPUT.RESIZE_MODE in ["shortest_edge", "longest_edge"]: new_height, new_width = self.aug.get_output_shape( - height, width, self.cfg.INPUT.MIN_SIZE_TEST, - self.cfg.INPUT.MAX_SIZE_TEST + height, width, self.cfg.INPUT.MIN_SIZE_TEST, self.cfg.INPUT.MAX_SIZE_TEST ) elif self.cfg.INPUT.RESIZE_MODE == "scaling": new_height, new_width = self.aug.get_output_shape( - height, width, self.cfg.INPUT.SCALING_TEST, - self.cfg.INPUT.MAX_SIZE_TEST + 
height, width, self.cfg.INPUT.SCALING_TEST, self.cfg.INPUT.MAX_SIZE_TEST ) else: - raise NotImplementedError( - f"{self.cfg.INPUT.RESIZE_MODE} is not a known resize mode") + raise NotImplementedError(f"{self.cfg.INPUT.RESIZE_MODE} is not a known resize mode") return new_height, new_width @@ -146,8 +132,7 @@ def gpu_call(self, original_image: torch.Tensor): # Apply pre-processing to image. channels, height, width = original_image.shape assert channels == 3, f"Must be a BGR image, found {channels} channels" - image = torch.as_tensor( - original_image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE) + image = torch.as_tensor(original_image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE) if self.cfg.MODEL.HALF_PRECISION: image = image.half() @@ -159,8 +144,7 @@ def gpu_call(self, original_image: torch.Tensor): new_height, new_width = self.get_image_size(height, width) if self.cfg.INPUT.RESIZE_MODE != "none": - image = torch.nn.functional.interpolate( - image[None], mode="bilinear", size=(new_height, new_width))[0] + image = torch.nn.functional.interpolate(image[None], mode="bilinear", size=(new_height, new_width))[0] inputs = {"image": image, "height": new_height, "width": new_width} predictions = self.model([inputs])[0] @@ -180,10 +164,8 @@ def cpu_call(self, original_image: np.ndarray): # Apply pre-processing to image. 
height, width, channels = original_image.shape assert channels == 3, f"Must be a RBG image, found {channels} channels" - image = self.aug.get_transform( - original_image).apply_image(original_image) - image = torch.as_tensor( - image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE).permute(2, 0, 1) + image = self.aug.get_transform(original_image).apply_image(original_image) + image = torch.as_tensor(image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE).permute(2, 0, 1) if self.cfg.MODEL.HALF_PRECISION: image = image.half() @@ -192,8 +174,7 @@ def cpu_call(self, original_image: np.ndarray): # whether the model expects BGR inputs or RGB image = image[[2, 1, 0], :, :] - inputs = {"image": image, - "height": image.shape[1], "width": image.shape[2]} + inputs = {"image": image, "height": image.shape[1], "width": image.shape[2]} predictions = self.model([inputs])[0] return predictions, height, width @@ -311,8 +292,7 @@ def set_output_dir(self, output_dir: str | Path) -> None: output_dir = Path(output_dir) if not output_dir.is_dir(): - self.logger.info( - f"Could not find output dir ({output_dir}), creating one at specified location") + self.logger.info(f"Could not find output dir ({output_dir}), creating one at specified location") output_dir.mkdir(parents=True) self.output_dir = output_dir.resolve() @@ -331,8 +311,7 @@ def save_prediction(self, image, input_path): if self.output_dir is None: raise TypeError("Cannot run when the output dir is None") if image is None: - self.logger.warning( - f"Image at {input_path} has not loaded correctly, ignoring for now") + self.logger.warning(f"Image at {input_path} has not loaded correctly, ignoring for now") return outputs = self.__call__(image) @@ -341,8 +320,7 @@ def save_prediction(self, image, input_path): # output_image = torch.argmax(output_image, dim=-3).cpu().numpy() self.output_page.link_image(input_path) - self.output_page.generate_single_page( - output_image, input_path, old_height=outputs[1], 
old_width=outputs[2]) + self.output_page.generate_single_page(output_image, input_path, old_height=outputs[1], old_width=outputs[2]) def process(self): """ From f2468a51af24899ce63175af38821b230046fd62 Mon Sep 17 00:00:00 2001 From: Tim Koornstra Date: Mon, 29 Jan 2024 13:57:25 +0100 Subject: [PATCH 3/9] Use torch.autocast instead of .half --- configs/extra_defaults.py | 5 +- run.py | 109 ++++++++++++++++++++++++++++---------- 2 files changed, 83 insertions(+), 31 deletions(-) diff --git a/configs/extra_defaults.py b/configs/extra_defaults.py index b668fad..d72db39 100644 --- a/configs/extra_defaults.py +++ b/configs/extra_defaults.py @@ -23,7 +23,7 @@ _C.MODEL.SEM_SEG_HEAD = CN() _C.MODEL.SEM_SEG_HEAD.WEIGHT = [1.0] -_C.MODEL.HALF_PRECISION = False +_C.MODEL.AUTOCAST = True # Weights _C.TRAIN = CN() @@ -217,7 +217,8 @@ _C.INPUT.MAX_SCALE = 2.0 # MSDeformAttn encoder configs -_C.MODEL.SEM_SEG_HEAD.DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES = ["res3", "res4", "res5"] +_C.MODEL.SEM_SEG_HEAD.DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES = [ + "res3", "res4", "res5"] _C.MODEL.SEM_SEG_HEAD.DEFORMABLE_TRANSFORMER_ENCODER_N_POINTS = 4 _C.MODEL.SEM_SEG_HEAD.DEFORMABLE_TRANSFORMER_ENCODER_N_HEADS = 8 diff --git a/run.py b/run.py index 3b710e0..b715d1c 100644 --- a/run.py +++ b/run.py @@ -27,18 +27,34 @@ def get_arguments() -> argparse.Namespace: - parser = argparse.ArgumentParser(description="Run file to inference using the model found in the config file") + parser = argparse.ArgumentParser( + description="Run file to inference using the model found in the config file" + ) detectron2_args = parser.add_argument_group("detectron2") detectron2_args.add_argument("-c", "--config", help="config file", required=True) - detectron2_args.add_argument("--opts", nargs="+", help="optional args to change", action="extend", default=[]) + detectron2_args.add_argument( + "--opts", nargs="+", help="optional args to change", action="extend", default=[] + ) io_args = 
parser.add_argument_group("IO") - io_args.add_argument("-i", "--input", nargs="+", help="Input folder", type=str, action="extend", required=True) - io_args.add_argument("-o", "--output", help="Output folder", type=str, required=True) + io_args.add_argument( + "-i", + "--input", + nargs="+", + help="Input folder", + type=str, + action="extend", + required=True, + ) + io_args.add_argument( + "-o", "--output", help="Output folder", type=str, required=True + ) - parser.add_argument("-w", "--whitelist", nargs="+", help="Input folder", type=str, action="extend") + parser.add_argument( + "-w", "--whitelist", nargs="+", help="Input folder", type=str, action="extend" + ) args = parser.parse_args() @@ -61,8 +77,6 @@ def __init__(self, cfg): self.model = build_model(self.cfg) self.model.eval() - if cfg.MODEL.HALF_PRECISION: - self.model.half() if len(cfg.DATASETS.TEST): self.metadata = MetadataCatalog.get(cfg.DATASETS.TEST[0]) @@ -72,7 +86,9 @@ def __init__(self, cfg): checkpointer = DetectionCheckpointer(self.model) if not cfg.TEST.WEIGHTS: - raise FileNotFoundError("Cannot do inference without weights. Specify a checkpoint file to --opts TEST.WEIGHTS") + raise FileNotFoundError( + "Cannot do inference without weights. 
Specify a checkpoint file to --opts TEST.WEIGHTS" + ) checkpointer.load(cfg.TEST.WEIGHTS) @@ -81,13 +97,19 @@ def __init__(self, cfg): self.aug = ResizeScaling(scale=1) elif cfg.INPUT.RESIZE_MODE in ["shortest_edge", "longest_edge"]: if cfg.INPUT.RESIZE_MODE == "shortest_edge": - self.aug = ResizeShortestEdge(cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, "choice") + self.aug = ResizeShortestEdge( + cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, "choice" + ) elif cfg.INPUT.RESIZE_MODE == "longest_edge": - self.aug = ResizeLongestEdge(cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, "choice") + self.aug = ResizeLongestEdge( + cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, "choice" + ) elif cfg.INPUT.RESIZE_MODE == "scaling": self.aug = ResizeScaling(cfg.INPUT.SCALING_TEST, cfg.INPUT.MAX_SIZE_TEST) else: - raise NotImplementedError(f"{cfg.INPUT.RESIZE_MODE} is not a known resize mode") + raise NotImplementedError( + f"{cfg.INPUT.RESIZE_MODE} is not a known resize mode" + ) def get_image_size(self, height: int, width: int) -> tuple[int, int]: """ @@ -107,14 +129,19 @@ def get_image_size(self, height: int, width: int) -> tuple[int, int]: new_height, new_width = height, width elif self.cfg.INPUT.RESIZE_MODE in ["shortest_edge", "longest_edge"]: new_height, new_width = self.aug.get_output_shape( - height, width, self.cfg.INPUT.MIN_SIZE_TEST, self.cfg.INPUT.MAX_SIZE_TEST + height, + width, + self.cfg.INPUT.MIN_SIZE_TEST, + self.cfg.INPUT.MAX_SIZE_TEST, ) elif self.cfg.INPUT.RESIZE_MODE == "scaling": new_height, new_width = self.aug.get_output_shape( height, width, self.cfg.INPUT.SCALING_TEST, self.cfg.INPUT.MAX_SIZE_TEST ) else: - raise NotImplementedError(f"{self.cfg.INPUT.RESIZE_MODE} is not a known resize mode") + raise NotImplementedError( + f"{self.cfg.INPUT.RESIZE_MODE} is not a known resize mode" + ) return new_height, new_width @@ -132,10 +159,9 @@ def gpu_call(self, original_image: torch.Tensor): # Apply pre-processing to image. 
channels, height, width = original_image.shape assert channels == 3, f"Must be a BGR image, found {channels} channels" - image = torch.as_tensor(original_image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE) - - if self.cfg.MODEL.HALF_PRECISION: - image = image.half() + image = torch.as_tensor( + original_image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE + ) if self.input_format == "BGR": # whether the model expects BGR inputs or RGB @@ -144,10 +170,17 @@ def gpu_call(self, original_image: torch.Tensor): new_height, new_width = self.get_image_size(height, width) if self.cfg.INPUT.RESIZE_MODE != "none": - image = torch.nn.functional.interpolate(image[None], mode="bilinear", size=(new_height, new_width))[0] + image = torch.nn.functional.interpolate( + image[None], mode="bilinear", size=(new_height, new_width) + )[0] inputs = {"image": image, "height": new_height, "width": new_width} - predictions = self.model([inputs])[0] + + with torch.autocast( + device_type=self.cfg.MODEL.DEVICE, enabled=self.cfg.MODEL.AUTOCAST + ): + predictions = self.model([inputs])[0] + return predictions, height, width def cpu_call(self, original_image: np.ndarray): @@ -165,17 +198,20 @@ def cpu_call(self, original_image: np.ndarray): height, width, channels = original_image.shape assert channels == 3, f"Must be a RBG image, found {channels} channels" image = self.aug.get_transform(original_image).apply_image(original_image) - image = torch.as_tensor(image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE).permute(2, 0, 1) - - if self.cfg.MODEL.HALF_PRECISION: - image = image.half() + image = torch.as_tensor( + image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE + ).permute(2, 0, 1) if self.input_format == "BGR": # whether the model expects BGR inputs or RGB image = image[[2, 1, 0], :, :] inputs = {"image": image, "height": image.shape[1], "width": image.shape[2]} - predictions = self.model([inputs])[0] + + with torch.autocast( + device_type=self.cfg.MODEL.DEVICE, 
enabled=self.cfg.MODEL.AUTOCAST + ): + predictions = self.model([inputs])[0] return predictions, height, width @@ -218,7 +254,11 @@ def __getitem__(self, index): def collate_numpy(batch): collate_map = default_collate_fn_map - def new_map(batch, *, collate_fn_map: Optional[Dict[Union[Type, Tuple[Type, ...]], Callable]] = None): + def new_map( + batch, + *, + collate_fn_map: Optional[Dict[Union[Type, Tuple[Type, ...]], Callable]] = None, + ): return batch collate_map.update({np.ndarray: new_map, type(None): new_map}) @@ -292,7 +332,9 @@ def set_output_dir(self, output_dir: str | Path) -> None: output_dir = Path(output_dir) if not output_dir.is_dir(): - self.logger.info(f"Could not find output dir ({output_dir}), creating one at specified location") + self.logger.info( + f"Could not find output dir ({output_dir}), creating one at specified location" + ) output_dir.mkdir(parents=True) self.output_dir = output_dir.resolve() @@ -311,7 +353,9 @@ def save_prediction(self, image, input_path): if self.output_dir is None: raise TypeError("Cannot run when the output dir is None") if image is None: - self.logger.warning(f"Image at {input_path} has not loaded correctly, ignoring for now") + self.logger.warning( + f"Image at {input_path} has not loaded correctly, ignoring for now" + ) return outputs = self.__call__(image) @@ -320,7 +364,9 @@ def save_prediction(self, image, input_path): # output_image = torch.argmax(output_image, dim=-3).cpu().numpy() self.output_page.link_image(input_path) - self.output_page.generate_single_page(output_image, input_path, old_height=outputs[1], old_width=outputs[2]) + self.output_page.generate_single_page( + output_image, input_path, old_height=outputs[1], old_width=outputs[2] + ) def process(self): """ @@ -337,7 +383,12 @@ def process(self): dataset = LoadingDataset(self.input_paths) dataloader = DataLoader( - dataset, shuffle=False, batch_size=None, num_workers=16, pin_memory=False, collate_fn=collate_numpy + dataset, + shuffle=False, + 
batch_size=None, + num_workers=16, + pin_memory=False, + collate_fn=collate_numpy, ) for inputs in tqdm(dataloader, desc="Predicting PageXML"): self.save_prediction(inputs[0], inputs[1]) From cbbd3baab6d73b5bb961eba5d47be1bdb2fdc6ac Mon Sep 17 00:00:00 2001 From: TimKoornstra Date: Mon, 29 Jan 2024 12:58:22 +0000 Subject: [PATCH 4/9] :art: Format Python code with psf/black --- configs/extra_defaults.py | 3 +- run.py | 68 ++++++++++----------------------------- 2 files changed, 18 insertions(+), 53 deletions(-) diff --git a/configs/extra_defaults.py b/configs/extra_defaults.py index d72db39..7c10fdc 100644 --- a/configs/extra_defaults.py +++ b/configs/extra_defaults.py @@ -217,8 +217,7 @@ _C.INPUT.MAX_SCALE = 2.0 # MSDeformAttn encoder configs -_C.MODEL.SEM_SEG_HEAD.DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES = [ - "res3", "res4", "res5"] +_C.MODEL.SEM_SEG_HEAD.DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES = ["res3", "res4", "res5"] _C.MODEL.SEM_SEG_HEAD.DEFORMABLE_TRANSFORMER_ENCODER_N_POINTS = 4 _C.MODEL.SEM_SEG_HEAD.DEFORMABLE_TRANSFORMER_ENCODER_N_HEADS = 8 diff --git a/run.py b/run.py index b715d1c..1b90042 100644 --- a/run.py +++ b/run.py @@ -27,16 +27,12 @@ def get_arguments() -> argparse.Namespace: - parser = argparse.ArgumentParser( - description="Run file to inference using the model found in the config file" - ) + parser = argparse.ArgumentParser(description="Run file to inference using the model found in the config file") detectron2_args = parser.add_argument_group("detectron2") detectron2_args.add_argument("-c", "--config", help="config file", required=True) - detectron2_args.add_argument( - "--opts", nargs="+", help="optional args to change", action="extend", default=[] - ) + detectron2_args.add_argument("--opts", nargs="+", help="optional args to change", action="extend", default=[]) io_args = parser.add_argument_group("IO") io_args.add_argument( @@ -48,13 +44,9 @@ def get_arguments() -> argparse.Namespace: action="extend", required=True, ) - 
io_args.add_argument( - "-o", "--output", help="Output folder", type=str, required=True - ) + io_args.add_argument("-o", "--output", help="Output folder", type=str, required=True) - parser.add_argument( - "-w", "--whitelist", nargs="+", help="Input folder", type=str, action="extend" - ) + parser.add_argument("-w", "--whitelist", nargs="+", help="Input folder", type=str, action="extend") args = parser.parse_args() @@ -86,9 +78,7 @@ def __init__(self, cfg): checkpointer = DetectionCheckpointer(self.model) if not cfg.TEST.WEIGHTS: - raise FileNotFoundError( - "Cannot do inference without weights. Specify a checkpoint file to --opts TEST.WEIGHTS" - ) + raise FileNotFoundError("Cannot do inference without weights. Specify a checkpoint file to --opts TEST.WEIGHTS") checkpointer.load(cfg.TEST.WEIGHTS) @@ -97,19 +87,13 @@ def __init__(self, cfg): self.aug = ResizeScaling(scale=1) elif cfg.INPUT.RESIZE_MODE in ["shortest_edge", "longest_edge"]: if cfg.INPUT.RESIZE_MODE == "shortest_edge": - self.aug = ResizeShortestEdge( - cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, "choice" - ) + self.aug = ResizeShortestEdge(cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, "choice") elif cfg.INPUT.RESIZE_MODE == "longest_edge": - self.aug = ResizeLongestEdge( - cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, "choice" - ) + self.aug = ResizeLongestEdge(cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, "choice") elif cfg.INPUT.RESIZE_MODE == "scaling": self.aug = ResizeScaling(cfg.INPUT.SCALING_TEST, cfg.INPUT.MAX_SIZE_TEST) else: - raise NotImplementedError( - f"{cfg.INPUT.RESIZE_MODE} is not a known resize mode" - ) + raise NotImplementedError(f"{cfg.INPUT.RESIZE_MODE} is not a known resize mode") def get_image_size(self, height: int, width: int) -> tuple[int, int]: """ @@ -139,9 +123,7 @@ def get_image_size(self, height: int, width: int) -> tuple[int, int]: height, width, self.cfg.INPUT.SCALING_TEST, self.cfg.INPUT.MAX_SIZE_TEST ) else: - raise NotImplementedError( - 
f"{self.cfg.INPUT.RESIZE_MODE} is not a known resize mode" - ) + raise NotImplementedError(f"{self.cfg.INPUT.RESIZE_MODE} is not a known resize mode") return new_height, new_width @@ -159,9 +141,7 @@ def gpu_call(self, original_image: torch.Tensor): # Apply pre-processing to image. channels, height, width = original_image.shape assert channels == 3, f"Must be a BGR image, found {channels} channels" - image = torch.as_tensor( - original_image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE - ) + image = torch.as_tensor(original_image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE) if self.input_format == "BGR": # whether the model expects BGR inputs or RGB @@ -170,15 +150,11 @@ def gpu_call(self, original_image: torch.Tensor): new_height, new_width = self.get_image_size(height, width) if self.cfg.INPUT.RESIZE_MODE != "none": - image = torch.nn.functional.interpolate( - image[None], mode="bilinear", size=(new_height, new_width) - )[0] + image = torch.nn.functional.interpolate(image[None], mode="bilinear", size=(new_height, new_width))[0] inputs = {"image": image, "height": new_height, "width": new_width} - with torch.autocast( - device_type=self.cfg.MODEL.DEVICE, enabled=self.cfg.MODEL.AUTOCAST - ): + with torch.autocast(device_type=self.cfg.MODEL.DEVICE, enabled=self.cfg.MODEL.AUTOCAST): predictions = self.model([inputs])[0] return predictions, height, width @@ -198,9 +174,7 @@ def cpu_call(self, original_image: np.ndarray): height, width, channels = original_image.shape assert channels == 3, f"Must be a RBG image, found {channels} channels" image = self.aug.get_transform(original_image).apply_image(original_image) - image = torch.as_tensor( - image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE - ).permute(2, 0, 1) + image = torch.as_tensor(image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE).permute(2, 0, 1) if self.input_format == "BGR": # whether the model expects BGR inputs or RGB @@ -208,9 +182,7 @@ def cpu_call(self, original_image: 
np.ndarray): inputs = {"image": image, "height": image.shape[1], "width": image.shape[2]} - with torch.autocast( - device_type=self.cfg.MODEL.DEVICE, enabled=self.cfg.MODEL.AUTOCAST - ): + with torch.autocast(device_type=self.cfg.MODEL.DEVICE, enabled=self.cfg.MODEL.AUTOCAST): predictions = self.model([inputs])[0] return predictions, height, width @@ -332,9 +304,7 @@ def set_output_dir(self, output_dir: str | Path) -> None: output_dir = Path(output_dir) if not output_dir.is_dir(): - self.logger.info( - f"Could not find output dir ({output_dir}), creating one at specified location" - ) + self.logger.info(f"Could not find output dir ({output_dir}), creating one at specified location") output_dir.mkdir(parents=True) self.output_dir = output_dir.resolve() @@ -353,9 +323,7 @@ def save_prediction(self, image, input_path): if self.output_dir is None: raise TypeError("Cannot run when the output dir is None") if image is None: - self.logger.warning( - f"Image at {input_path} has not loaded correctly, ignoring for now" - ) + self.logger.warning(f"Image at {input_path} has not loaded correctly, ignoring for now") return outputs = self.__call__(image) @@ -364,9 +332,7 @@ def save_prediction(self, image, input_path): # output_image = torch.argmax(output_image, dim=-3).cpu().numpy() self.output_page.link_image(input_path) - self.output_page.generate_single_page( - output_image, input_path, old_height=outputs[1], old_width=outputs[2] - ) + self.output_page.generate_single_page(output_image, input_path, old_height=outputs[1], old_width=outputs[2]) def process(self): """ From b45e487e5f1c687fae0f62909d2ba8017597b94b Mon Sep 17 00:00:00 2001 From: Stefan Klut Date: Tue, 30 Jan 2024 13:15:02 +0100 Subject: [PATCH 5/9] Change the use of self.format to just use the cfg --- run.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/run.py b/run.py index 1b90042..c75e948 100644 --- a/run.py +++ b/run.py @@ -73,8 +73,7 @@ def __init__(self, cfg): if 
len(cfg.DATASETS.TEST): self.metadata = MetadataCatalog.get(cfg.DATASETS.TEST[0]) - self.input_format = cfg.INPUT.FORMAT - assert self.input_format in ["RGB", "BGR"], self.input_format + assert self.cfg.INPUT.FORMAT in ["RGB", "BGR"], self.cfg.INPUT.FORMAT checkpointer = DetectionCheckpointer(self.model) if not cfg.TEST.WEIGHTS: @@ -143,7 +142,7 @@ def gpu_call(self, original_image: torch.Tensor): assert channels == 3, f"Must be a BGR image, found {channels} channels" image = torch.as_tensor(original_image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE) - if self.input_format == "BGR": + if self.cfg.INPUT.FORMAT == "BGR": # whether the model expects BGR inputs or RGB image = image[[2, 1, 0], :, :] @@ -176,7 +175,7 @@ def cpu_call(self, original_image: np.ndarray): image = self.aug.get_transform(original_image).apply_image(original_image) image = torch.as_tensor(image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE).permute(2, 0, 1) - if self.input_format == "BGR": + if self.cfg.INPUT.FORMAT == "BGR": # whether the model expects BGR inputs or RGB image = image[[2, 1, 0], :, :] From b88ddd5e275e82793eeaa3c387b83a0cf1f44ec1 Mon Sep 17 00:00:00 2001 From: Stefan Klut Date: Wed, 31 Jan 2024 14:16:44 +0100 Subject: [PATCH 6/9] NaN check --- run.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/run.py b/run.py index c75e948..163bf83 100644 --- a/run.py +++ b/run.py @@ -184,6 +184,9 @@ def cpu_call(self, original_image: np.ndarray): with torch.autocast(device_type=self.cfg.MODEL.DEVICE, enabled=self.cfg.MODEL.AUTOCAST): predictions = self.model([inputs])[0] + if torch.isnan(torch.ispredictions["sem_seg"]).any(): + raise ValueError("NaN in predictions") + return predictions, height, width def __call__(self, original_image): From 29a5dd14cd887821899afa4228ff8fb8a3bc3c5f Mon Sep 17 00:00:00 2001 From: Stefan Klut Date: Wed, 31 Jan 2024 14:41:53 +0100 Subject: [PATCH 7/9] Fix NaN check in predictions --- run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/run.py b/run.py index 163bf83..193491b 100644 --- a/run.py +++ b/run.py @@ -184,7 +184,7 @@ def cpu_call(self, original_image: np.ndarray): with torch.autocast(device_type=self.cfg.MODEL.DEVICE, enabled=self.cfg.MODEL.AUTOCAST): predictions = self.model([inputs])[0] - if torch.isnan(torch.ispredictions["sem_seg"]).any(): + if torch.isnan(predictions["sem_seg"]).any(): raise ValueError("NaN in predictions") return predictions, height, width From bc0fbd4a8a19428a8b8a235ff9f15f90fcf55983 Mon Sep 17 00:00:00 2001 From: Stefan Klut Date: Wed, 31 Jan 2024 15:25:55 +0100 Subject: [PATCH 8/9] Commented out code that raises ValueError for NaN in predictions --- run.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/run.py b/run.py index 193491b..a29e3ff 100644 --- a/run.py +++ b/run.py @@ -184,8 +184,8 @@ def cpu_call(self, original_image: np.ndarray): with torch.autocast(device_type=self.cfg.MODEL.DEVICE, enabled=self.cfg.MODEL.AUTOCAST): predictions = self.model([inputs])[0] - if torch.isnan(predictions["sem_seg"]).any(): - raise ValueError("NaN in predictions") + # if torch.isnan(predictions["sem_seg"]).any(): + # raise ValueError("NaN in predictions") return predictions, height, width From 493ecd140030ca7172f7c6e87857e068c2861564 Mon Sep 17 00:00:00 2001 From: Stefan Klut Date: Thu, 1 Feb 2024 13:50:02 +0100 Subject: [PATCH 9/9] Update AMP training and testing configurations --- configs/extra_defaults.py | 7 ++++++- core/trainer.py | 12 +++++++++++- run.py | 24 ++++++++++++++++++++++-- 3 files changed, 39 insertions(+), 4 deletions(-) diff --git a/configs/extra_defaults.py b/configs/extra_defaults.py index 7c10fdc..decb6e3 100644 --- a/configs/extra_defaults.py +++ b/configs/extra_defaults.py @@ -23,7 +23,12 @@ _C.MODEL.SEM_SEG_HEAD = CN() _C.MODEL.SEM_SEG_HEAD.WEIGHT = [1.0] -_C.MODEL.AUTOCAST = True +_C.MODEL.AMP_TRAIN = CN() +_C.MODEL.AMP_TRAIN.ENABLED = False +_C.MODEL.AMP_TRAIN.PRECISION = "bfloat16" +_C.MODEL.AMP_TEST = CN() 
+_C.MODEL.AMP_TEST.ENABLED = True +_C.MODEL.AMP_TEST.PRECISION = "bfloat16" # Weights _C.TRAIN = CN() diff --git a/core/trainer.py b/core/trainer.py index 4784945..370fe81 100644 --- a/core/trainer.py +++ b/core/trainer.py @@ -223,7 +223,17 @@ def __init__(self, cfg: CfgNode): data_loader = self.build_train_loader(cfg) model = create_ddp_model(model, broadcast_buffers=False) - self._trainer = (AMPTrainer if cfg.SOLVER.AMP.ENABLED else SimpleTrainer)(model, data_loader, optimizer) + self._trainer = (AMPTrainer if cfg.MODEL.AMP_TRAIN.ENABLED else SimpleTrainer)(model, data_loader, optimizer) + if isinstance(self._trainer, AMPTrainer): + precision_converter = { + "float32": torch.float32, + "float16": torch.float16, + "bfloat16": torch.bfloat16, + } + precision = precision_converter.get(cfg.MODEL.AMP_TRAIN.PRECISION, None) + if precision is None: + raise ValueError(f"Unrecognized precision: {cfg.MODEL.AMP_TRAIN.PRECISION}") + self._trainer.precision = precision self.scheduler = self.build_lr_scheduler(cfg, optimizer) diff --git a/run.py b/run.py index a29e3ff..9c5d595 100644 --- a/run.py +++ b/run.py @@ -73,6 +73,15 @@ def __init__(self, cfg): if len(cfg.DATASETS.TEST): self.metadata = MetadataCatalog.get(cfg.DATASETS.TEST[0]) + precision_converter = { + "float32": torch.float32, + "float16": torch.float16, + "bfloat16": torch.bfloat16, + } + self.precision = precision_converter.get(cfg.MODEL.AMP_TEST.PRECISION, None) + if self.precision is None: + raise ValueError(f"Unrecognized precision: {cfg.MODEL.AMP_TEST.PRECISION}") + assert self.cfg.INPUT.FORMAT in ["RGB", "BGR"], self.cfg.INPUT.FORMAT checkpointer = DetectionCheckpointer(self.model) @@ -153,9 +162,16 @@ def gpu_call(self, original_image: torch.Tensor): inputs = {"image": image, "height": new_height, "width": new_width} - with torch.autocast(device_type=self.cfg.MODEL.DEVICE, enabled=self.cfg.MODEL.AUTOCAST): + with torch.autocast( + device_type=self.cfg.MODEL.DEVICE, + enabled=self.cfg.MODEL.AMP_TEST.ENABLED, +
dtype=self.precision, + ): predictions = self.model([inputs])[0] + # if torch.isnan(predictions["sem_seg"]).any(): + # raise ValueError("NaN in predictions") + return predictions, height, width def cpu_call(self, original_image: np.ndarray): @@ -181,7 +197,11 @@ def cpu_call(self, original_image: np.ndarray): inputs = {"image": image, "height": image.shape[1], "width": image.shape[2]} - with torch.autocast(device_type=self.cfg.MODEL.DEVICE, enabled=self.cfg.MODEL.AUTOCAST): + with torch.autocast( + device_type=self.cfg.MODEL.DEVICE, + enabled=self.cfg.MODEL.AMP_TEST.ENABLED, + dtype=self.precision, + ): predictions = self.model([inputs])[0] # if torch.isnan(predictions["sem_seg"]).any():