From a8fee6544cf39273d78acf4c29748ceae48828c5 Mon Sep 17 00:00:00 2001 From: Tim Koornstra Date: Fri, 26 Jan 2024 14:09:53 +0100 Subject: [PATCH 1/9] Add support for half-precision inference --- configs/extra_defaults.py | 2 + run.py | 81 ++++++++++++++++++++++++++++----------- 2 files changed, 60 insertions(+), 23 deletions(-) diff --git a/configs/extra_defaults.py b/configs/extra_defaults.py index 5254d7c..b668fad 100644 --- a/configs/extra_defaults.py +++ b/configs/extra_defaults.py @@ -23,6 +23,8 @@ _C.MODEL.SEM_SEG_HEAD = CN() _C.MODEL.SEM_SEG_HEAD.WEIGHT = [1.0] +_C.MODEL.HALF_PRECISION = False + # Weights _C.TRAIN = CN() _C.TRAIN.WEIGHTS = "" diff --git a/run.py b/run.py index 2feb9fc..7427c27 100644 --- a/run.py +++ b/run.py @@ -27,18 +27,24 @@ def get_arguments() -> argparse.Namespace: - parser = argparse.ArgumentParser(description="Run file to inference using the model found in the config file") + parser = argparse.ArgumentParser( + description="Run file to inference using the model found in the config file") detectron2_args = parser.add_argument_group("detectron2") - detectron2_args.add_argument("-c", "--config", help="config file", required=True) - detectron2_args.add_argument("--opts", nargs="+", help="optional args to change", action="extend", default=[]) + detectron2_args.add_argument( + "-c", "--config", help="config file", required=True) + detectron2_args.add_argument( + "--opts", nargs="+", help="optional args to change", action="extend", default=[]) io_args = parser.add_argument_group("IO") - io_args.add_argument("-i", "--input", nargs="+", help="Input folder", type=str, action="extend", required=True) - io_args.add_argument("-o", "--output", help="Output folder", type=str, required=True) + io_args.add_argument("-i", "--input", nargs="+", + help="Input folder", type=str, action="extend", required=True) + io_args.add_argument( + "-o", "--output", help="Output folder", type=str, required=True) - parser.add_argument("-w", "--whitelist", nargs="+", 
help="Input folder", type=str, action="extend") + parser.add_argument("-w", "--whitelist", nargs="+", + help="Input folder", type=str, action="extend") args = parser.parse_args() @@ -58,8 +64,12 @@ def __init__(self, cfg): cfg (CfgNode): config """ self.cfg = cfg.clone() # cfg can be modified by model + self.model = build_model(self.cfg) self.model.eval() + if cfg.MODEL.HALF_PRECISION: + self.model.half() + if len(cfg.DATASETS.TEST): self.metadata = MetadataCatalog.get(cfg.DATASETS.TEST[0]) @@ -68,21 +78,27 @@ def __init__(self, cfg): checkpointer = DetectionCheckpointer(self.model) if not cfg.TEST.WEIGHTS: - raise FileNotFoundError("Cannot do inference without weights. Specify a checkpoint file to --opts TEST.WEIGHTS") + raise FileNotFoundError( + "Cannot do inference without weights. Specify a checkpoint file to --opts TEST.WEIGHTS") checkpointer.load(cfg.TEST.WEIGHTS) if cfg.INPUT.RESIZE_MODE == "none": - self.aug = ResizeScaling(scale=1) # HACK percentage of 1 is no scaling + # HACK percentage of 1 is no scaling + self.aug = ResizeScaling(scale=1) elif cfg.INPUT.RESIZE_MODE in ["shortest_edge", "longest_edge"]: if cfg.INPUT.RESIZE_MODE == "shortest_edge": - self.aug = ResizeShortestEdge(cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, "choice") + self.aug = ResizeShortestEdge( + cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, "choice") elif cfg.INPUT.RESIZE_MODE == "longest_edge": - self.aug = ResizeLongestEdge(cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, "choice") + self.aug = ResizeLongestEdge( + cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, "choice") elif cfg.INPUT.RESIZE_MODE == "scaling": - self.aug = ResizeScaling(cfg.INPUT.SCALING_TEST, cfg.INPUT.MAX_SIZE_TEST) + self.aug = ResizeScaling( + cfg.INPUT.SCALING_TEST, cfg.INPUT.MAX_SIZE_TEST) else: - raise NotImplementedError(f"{cfg.INPUT.RESIZE_MODE} is not a known resize mode") + raise NotImplementedError( + f"{cfg.INPUT.RESIZE_MODE} is not a known resize mode") def get_image_size(self, height: 
int, width: int) -> tuple[int, int]: """ @@ -102,14 +118,17 @@ def get_image_size(self, height: int, width: int) -> tuple[int, int]: new_height, new_width = height, width elif self.cfg.INPUT.RESIZE_MODE in ["shortest_edge", "longest_edge"]: new_height, new_width = self.aug.get_output_shape( - height, width, self.cfg.INPUT.MIN_SIZE_TEST, self.cfg.INPUT.MAX_SIZE_TEST + height, width, self.cfg.INPUT.MIN_SIZE_TEST, + self.cfg.INPUT.MAX_SIZE_TEST ) elif self.cfg.INPUT.RESIZE_MODE == "scaling": new_height, new_width = self.aug.get_output_shape( - height, width, self.cfg.INPUT.SCALING_TEST, self.cfg.INPUT.MAX_SIZE_TEST + height, width, self.cfg.INPUT.SCALING_TEST, + self.cfg.INPUT.MAX_SIZE_TEST ) else: - raise NotImplementedError(f"{self.cfg.INPUT.RESIZE_MODE} is not a known resize mode") + raise NotImplementedError( + f"{self.cfg.INPUT.RESIZE_MODE} is not a known resize mode") return new_height, new_width @@ -127,7 +146,12 @@ def gpu_call(self, original_image: torch.Tensor): # Apply pre-processing to image. 
channels, height, width = original_image.shape assert channels == 3, f"Must be a BGR image, found {channels} channels" - image = torch.as_tensor(original_image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE) + image = torch.as_tensor( + original_image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE) + + if self.cfg.MODEL.HALF_PRECISION: + image = image.half() + if self.input_format == "BGR": # whether the model expects BGR inputs or RGB image = image[[2, 1, 0], :, :] @@ -135,7 +159,8 @@ def gpu_call(self, original_image: torch.Tensor): new_height, new_width = self.get_image_size(height, width) if self.cfg.INPUT.RESIZE_MODE != "none": - image = torch.nn.functional.interpolate(image[None], mode="bilinear", size=(new_height, new_width))[0] + image = torch.nn.functional.interpolate( + image[None], mode="bilinear", size=(new_height, new_width))[0] inputs = {"image": image, "height": new_height, "width": new_width} predictions = self.model([inputs])[0] @@ -155,13 +180,20 @@ def cpu_call(self, original_image: np.ndarray): # Apply pre-processing to image. 
height, width, channels = original_image.shape assert channels == 3, f"Must be a RBG image, found {channels} channels" - image = self.aug.get_transform(original_image).apply_image(original_image) - image = torch.as_tensor(image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE).permute(2, 0, 1) + image = self.aug.get_transform( + original_image).apply_image(original_image) + image = torch.as_tensor( + image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE).permute(2, 0, 1) + + if self.cfg.MODEL.HALF_PRECISION: + image = image.half() + if self.input_format == "BGR": # whether the model expects BGR inputs or RGB image = image[[2, 1, 0], :, :] - inputs = {"image": image, "height": image.shape[1], "width": image.shape[2]} + inputs = {"image": image, + "height": image.shape[1], "width": image.shape[2]} predictions = self.model([inputs])[0] return predictions, height, width @@ -279,7 +311,8 @@ def set_output_dir(self, output_dir: str | Path) -> None: output_dir = Path(output_dir) if not output_dir.is_dir(): - self.logger.info(f"Could not find output dir ({output_dir}), creating one at specified location") + self.logger.info( + f"Could not find output dir ({output_dir}), creating one at specified location") output_dir.mkdir(parents=True) self.output_dir = output_dir.resolve() @@ -298,7 +331,8 @@ def save_prediction(self, image, input_path): if self.output_dir is None: raise TypeError("Cannot run when the output dir is None") if image is None: - self.logger.warning(f"Image at {input_path} has not loaded correctly, ignoring for now") + self.logger.warning( + f"Image at {input_path} has not loaded correctly, ignoring for now") return outputs = self.__call__(image) @@ -307,7 +341,8 @@ def save_prediction(self, image, input_path): # output_image = torch.argmax(output_image, dim=-3).cpu().numpy() self.output_page.link_image(input_path) - self.output_page.generate_single_page(output_image, input_path, old_height=outputs[1], old_width=outputs[2]) + 
self.output_page.generate_single_page( + output_image, input_path, old_height=outputs[1], old_width=outputs[2]) def process(self): """ From 6624fc1aaead763237f5877051455d8f2883c6aa Mon Sep 17 00:00:00 2001 From: TimKoornstra Date: Fri, 26 Jan 2024 13:14:38 +0000 Subject: [PATCH 2/9] :art: Format Python code with psf/black --- run.py | 66 ++++++++++++++++++++-------------------------------------- 1 file changed, 22 insertions(+), 44 deletions(-) diff --git a/run.py b/run.py index 7427c27..3b710e0 100644 --- a/run.py +++ b/run.py @@ -27,24 +27,18 @@ def get_arguments() -> argparse.Namespace: - parser = argparse.ArgumentParser( - description="Run file to inference using the model found in the config file") + parser = argparse.ArgumentParser(description="Run file to inference using the model found in the config file") detectron2_args = parser.add_argument_group("detectron2") - detectron2_args.add_argument( - "-c", "--config", help="config file", required=True) - detectron2_args.add_argument( - "--opts", nargs="+", help="optional args to change", action="extend", default=[]) + detectron2_args.add_argument("-c", "--config", help="config file", required=True) + detectron2_args.add_argument("--opts", nargs="+", help="optional args to change", action="extend", default=[]) io_args = parser.add_argument_group("IO") - io_args.add_argument("-i", "--input", nargs="+", - help="Input folder", type=str, action="extend", required=True) - io_args.add_argument( - "-o", "--output", help="Output folder", type=str, required=True) + io_args.add_argument("-i", "--input", nargs="+", help="Input folder", type=str, action="extend", required=True) + io_args.add_argument("-o", "--output", help="Output folder", type=str, required=True) - parser.add_argument("-w", "--whitelist", nargs="+", - help="Input folder", type=str, action="extend") + parser.add_argument("-w", "--whitelist", nargs="+", help="Input folder", type=str, action="extend") args = parser.parse_args() @@ -78,8 +72,7 @@ def 
__init__(self, cfg): checkpointer = DetectionCheckpointer(self.model) if not cfg.TEST.WEIGHTS: - raise FileNotFoundError( - "Cannot do inference without weights. Specify a checkpoint file to --opts TEST.WEIGHTS") + raise FileNotFoundError("Cannot do inference without weights. Specify a checkpoint file to --opts TEST.WEIGHTS") checkpointer.load(cfg.TEST.WEIGHTS) @@ -88,17 +81,13 @@ def __init__(self, cfg): self.aug = ResizeScaling(scale=1) elif cfg.INPUT.RESIZE_MODE in ["shortest_edge", "longest_edge"]: if cfg.INPUT.RESIZE_MODE == "shortest_edge": - self.aug = ResizeShortestEdge( - cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, "choice") + self.aug = ResizeShortestEdge(cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, "choice") elif cfg.INPUT.RESIZE_MODE == "longest_edge": - self.aug = ResizeLongestEdge( - cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, "choice") + self.aug = ResizeLongestEdge(cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, "choice") elif cfg.INPUT.RESIZE_MODE == "scaling": - self.aug = ResizeScaling( - cfg.INPUT.SCALING_TEST, cfg.INPUT.MAX_SIZE_TEST) + self.aug = ResizeScaling(cfg.INPUT.SCALING_TEST, cfg.INPUT.MAX_SIZE_TEST) else: - raise NotImplementedError( - f"{cfg.INPUT.RESIZE_MODE} is not a known resize mode") + raise NotImplementedError(f"{cfg.INPUT.RESIZE_MODE} is not a known resize mode") def get_image_size(self, height: int, width: int) -> tuple[int, int]: """ @@ -118,17 +107,14 @@ def get_image_size(self, height: int, width: int) -> tuple[int, int]: new_height, new_width = height, width elif self.cfg.INPUT.RESIZE_MODE in ["shortest_edge", "longest_edge"]: new_height, new_width = self.aug.get_output_shape( - height, width, self.cfg.INPUT.MIN_SIZE_TEST, - self.cfg.INPUT.MAX_SIZE_TEST + height, width, self.cfg.INPUT.MIN_SIZE_TEST, self.cfg.INPUT.MAX_SIZE_TEST ) elif self.cfg.INPUT.RESIZE_MODE == "scaling": new_height, new_width = self.aug.get_output_shape( - height, width, self.cfg.INPUT.SCALING_TEST, - self.cfg.INPUT.MAX_SIZE_TEST + 
height, width, self.cfg.INPUT.SCALING_TEST, self.cfg.INPUT.MAX_SIZE_TEST ) else: - raise NotImplementedError( - f"{self.cfg.INPUT.RESIZE_MODE} is not a known resize mode") + raise NotImplementedError(f"{self.cfg.INPUT.RESIZE_MODE} is not a known resize mode") return new_height, new_width @@ -146,8 +132,7 @@ def gpu_call(self, original_image: torch.Tensor): # Apply pre-processing to image. channels, height, width = original_image.shape assert channels == 3, f"Must be a BGR image, found {channels} channels" - image = torch.as_tensor( - original_image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE) + image = torch.as_tensor(original_image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE) if self.cfg.MODEL.HALF_PRECISION: image = image.half() @@ -159,8 +144,7 @@ def gpu_call(self, original_image: torch.Tensor): new_height, new_width = self.get_image_size(height, width) if self.cfg.INPUT.RESIZE_MODE != "none": - image = torch.nn.functional.interpolate( - image[None], mode="bilinear", size=(new_height, new_width))[0] + image = torch.nn.functional.interpolate(image[None], mode="bilinear", size=(new_height, new_width))[0] inputs = {"image": image, "height": new_height, "width": new_width} predictions = self.model([inputs])[0] @@ -180,10 +164,8 @@ def cpu_call(self, original_image: np.ndarray): # Apply pre-processing to image. 
height, width, channels = original_image.shape assert channels == 3, f"Must be a RBG image, found {channels} channels" - image = self.aug.get_transform( - original_image).apply_image(original_image) - image = torch.as_tensor( - image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE).permute(2, 0, 1) + image = self.aug.get_transform(original_image).apply_image(original_image) + image = torch.as_tensor(image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE).permute(2, 0, 1) if self.cfg.MODEL.HALF_PRECISION: image = image.half() @@ -192,8 +174,7 @@ def cpu_call(self, original_image: np.ndarray): # whether the model expects BGR inputs or RGB image = image[[2, 1, 0], :, :] - inputs = {"image": image, - "height": image.shape[1], "width": image.shape[2]} + inputs = {"image": image, "height": image.shape[1], "width": image.shape[2]} predictions = self.model([inputs])[0] return predictions, height, width @@ -311,8 +292,7 @@ def set_output_dir(self, output_dir: str | Path) -> None: output_dir = Path(output_dir) if not output_dir.is_dir(): - self.logger.info( - f"Could not find output dir ({output_dir}), creating one at specified location") + self.logger.info(f"Could not find output dir ({output_dir}), creating one at specified location") output_dir.mkdir(parents=True) self.output_dir = output_dir.resolve() @@ -331,8 +311,7 @@ def save_prediction(self, image, input_path): if self.output_dir is None: raise TypeError("Cannot run when the output dir is None") if image is None: - self.logger.warning( - f"Image at {input_path} has not loaded correctly, ignoring for now") + self.logger.warning(f"Image at {input_path} has not loaded correctly, ignoring for now") return outputs = self.__call__(image) @@ -341,8 +320,7 @@ def save_prediction(self, image, input_path): # output_image = torch.argmax(output_image, dim=-3).cpu().numpy() self.output_page.link_image(input_path) - self.output_page.generate_single_page( - output_image, input_path, old_height=outputs[1], 
old_width=outputs[2]) + self.output_page.generate_single_page(output_image, input_path, old_height=outputs[1], old_width=outputs[2]) def process(self): """ From f2468a51af24899ce63175af38821b230046fd62 Mon Sep 17 00:00:00 2001 From: Tim Koornstra Date: Mon, 29 Jan 2024 13:57:25 +0100 Subject: [PATCH 3/9] Use torch.autocast instead of .half --- configs/extra_defaults.py | 5 +- run.py | 109 ++++++++++++++++++++++++++++---------- 2 files changed, 83 insertions(+), 31 deletions(-) diff --git a/configs/extra_defaults.py b/configs/extra_defaults.py index b668fad..d72db39 100644 --- a/configs/extra_defaults.py +++ b/configs/extra_defaults.py @@ -23,7 +23,7 @@ _C.MODEL.SEM_SEG_HEAD = CN() _C.MODEL.SEM_SEG_HEAD.WEIGHT = [1.0] -_C.MODEL.HALF_PRECISION = False +_C.MODEL.AUTOCAST = True # Weights _C.TRAIN = CN() @@ -217,7 +217,8 @@ _C.INPUT.MAX_SCALE = 2.0 # MSDeformAttn encoder configs -_C.MODEL.SEM_SEG_HEAD.DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES = ["res3", "res4", "res5"] +_C.MODEL.SEM_SEG_HEAD.DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES = [ + "res3", "res4", "res5"] _C.MODEL.SEM_SEG_HEAD.DEFORMABLE_TRANSFORMER_ENCODER_N_POINTS = 4 _C.MODEL.SEM_SEG_HEAD.DEFORMABLE_TRANSFORMER_ENCODER_N_HEADS = 8 diff --git a/run.py b/run.py index 3b710e0..b715d1c 100644 --- a/run.py +++ b/run.py @@ -27,18 +27,34 @@ def get_arguments() -> argparse.Namespace: - parser = argparse.ArgumentParser(description="Run file to inference using the model found in the config file") + parser = argparse.ArgumentParser( + description="Run file to inference using the model found in the config file" + ) detectron2_args = parser.add_argument_group("detectron2") detectron2_args.add_argument("-c", "--config", help="config file", required=True) - detectron2_args.add_argument("--opts", nargs="+", help="optional args to change", action="extend", default=[]) + detectron2_args.add_argument( + "--opts", nargs="+", help="optional args to change", action="extend", default=[] + ) io_args = 
parser.add_argument_group("IO") - io_args.add_argument("-i", "--input", nargs="+", help="Input folder", type=str, action="extend", required=True) - io_args.add_argument("-o", "--output", help="Output folder", type=str, required=True) + io_args.add_argument( + "-i", + "--input", + nargs="+", + help="Input folder", + type=str, + action="extend", + required=True, + ) + io_args.add_argument( + "-o", "--output", help="Output folder", type=str, required=True + ) - parser.add_argument("-w", "--whitelist", nargs="+", help="Input folder", type=str, action="extend") + parser.add_argument( + "-w", "--whitelist", nargs="+", help="Input folder", type=str, action="extend" + ) args = parser.parse_args() @@ -61,8 +77,6 @@ def __init__(self, cfg): self.model = build_model(self.cfg) self.model.eval() - if cfg.MODEL.HALF_PRECISION: - self.model.half() if len(cfg.DATASETS.TEST): self.metadata = MetadataCatalog.get(cfg.DATASETS.TEST[0]) @@ -72,7 +86,9 @@ def __init__(self, cfg): checkpointer = DetectionCheckpointer(self.model) if not cfg.TEST.WEIGHTS: - raise FileNotFoundError("Cannot do inference without weights. Specify a checkpoint file to --opts TEST.WEIGHTS") + raise FileNotFoundError( + "Cannot do inference without weights. 
Specify a checkpoint file to --opts TEST.WEIGHTS" + ) checkpointer.load(cfg.TEST.WEIGHTS) @@ -81,13 +97,19 @@ def __init__(self, cfg): self.aug = ResizeScaling(scale=1) elif cfg.INPUT.RESIZE_MODE in ["shortest_edge", "longest_edge"]: if cfg.INPUT.RESIZE_MODE == "shortest_edge": - self.aug = ResizeShortestEdge(cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, "choice") + self.aug = ResizeShortestEdge( + cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, "choice" + ) elif cfg.INPUT.RESIZE_MODE == "longest_edge": - self.aug = ResizeLongestEdge(cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, "choice") + self.aug = ResizeLongestEdge( + cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, "choice" + ) elif cfg.INPUT.RESIZE_MODE == "scaling": self.aug = ResizeScaling(cfg.INPUT.SCALING_TEST, cfg.INPUT.MAX_SIZE_TEST) else: - raise NotImplementedError(f"{cfg.INPUT.RESIZE_MODE} is not a known resize mode") + raise NotImplementedError( + f"{cfg.INPUT.RESIZE_MODE} is not a known resize mode" + ) def get_image_size(self, height: int, width: int) -> tuple[int, int]: """ @@ -107,14 +129,19 @@ def get_image_size(self, height: int, width: int) -> tuple[int, int]: new_height, new_width = height, width elif self.cfg.INPUT.RESIZE_MODE in ["shortest_edge", "longest_edge"]: new_height, new_width = self.aug.get_output_shape( - height, width, self.cfg.INPUT.MIN_SIZE_TEST, self.cfg.INPUT.MAX_SIZE_TEST + height, + width, + self.cfg.INPUT.MIN_SIZE_TEST, + self.cfg.INPUT.MAX_SIZE_TEST, ) elif self.cfg.INPUT.RESIZE_MODE == "scaling": new_height, new_width = self.aug.get_output_shape( height, width, self.cfg.INPUT.SCALING_TEST, self.cfg.INPUT.MAX_SIZE_TEST ) else: - raise NotImplementedError(f"{self.cfg.INPUT.RESIZE_MODE} is not a known resize mode") + raise NotImplementedError( + f"{self.cfg.INPUT.RESIZE_MODE} is not a known resize mode" + ) return new_height, new_width @@ -132,10 +159,9 @@ def gpu_call(self, original_image: torch.Tensor): # Apply pre-processing to image. 
channels, height, width = original_image.shape assert channels == 3, f"Must be a BGR image, found {channels} channels" - image = torch.as_tensor(original_image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE) - - if self.cfg.MODEL.HALF_PRECISION: - image = image.half() + image = torch.as_tensor( + original_image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE + ) if self.input_format == "BGR": # whether the model expects BGR inputs or RGB @@ -144,10 +170,17 @@ def gpu_call(self, original_image: torch.Tensor): new_height, new_width = self.get_image_size(height, width) if self.cfg.INPUT.RESIZE_MODE != "none": - image = torch.nn.functional.interpolate(image[None], mode="bilinear", size=(new_height, new_width))[0] + image = torch.nn.functional.interpolate( + image[None], mode="bilinear", size=(new_height, new_width) + )[0] inputs = {"image": image, "height": new_height, "width": new_width} - predictions = self.model([inputs])[0] + + with torch.autocast( + device_type=self.cfg.MODEL.DEVICE, enabled=self.cfg.MODEL.AUTOCAST + ): + predictions = self.model([inputs])[0] + return predictions, height, width def cpu_call(self, original_image: np.ndarray): @@ -165,17 +198,20 @@ def cpu_call(self, original_image: np.ndarray): height, width, channels = original_image.shape assert channels == 3, f"Must be a RBG image, found {channels} channels" image = self.aug.get_transform(original_image).apply_image(original_image) - image = torch.as_tensor(image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE).permute(2, 0, 1) - - if self.cfg.MODEL.HALF_PRECISION: - image = image.half() + image = torch.as_tensor( + image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE + ).permute(2, 0, 1) if self.input_format == "BGR": # whether the model expects BGR inputs or RGB image = image[[2, 1, 0], :, :] inputs = {"image": image, "height": image.shape[1], "width": image.shape[2]} - predictions = self.model([inputs])[0] + + with torch.autocast( + device_type=self.cfg.MODEL.DEVICE, 
enabled=self.cfg.MODEL.AUTOCAST + ): + predictions = self.model([inputs])[0] return predictions, height, width @@ -218,7 +254,11 @@ def __getitem__(self, index): def collate_numpy(batch): collate_map = default_collate_fn_map - def new_map(batch, *, collate_fn_map: Optional[Dict[Union[Type, Tuple[Type, ...]], Callable]] = None): + def new_map( + batch, + *, + collate_fn_map: Optional[Dict[Union[Type, Tuple[Type, ...]], Callable]] = None, + ): return batch collate_map.update({np.ndarray: new_map, type(None): new_map}) @@ -292,7 +332,9 @@ def set_output_dir(self, output_dir: str | Path) -> None: output_dir = Path(output_dir) if not output_dir.is_dir(): - self.logger.info(f"Could not find output dir ({output_dir}), creating one at specified location") + self.logger.info( + f"Could not find output dir ({output_dir}), creating one at specified location" + ) output_dir.mkdir(parents=True) self.output_dir = output_dir.resolve() @@ -311,7 +353,9 @@ def save_prediction(self, image, input_path): if self.output_dir is None: raise TypeError("Cannot run when the output dir is None") if image is None: - self.logger.warning(f"Image at {input_path} has not loaded correctly, ignoring for now") + self.logger.warning( + f"Image at {input_path} has not loaded correctly, ignoring for now" + ) return outputs = self.__call__(image) @@ -320,7 +364,9 @@ def save_prediction(self, image, input_path): # output_image = torch.argmax(output_image, dim=-3).cpu().numpy() self.output_page.link_image(input_path) - self.output_page.generate_single_page(output_image, input_path, old_height=outputs[1], old_width=outputs[2]) + self.output_page.generate_single_page( + output_image, input_path, old_height=outputs[1], old_width=outputs[2] + ) def process(self): """ @@ -337,7 +383,12 @@ def process(self): dataset = LoadingDataset(self.input_paths) dataloader = DataLoader( - dataset, shuffle=False, batch_size=None, num_workers=16, pin_memory=False, collate_fn=collate_numpy + dataset, + shuffle=False, + 
batch_size=None, + num_workers=16, + pin_memory=False, + collate_fn=collate_numpy, ) for inputs in tqdm(dataloader, desc="Predicting PageXML"): self.save_prediction(inputs[0], inputs[1]) From cbbd3baab6d73b5bb961eba5d47be1bdb2fdc6ac Mon Sep 17 00:00:00 2001 From: TimKoornstra Date: Mon, 29 Jan 2024 12:58:22 +0000 Subject: [PATCH 4/9] :art: Format Python code with psf/black --- configs/extra_defaults.py | 3 +- run.py | 68 ++++++++++----------------------------- 2 files changed, 18 insertions(+), 53 deletions(-) diff --git a/configs/extra_defaults.py b/configs/extra_defaults.py index d72db39..7c10fdc 100644 --- a/configs/extra_defaults.py +++ b/configs/extra_defaults.py @@ -217,8 +217,7 @@ _C.INPUT.MAX_SCALE = 2.0 # MSDeformAttn encoder configs -_C.MODEL.SEM_SEG_HEAD.DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES = [ - "res3", "res4", "res5"] +_C.MODEL.SEM_SEG_HEAD.DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES = ["res3", "res4", "res5"] _C.MODEL.SEM_SEG_HEAD.DEFORMABLE_TRANSFORMER_ENCODER_N_POINTS = 4 _C.MODEL.SEM_SEG_HEAD.DEFORMABLE_TRANSFORMER_ENCODER_N_HEADS = 8 diff --git a/run.py b/run.py index b715d1c..1b90042 100644 --- a/run.py +++ b/run.py @@ -27,16 +27,12 @@ def get_arguments() -> argparse.Namespace: - parser = argparse.ArgumentParser( - description="Run file to inference using the model found in the config file" - ) + parser = argparse.ArgumentParser(description="Run file to inference using the model found in the config file") detectron2_args = parser.add_argument_group("detectron2") detectron2_args.add_argument("-c", "--config", help="config file", required=True) - detectron2_args.add_argument( - "--opts", nargs="+", help="optional args to change", action="extend", default=[] - ) + detectron2_args.add_argument("--opts", nargs="+", help="optional args to change", action="extend", default=[]) io_args = parser.add_argument_group("IO") io_args.add_argument( @@ -48,13 +44,9 @@ def get_arguments() -> argparse.Namespace: action="extend", required=True, ) - 
io_args.add_argument( - "-o", "--output", help="Output folder", type=str, required=True - ) + io_args.add_argument("-o", "--output", help="Output folder", type=str, required=True) - parser.add_argument( - "-w", "--whitelist", nargs="+", help="Input folder", type=str, action="extend" - ) + parser.add_argument("-w", "--whitelist", nargs="+", help="Input folder", type=str, action="extend") args = parser.parse_args() @@ -86,9 +78,7 @@ def __init__(self, cfg): checkpointer = DetectionCheckpointer(self.model) if not cfg.TEST.WEIGHTS: - raise FileNotFoundError( - "Cannot do inference without weights. Specify a checkpoint file to --opts TEST.WEIGHTS" - ) + raise FileNotFoundError("Cannot do inference without weights. Specify a checkpoint file to --opts TEST.WEIGHTS") checkpointer.load(cfg.TEST.WEIGHTS) @@ -97,19 +87,13 @@ def __init__(self, cfg): self.aug = ResizeScaling(scale=1) elif cfg.INPUT.RESIZE_MODE in ["shortest_edge", "longest_edge"]: if cfg.INPUT.RESIZE_MODE == "shortest_edge": - self.aug = ResizeShortestEdge( - cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, "choice" - ) + self.aug = ResizeShortestEdge(cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, "choice") elif cfg.INPUT.RESIZE_MODE == "longest_edge": - self.aug = ResizeLongestEdge( - cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, "choice" - ) + self.aug = ResizeLongestEdge(cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, "choice") elif cfg.INPUT.RESIZE_MODE == "scaling": self.aug = ResizeScaling(cfg.INPUT.SCALING_TEST, cfg.INPUT.MAX_SIZE_TEST) else: - raise NotImplementedError( - f"{cfg.INPUT.RESIZE_MODE} is not a known resize mode" - ) + raise NotImplementedError(f"{cfg.INPUT.RESIZE_MODE} is not a known resize mode") def get_image_size(self, height: int, width: int) -> tuple[int, int]: """ @@ -139,9 +123,7 @@ def get_image_size(self, height: int, width: int) -> tuple[int, int]: height, width, self.cfg.INPUT.SCALING_TEST, self.cfg.INPUT.MAX_SIZE_TEST ) else: - raise NotImplementedError( - 
f"{self.cfg.INPUT.RESIZE_MODE} is not a known resize mode" - ) + raise NotImplementedError(f"{self.cfg.INPUT.RESIZE_MODE} is not a known resize mode") return new_height, new_width @@ -159,9 +141,7 @@ def gpu_call(self, original_image: torch.Tensor): # Apply pre-processing to image. channels, height, width = original_image.shape assert channels == 3, f"Must be a BGR image, found {channels} channels" - image = torch.as_tensor( - original_image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE - ) + image = torch.as_tensor(original_image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE) if self.input_format == "BGR": # whether the model expects BGR inputs or RGB @@ -170,15 +150,11 @@ def gpu_call(self, original_image: torch.Tensor): new_height, new_width = self.get_image_size(height, width) if self.cfg.INPUT.RESIZE_MODE != "none": - image = torch.nn.functional.interpolate( - image[None], mode="bilinear", size=(new_height, new_width) - )[0] + image = torch.nn.functional.interpolate(image[None], mode="bilinear", size=(new_height, new_width))[0] inputs = {"image": image, "height": new_height, "width": new_width} - with torch.autocast( - device_type=self.cfg.MODEL.DEVICE, enabled=self.cfg.MODEL.AUTOCAST - ): + with torch.autocast(device_type=self.cfg.MODEL.DEVICE, enabled=self.cfg.MODEL.AUTOCAST): predictions = self.model([inputs])[0] return predictions, height, width @@ -198,9 +174,7 @@ def cpu_call(self, original_image: np.ndarray): height, width, channels = original_image.shape assert channels == 3, f"Must be a RBG image, found {channels} channels" image = self.aug.get_transform(original_image).apply_image(original_image) - image = torch.as_tensor( - image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE - ).permute(2, 0, 1) + image = torch.as_tensor(image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE).permute(2, 0, 1) if self.input_format == "BGR": # whether the model expects BGR inputs or RGB @@ -208,9 +182,7 @@ def cpu_call(self, original_image: 
np.ndarray): inputs = {"image": image, "height": image.shape[1], "width": image.shape[2]} - with torch.autocast( - device_type=self.cfg.MODEL.DEVICE, enabled=self.cfg.MODEL.AUTOCAST - ): + with torch.autocast(device_type=self.cfg.MODEL.DEVICE, enabled=self.cfg.MODEL.AUTOCAST): predictions = self.model([inputs])[0] return predictions, height, width @@ -332,9 +304,7 @@ def set_output_dir(self, output_dir: str | Path) -> None: output_dir = Path(output_dir) if not output_dir.is_dir(): - self.logger.info( - f"Could not find output dir ({output_dir}), creating one at specified location" - ) + self.logger.info(f"Could not find output dir ({output_dir}), creating one at specified location") output_dir.mkdir(parents=True) self.output_dir = output_dir.resolve() @@ -353,9 +323,7 @@ def save_prediction(self, image, input_path): if self.output_dir is None: raise TypeError("Cannot run when the output dir is None") if image is None: - self.logger.warning( - f"Image at {input_path} has not loaded correctly, ignoring for now" - ) + self.logger.warning(f"Image at {input_path} has not loaded correctly, ignoring for now") return outputs = self.__call__(image) @@ -364,9 +332,7 @@ def save_prediction(self, image, input_path): # output_image = torch.argmax(output_image, dim=-3).cpu().numpy() self.output_page.link_image(input_path) - self.output_page.generate_single_page( - output_image, input_path, old_height=outputs[1], old_width=outputs[2] - ) + self.output_page.generate_single_page(output_image, input_path, old_height=outputs[1], old_width=outputs[2]) def process(self): """ From b45e487e5f1c687fae0f62909d2ba8017597b94b Mon Sep 17 00:00:00 2001 From: Stefan Klut Date: Tue, 30 Jan 2024 13:15:02 +0100 Subject: [PATCH 5/9] Change the use of self.format to just use the cfg --- run.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/run.py b/run.py index 1b90042..c75e948 100644 --- a/run.py +++ b/run.py @@ -73,8 +73,7 @@ def __init__(self, cfg): if 
len(cfg.DATASETS.TEST): self.metadata = MetadataCatalog.get(cfg.DATASETS.TEST[0]) - self.input_format = cfg.INPUT.FORMAT - assert self.input_format in ["RGB", "BGR"], self.input_format + assert self.cfg.INPUT.FORMAT in ["RGB", "BGR"], self.cfg.INPUT.FORMAT checkpointer = DetectionCheckpointer(self.model) if not cfg.TEST.WEIGHTS: @@ -143,7 +142,7 @@ def gpu_call(self, original_image: torch.Tensor): assert channels == 3, f"Must be a BGR image, found {channels} channels" image = torch.as_tensor(original_image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE) - if self.input_format == "BGR": + if self.cfg.INPUT.FORMAT == "BGR": # whether the model expects BGR inputs or RGB image = image[[2, 1, 0], :, :] @@ -176,7 +175,7 @@ def cpu_call(self, original_image: np.ndarray): image = self.aug.get_transform(original_image).apply_image(original_image) image = torch.as_tensor(image, dtype=torch.float32, device=self.cfg.MODEL.DEVICE).permute(2, 0, 1) - if self.input_format == "BGR": + if self.cfg.INPUT.FORMAT == "BGR": # whether the model expects BGR inputs or RGB image = image[[2, 1, 0], :, :] From b88ddd5e275e82793eeaa3c387b83a0cf1f44ec1 Mon Sep 17 00:00:00 2001 From: Stefan Klut Date: Wed, 31 Jan 2024 14:16:44 +0100 Subject: [PATCH 6/9] NaN check --- run.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/run.py b/run.py index c75e948..163bf83 100644 --- a/run.py +++ b/run.py @@ -184,6 +184,9 @@ def cpu_call(self, original_image: np.ndarray): with torch.autocast(device_type=self.cfg.MODEL.DEVICE, enabled=self.cfg.MODEL.AUTOCAST): predictions = self.model([inputs])[0] + if torch.isnan(torch.ispredictions["sem_seg"]).any(): + raise ValueError("NaN in predictions") + return predictions, height, width def __call__(self, original_image): From 29a5dd14cd887821899afa4228ff8fb8a3bc3c5f Mon Sep 17 00:00:00 2001 From: Stefan Klut Date: Wed, 31 Jan 2024 14:41:53 +0100 Subject: [PATCH 7/9] Fix NaN check in predictions --- run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/run.py b/run.py index 163bf83..193491b 100644 --- a/run.py +++ b/run.py @@ -184,7 +184,7 @@ def cpu_call(self, original_image: np.ndarray): with torch.autocast(device_type=self.cfg.MODEL.DEVICE, enabled=self.cfg.MODEL.AUTOCAST): predictions = self.model([inputs])[0] - if torch.isnan(torch.ispredictions["sem_seg"]).any(): + if torch.isnan(predictions["sem_seg"]).any(): raise ValueError("NaN in predictions") return predictions, height, width From bc0fbd4a8a19428a8b8a235ff9f15f90fcf55983 Mon Sep 17 00:00:00 2001 From: Stefan Klut Date: Wed, 31 Jan 2024 15:25:55 +0100 Subject: [PATCH 8/9] Commented out code that raises ValueError for NaN in predictions --- run.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/run.py b/run.py index 193491b..a29e3ff 100644 --- a/run.py +++ b/run.py @@ -184,8 +184,8 @@ def cpu_call(self, original_image: np.ndarray): with torch.autocast(device_type=self.cfg.MODEL.DEVICE, enabled=self.cfg.MODEL.AUTOCAST): predictions = self.model([inputs])[0] - if torch.isnan(predictions["sem_seg"]).any(): - raise ValueError("NaN in predictions") + # if torch.isnan(predictions["sem_seg"]).any(): + # raise ValueError("NaN in predictions") return predictions, height, width From 493ecd140030ca7172f7c6e87857e068c2861564 Mon Sep 17 00:00:00 2001 From: Stefan Klut Date: Thu, 1 Feb 2024 13:50:02 +0100 Subject: [PATCH 9/9] Update AMP training and testing configurations --- configs/extra_defaults.py | 7 ++++++- core/trainer.py | 12 +++++++++++- run.py | 24 ++++++++++++++++++++++-- 3 files changed, 39 insertions(+), 4 deletions(-) diff --git a/configs/extra_defaults.py b/configs/extra_defaults.py index 7c10fdc..decb6e3 100644 --- a/configs/extra_defaults.py +++ b/configs/extra_defaults.py @@ -23,7 +23,12 @@ _C.MODEL.SEM_SEG_HEAD = CN() _C.MODEL.SEM_SEG_HEAD.WEIGHT = [1.0] -_C.MODEL.AUTOCAST = True +_C.MODEL.AMP_TRAIN = CN() +_C.MODEL.AMP_TRAIN.ENABLED = False +_C.MODEL.AMP_TRAIN.PRECISION = "bfloat16" +_C.MODEL.AMP_TEST = CN() 
+_C.MODEL.AMP_TEST.ENABLED = True +_C.MODEL.AMP_TEST.PRECISION = "bfloat16" # Weights _C.TRAIN = CN() diff --git a/core/trainer.py b/core/trainer.py index 4784945..370fe81 100644 --- a/core/trainer.py +++ b/core/trainer.py @@ -223,7 +223,17 @@ def __init__(self, cfg: CfgNode): data_loader = self.build_train_loader(cfg) model = create_ddp_model(model, broadcast_buffers=False) - self._trainer = (AMPTrainer if cfg.SOLVER.AMP.ENABLED else SimpleTrainer)(model, data_loader, optimizer) + self._trainer = (AMPTrainer if cfg.MODEL.AMP_TRAIN.ENABLED else SimpleTrainer)(model, data_loader, optimizer) + if isinstance(self._trainer, AMPTrainer): + precision_converter = { + "float32": torch.float32, + "float16": torch.float16, + "bfloat16": torch.bfloat16, + } + precision = precision_converter.get(cfg.MODEL.AMP_TRAIN.PRECISION, None) + if precision is None: + raise ValueError(f"Unrecognized precision: {cfg.MODEL.AMP_TRAIN.PRECISION}") + self._trainer.precision = precision self.scheduler = self.build_lr_scheduler(cfg, optimizer) diff --git a/run.py b/run.py index a29e3ff..9c5d595 100644 --- a/run.py +++ b/run.py @@ -73,6 +73,15 @@ def __init__(self, cfg): if len(cfg.DATASETS.TEST): self.metadata = MetadataCatalog.get(cfg.DATASETS.TEST[0]) + precision_converter = { + "float32": torch.float32, + "float16": torch.float16, + "bfloat16": torch.bfloat16, + } + self.precision = precision_converter.get(cfg.MODEL.AMP_TEST.PRECISION, None) + if self.precision is None: + raise ValueError(f"Unrecognized precision: {cfg.MODEL.AMP_TEST.PRECISION}") + assert self.cfg.INPUT.FORMAT in ["RGB", "BGR"], self.cfg.INPUT.FORMAT checkpointer = DetectionCheckpointer(self.model) @@ -153,9 +162,16 @@ def gpu_call(self, original_image: torch.Tensor): inputs = {"image": image, "height": new_height, "width": new_width} - with torch.autocast(device_type=self.cfg.MODEL.DEVICE, enabled=self.cfg.MODEL.AUTOCAST): + with torch.autocast( + device_type=self.cfg.MODEL.DEVICE, + enabled=self.cfg.MODEL.AMP_TEST.ENABLED, +
dtype=self.precision, + ): predictions = self.model([inputs])[0] + # if torch.isnan(predictions["sem_seg"]).any(): + # raise ValueError("NaN in predictions") + return predictions, height, width def cpu_call(self, original_image: np.ndarray): @@ -181,7 +197,11 @@ def cpu_call(self, original_image: np.ndarray): inputs = {"image": image, "height": image.shape[1], "width": image.shape[2]} - with torch.autocast(device_type=self.cfg.MODEL.DEVICE, enabled=self.cfg.MODEL.AUTOCAST): + with torch.autocast( + device_type=self.cfg.MODEL.DEVICE, + enabled=self.cfg.MODEL.AMP_TEST.ENABLED, + dtype=self.precision, + ): predictions = self.model([inputs])[0] # if torch.isnan(predictions["sem_seg"]).any():