From 464884f65a3f23a0f039155d24f2b8e7663731dc Mon Sep 17 00:00:00 2001
From: Vladislav Sovrasov
Date: Fri, 17 May 2024 09:47:28 +0200
Subject: [PATCH] Add a flag to enable OV inference on dGPU (#3503)

* dGPU inference for OV models

* Extract reading of hparams in OVModel

* Fix usage of get_user_config

* Fix ruff

* Add force_cpu flag to ov model

* Fix missing ov core

* Fix plugin configuration

* Add one more unit test for OVModel

* Fix imports

* Revert inf exp changes

---------

Co-authored-by: kprokofi
---
 README.md                                   |  2 +-
 src/otx/core/model/base.py                  | 37 +++++++++++++++++++--
 src/otx/core/model/classification.py        | 12 ++++---
 src/otx/core/model/detection.py             | 29 +++++-----------
 src/otx/core/model/instance_segmentation.py | 31 ++++++-----------
 src/otx/core/model/segmentation.py          |  4 +--
 tests/integration/cli/test_cli.py           |  2 +-
 tests/unit/core/model/test_base.py          |  3 ++
 8 files changed, 67 insertions(+), 53 deletions(-)

diff --git a/README.md b/README.md
index 960b0e9ad94..b88265a2502 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,7 @@

 [![python](https://img.shields.io/badge/python-3.10%2B-green)]()
 [![pytorch](https://img.shields.io/badge/pytorch-2.1.1%2B-orange)]()
-[![openvino](https://img.shields.io/badge/openvino-2023.3.0-purple)]()
+[![openvino](https://img.shields.io/badge/openvino-2024.0-purple)]()

diff --git a/src/otx/core/model/base.py b/src/otx/core/model/base.py
index d5fcc8bc32e..c2e52d19a77 100644
--- a/src/otx/core/model/base.py
+++ b/src/otx/core/model/base.py
@@ -58,6 +58,7 @@
     from lightning.pytorch.cli import LRSchedulerCallable, OptimizerCallable
     from lightning.pytorch.utilities.types import LRSchedulerTypeUnion, OptimizerLRScheduler
+    from model_api.adapters import OpenvinoAdapter
     from torch.optim.lr_scheduler import LRScheduler
     from torch.optim.optimizer import Optimizer, params_t

@@ -807,6 +808,7 @@ def __init__(
         model_name: str,
         model_type: str,
         async_inference: bool = True,
+        force_cpu: bool = True,
         max_num_requests: int | None = None,
         use_throughput_mode: bool = True,
         model_api_configuration: dict[str, Any] | None = None,
@@ -815,6 +817,7 @@ def __init__(
     ) -> None:
         self.model_name = model_name
         self.model_type = model_type
+        self.force_cpu = force_cpu
         self.async_inference = async_inference
         self.num_requests = max_num_requests if max_num_requests is not None else get_default_num_async_infer_requests()
         self.use_throughput_mode = use_throughput_mode
@@ -835,21 +838,49 @@ def _setup_tiler(self) -> None:
         """Setup tiler for tile task."""
         raise NotImplementedError

+    def _get_hparams_from_adapter(self, model_adapter: OpenvinoAdapter) -> None:
+        """Reads model configuration from ModelAPI OpenVINO adapter.
+
+        Args:
+            model_adapter (OpenvinoAdapter): target adapter to read the config
+        """
+
     def _create_model(self) -> Model:
         """Create a OV model with help of Model API."""
-        from model_api.adapters import OpenvinoAdapter, create_core, get_user_config
+        from model_api.adapters import OpenvinoAdapter, create_core
+
+        if self.device.type != "cpu":
+            msg = (
+                f"Device {self.device.type} is set for Lightning module, but the actual inference "
+                "device is selected by OpenVINO."
+            )
+            logger.warning(msg)

-        plugin_config = get_user_config("AUTO", str(self.num_requests), "AUTO")
+        ov_device = "CPU"
+        ie = create_core()
+        if not self.force_cpu:
+            devices = ie.available_devices
+            for device in devices:
+                device_name = ie.get_property(device_name=device, property="FULL_DEVICE_NAME")
+                if "dGPU" in device_name and "Intel" in device_name:
+                    ov_device = device
+                    break
+
+        plugin_config = {}
         if self.use_throughput_mode:
             plugin_config["PERFORMANCE_HINT"] = "THROUGHPUT"

         model_adapter = OpenvinoAdapter(
-            create_core(),
+            ie,
             self.model_name,
+            device=ov_device,
             max_num_requests=self.num_requests,
             plugin_config=plugin_config,
             model_parameters=self.model_adapter_parameters,
         )
+
+        self._get_hparams_from_adapter(model_adapter)
+
         return Model.create_model(model_adapter, model_type=self.model_type, configuration=self.model_api_configuration)

     def _customize_inputs(self, entity: T_OTXBatchDataEntity) -> dict[str, Any]:
diff --git a/src/otx/core/model/classification.py b/src/otx/core/model/classification.py
index 94a5a77f3cf..c189b9b9e32 100644
--- a/src/otx/core/model/classification.py
+++ b/src/otx/core/model/classification.py
@@ -672,8 +672,8 @@ def _customize_outputs(
         outputs: list[ClassificationResult],
         inputs: MulticlassClsBatchDataEntity,
     ) -> MulticlassClsBatchPredEntity:
-        pred_labels = [torch.tensor(out.top_labels[0][0], dtype=torch.long) for out in outputs]
-        pred_scores = [torch.tensor(out.top_labels[0][2]) for out in outputs]
+        pred_labels = [torch.tensor(out.top_labels[0][0], dtype=torch.long, device=self.device) for out in outputs]
+        pred_scores = [torch.tensor(out.top_labels[0][2], device=self.device) for out in outputs]

         if outputs and outputs[0].saliency_map.size != 0:
             # Squeeze dim 4D => 3D, (1, num_classes, H, W) => (num_classes, H, W)
@@ -747,7 +747,9 @@ def _customize_outputs(
         outputs: list[ClassificationResult],
         inputs: MultilabelClsBatchDataEntity,
     ) -> MultilabelClsBatchPredEntity:
-        pred_scores = [torch.tensor([top_label[2] for top_label in out.top_labels]) for out in outputs]
+        pred_scores = [
+            torch.tensor([top_label[2] for top_label in out.top_labels], device=self.device) for out in outputs
+        ]

         if outputs and outputs[0].saliency_map.size != 0:
             # Squeeze dim 4D => 3D, (1, num_classes, H, W) => (num_classes, H, W)
@@ -844,8 +846,8 @@ def _customize_outputs(
             else:
                 predicted_labels.append(0)

-            all_pred_labels.append(torch.tensor(predicted_labels, dtype=torch.long))
-            all_pred_scores.append(torch.tensor(predicted_scores))
+            all_pred_labels.append(torch.tensor(predicted_labels, dtype=torch.long, device=self.device))
+            all_pred_scores.append(torch.tensor(predicted_scores, device=self.device))

         if outputs and outputs[0].saliency_map.size != 0:
             # Squeeze dim 4D => 3D, (1, num_classes, H, W) => (num_classes, H, W)
diff --git a/src/otx/core/model/detection.py b/src/otx/core/model/detection.py
index 57bc70ced9a..bc6bcd995c5 100644
--- a/src/otx/core/model/detection.py
+++ b/src/otx/core/model/detection.py
@@ -11,7 +11,6 @@
 from typing import TYPE_CHECKING, Any, Callable, Iterator, Literal

 import torch
-from model_api.models import Model
 from model_api.tilers import DetectionTiler
 from torchvision import tv_tensors

@@ -31,6 +30,7 @@
 if TYPE_CHECKING:
     from lightning.pytorch.cli import LRSchedulerCallable, OptimizerCallable
     from mmdet.models.data_preprocessors import DetDataPreprocessor
+    from model_api.adapters import OpenvinoAdapter
     from model_api.models.utils import DetectionResult
     from omegaconf import DictConfig
     from torch import nn

@@ -520,22 +520,12 @@ def _setup_tiler(self) -> None:
             and overlap: {self.model.tiles_overlap}",
         )

-    def _create_model(self) -> Model:
-        """Create a OV model with help of Model API."""
-        from model_api.adapters import OpenvinoAdapter, create_core, get_user_config
-
-        plugin_config = get_user_config("AUTO", str(self.num_requests), "AUTO")
-        if self.use_throughput_mode:
-            plugin_config["PERFORMANCE_HINT"] = "THROUGHPUT"
-
-        model_adapter = OpenvinoAdapter(
-            create_core(),
-            self.model_name,
-            max_num_requests=self.num_requests,
-            plugin_config=plugin_config,
-            model_parameters=self.model_adapter_parameters,
-        )
+    def _get_hparams_from_adapter(self, model_adapter: OpenvinoAdapter) -> None:
+        """Reads model configuration from ModelAPI OpenVINO adapter.
+        Args:
+            model_adapter (OpenvinoAdapter): target adapter to read the config
+        """
         if model_adapter.model.has_rt_info(["model_info", "confidence_threshold"]):
             best_confidence_threshold = model_adapter.model.get_rt_info(["model_info", "confidence_threshold"]).value
             self.hparams["best_confidence_threshold"] = float(best_confidence_threshold)
@@ -549,8 +539,6 @@ def _create_model(self) -> Model:
             log.warning(msg)
             self.hparams["best_confidence_threshold"] = None

-        return Model.create_model(model_adapter, model_type=self.model_type, configuration=self.model_api_configuration)
-
     def _customize_outputs(
         self,
         outputs: list[DetectionResult],
@@ -583,10 +571,11 @@ def _customize_outputs(
                     bbox,
                     format="XYXY",
                     canvas_size=inputs.imgs_info[-1].img_shape,
+                    device=self.device,
                 ),
             )
-            scores.append(torch.tensor([output.score for output in output_objects]))
-            labels.append(torch.tensor([output.id - label_shift for output in output_objects]))
+            scores.append(torch.tensor([output.score for output in output_objects], device=self.device))
+            labels.append(torch.tensor([output.id - label_shift for output in output_objects], device=self.device))

         if outputs and outputs[0].saliency_map.size > 1:
             # Squeeze dim 4D => 3D, (1, num_classes, H, W) => (num_classes, H, W)
diff --git a/src/otx/core/model/instance_segmentation.py b/src/otx/core/model/instance_segmentation.py
index ae8652bc2c0..05f69d964dc 100644
--- a/src/otx/core/model/instance_segmentation.py
+++ b/src/otx/core/model/instance_segmentation.py
@@ -12,7 +12,6 @@

 import numpy as np
 import torch
-from model_api.models import Model
 from model_api.tilers import InstanceSegmentationTiler
 from torchvision import tv_tensors

@@ -36,6 +35,7 @@
 if TYPE_CHECKING:
     from lightning.pytorch.cli import LRSchedulerCallable, OptimizerCallable
     from mmdet.models.data_preprocessors import DetDataPreprocessor
+    from model_api.adapters import OpenvinoAdapter
     from model_api.models.utils import InstanceSegmentationResult
     from omegaconf import DictConfig
     from torch import nn

@@ -579,22 +579,12 @@ def _setup_tiler(self) -> None:
             and overlap: {self.model.tiles_overlap}",
         )

-    def _create_model(self) -> Model:
-        """Create a OV model with help of Model API."""
-        from model_api.adapters import OpenvinoAdapter, create_core, get_user_config
-
-        plugin_config = get_user_config("AUTO", str(self.num_requests), "AUTO")
-        if self.use_throughput_mode:
-            plugin_config["PERFORMANCE_HINT"] = "THROUGHPUT"
-
-        model_adapter = OpenvinoAdapter(
-            create_core(),
-            self.model_name,
-            max_num_requests=self.num_requests,
-            plugin_config=plugin_config,
-            model_parameters=self.model_adapter_parameters,
-        )
+    def _get_hparams_from_adapter(self, model_adapter: OpenvinoAdapter) -> None:
+        """Reads model configuration from ModelAPI OpenVINO adapter.
+        Args:
+            model_adapter (OpenvinoAdapter): target adapter to read the config
+        """
         if model_adapter.model.has_rt_info(["model_info", "confidence_threshold"]):
             best_confidence_threshold = model_adapter.model.get_rt_info(["model_info", "confidence_threshold"]).value
             self.hparams["best_confidence_threshold"] = float(best_confidence_threshold)
@@ -608,8 +598,6 @@ def _create_model(self) -> Model:
             log.warning(msg)
             self.hparams["best_confidence_threshold"] = None

-        return Model.create_model(model_adapter, model_type=self.model_type, configuration=self.model_api_configuration)
-
     def _customize_outputs(
         self,
         outputs: list[InstanceSegmentationResult],
@@ -631,15 +619,16 @@ def _customize_outputs(
                     bbox,
                     format="XYXY",
                     canvas_size=inputs.imgs_info[-1].img_shape,
+                    device=self.device,
                 ),
             )
             # NOTE: OTX 1.5 filter predictions with result_based_confidence_threshold,
             # but OTX 2.0 doesn't have it in configuration.
             _masks = [output.mask for output in output_objects]
             _masks = np.stack(_masks) if len(_masks) else []
-            scores.append(torch.tensor([output.score for output in output_objects]))
-            masks.append(torch.tensor(_masks))
-            labels.append(torch.tensor([output.id - 1 for output in output_objects]))
+            scores.append(torch.tensor([output.score for output in output_objects], device=self.device))
+            masks.append(torch.tensor(_masks, device=self.device))
+            labels.append(torch.tensor([output.id - 1 for output in output_objects], device=self.device))

         if outputs and outputs[0].saliency_map:
             predicted_s_maps = []
diff --git a/src/otx/core/model/segmentation.py b/src/otx/core/model/segmentation.py
index ce3b724c387..0e8cbf85e06 100644
--- a/src/otx/core/model/segmentation.py
+++ b/src/otx/core/model/segmentation.py
@@ -364,7 +364,7 @@ def _customize_outputs(
                 images=inputs.images,
                 imgs_info=inputs.imgs_info,
                 scores=[],
-                masks=[tv_tensors.Mask(mask.resultImage) for mask in outputs],
+                masks=[tv_tensors.Mask(mask.resultImage, device=self.device) for mask in outputs],
                 saliency_map=predicted_s_maps,
                 feature_vector=predicted_f_vectors,
             )
@@ -374,7 +374,7 @@ def _customize_outputs(
             images=inputs.images,
             imgs_info=inputs.imgs_info,
             scores=[],
-            masks=[tv_tensors.Mask(mask.resultImage) for mask in outputs],
+            masks=[tv_tensors.Mask(mask.resultImage, device=self.device) for mask in outputs],
         )

     def _convert_pred_entity_to_compute_metric(
diff --git a/tests/integration/cli/test_cli.py b/tests/integration/cli/test_cli.py
index 4d5f4614520..f53830c7b87 100644
--- a/tests/integration/cli/test_cli.py
+++ b/tests/integration/cli/test_cli.py
@@ -217,7 +217,7 @@ def test_otx_e2e(
         "--work_dir",
         str(tmp_path_test / "outputs"),
         "--engine.device",
-        "cpu",
+        fxt_accelerator,
         *overrides,
         "--checkpoint",
         exported_model_path,
diff --git a/tests/unit/core/model/test_base.py b/tests/unit/core/model/test_base.py
index 6a82da35505..8b33b22d7e9 100644
--- a/tests/unit/core/model/test_base.py
+++ b/tests/unit/core/model/test_base.py
@@ -109,6 +109,9 @@ def input_batch(self) -> OTXBatchDataEntity:
     def model(self) -> OVModel:
         return OVModel(model_name="efficientnet-b0-pytorch", model_type="Classification")

+    def test_create_model(self) -> None:
+        OVModel(model_name="efficientnet-b0-pytorch", model_type="Classification", force_cpu=False)
+
     def test_customize_inputs(self, model, input_batch) -> None:
         inputs = model._customize_inputs(input_batch)
         assert isinstance(inputs, dict)
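
Editor's note: below is a minimal usage sketch, not part of the patch itself, showing the new force_cpu flag end to end. It mirrors the unit test added above: the model name and type are taken from that test, and the import path assumes OVModel is defined in src/otx/core/model/base.py as the diff indicates.

# Usage sketch for the new force_cpu flag (illustrative; values borrowed from
# the unit test added in this PR).
from otx.core.model.base import OVModel

# Default: force_cpu=True keeps the previous behaviour and runs inference on "CPU".
cpu_model = OVModel(
    model_name="efficientnet-b0-pytorch",
    model_type="Classification",
)

# Opting out of force_cpu: per the diff, _create_model() walks ie.available_devices
# and picks the first device whose FULL_DEVICE_NAME contains both "Intel" and
# "dGPU", falling back to "CPU" when no discrete Intel GPU is present.
dgpu_model = OVModel(
    model_name="efficientnet-b0-pytorch",
    model_type="Classification",
    force_cpu=False,
)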