diff --git a/.github/ISSUE_TEMPLATE/compute_issue_template b/.github/ISSUE_TEMPLATE/compute_issue_template
new file mode 100644
index 00000000..802d541d
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/compute_issue_template
@@ -0,0 +1,33 @@
+---
+name: Compute Bring Your Own Model - Bug report
+about: Create a report to help us improve
+title: "[BUG] Compute BYOM Issue: "
+labels: 'compute'
+assignees: ''
+
+---
+
+**Describe the issue**
+Please describe the issue you're hitting, such as performance, accuracy, or other model issues encountered while bringing your own model to Qualcomm AI Hub for deployment to compute devices.
+
+**To Reproduce**
+Steps to reproduce the behavior:
+1. Go to '...'
+2. Click on '....'
+3. Scroll down to '....'
+4. See error
+
+**Expected behavior**
+A clear and concise description of what you expected to happen.
+
+**Stack trace**
+If applicable, add the stack trace and/or screenshots to help explain your problem.
+
+**Host configuration:**
+ - OS and version: [e.g. Linux, Windows, macOS]
+ - Browser: [e.g. Chrome, Safari]
+ - QAI-Hub-Models version:
+ - QAI-Hub client version:
+
+**Additional context**
+Add any other context about the problem here.
diff --git a/.github/ISSUE_TEMPLATE/iot_issue_template.md b/.github/ISSUE_TEMPLATE/iot_issue_template.md
new file mode 100644
index 00000000..52735148
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/iot_issue_template.md
@@ -0,0 +1,33 @@
+---
+name: IoT Bring Your Own Model - Model Issue
+about: Create a report to help us improve
+title: "[BUG] IoT BYOM Issue: "
+labels: 'iot'
+assignees: ''
+
+---
+
+**Describe the issue**
+Please describe the issue you're hitting, such as performance, accuracy, or other model issues encountered while bringing your own model to Qualcomm AI Hub for deployment to IoT devices.
+
+**To Reproduce**
+Steps to reproduce the behavior:
+1. Go to '...'
+2. Click on '....'
+3. Scroll down to '....'
+4. See error
+
+**Expected behavior**
+A clear and concise description of what you expected to happen.
+
+**Stack trace**
+If applicable, add the stack trace and/or screenshots to help explain your problem.
+
+**Host configuration:**
+ - OS and version: [e.g. Linux, Windows, macOS]
+ - Browser: [e.g. Chrome, Safari]
+ - QAI-Hub-Models version:
+ - QAI-Hub client version:
+
+**Additional context**
+Add any other context about the problem here.
diff --git a/.github/ISSUE_TEMPLATE/mobile_issue_template.md b/.github/ISSUE_TEMPLATE/mobile_issue_template.md
new file mode 100644
index 00000000..5177cb85
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/mobile_issue_template.md
@@ -0,0 +1,33 @@
+---
+name: Mobile Bring Your Own Model - Model Issue
+about: Create a report to help us improve
+title: "[BUG] Mobile BYOM Issue: "
+labels: 'mobile'
+assignees: ''
+
+---
+
+**Describe the issue**
+Please describe the issue you're hitting, such as performance, accuracy, or other model issues encountered while bringing your own model to Qualcomm AI Hub for deployment to mobile devices.
+
+**To Reproduce**
+Steps to reproduce the behavior:
+1. Go to '...'
+2. Click on '....'
+3. Scroll down to '....'
+4. See error
+
+**Expected behavior**
+A clear and concise description of what you expected to happen.
+
+**Stack trace**
+If applicable, add the stack trace and/or screenshots to help explain your problem.
+
+**Host configuration:**
+ - OS and version: [e.g. Linux, Windows, macOS]
+ - Browser: [e.g. Chrome, Safari]
+ - QAI-Hub-Models version:
+ - QAI-Hub client version:
+
+**Additional context**
+Add any other context about the problem here.
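The host-configuration section in each template asks reporters for package versions. A minimal, hypothetical snippet for collecting those fields from Python is below; the `qai_hub_models._version` import mirrors the `qai_hub_models/_version.py` file touched later in this diff, while the `qai-hub` distribution name passed to `importlib.metadata` is an assumption about how the client package is published.

```python
# Hypothetical helper for filling in the "Host configuration" fields above.
# The distribution name "qai-hub" is an assumption, not confirmed by this diff.
import platform
from importlib.metadata import PackageNotFoundError, version

from qai_hub_models._version import __version__ as qai_hub_models_version

print("OS and version:", platform.platform())
print("QAI-Hub-Models version:", qai_hub_models_version)
try:
    print("QAI-Hub client version:", version("qai-hub"))
except PackageNotFoundError:
    print("QAI-Hub client version: not installed")
```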
diff --git a/README.md b/README.md index ce140fb0..2cac033b 100644 --- a/README.md +++ b/README.md @@ -286,6 +286,7 @@ Qualcomm® AI Hub Models is licensed under BSD-3. See the [LICENSE file](../LICE | [MobileNet-v3-Large-Quantized](https://aihub.qualcomm.com/models/mobilenet_v3_large_quantized) | [qai_hub_models.models.mobilenet_v3_large_quantized](qai_hub_models/models/mobilenet_v3_large_quantized/README.md) | ✔️ | ✔️ | ✔️ | [MobileNet-v3-Small](https://aihub.qualcomm.com/models/mobilenet_v3_small) | [qai_hub_models.models.mobilenet_v3_small](qai_hub_models/models/mobilenet_v3_small/README.md) | ✔️ | ✔️ | ✔️ | [RegNet](https://aihub.qualcomm.com/models/regnet) | [qai_hub_models.models.regnet](qai_hub_models/models/regnet/README.md) | ✔️ | ✔️ | ✔️ +| [RegNetQuantized](https://aihub.qualcomm.com/models/regnet_quantized) | [qai_hub_models.models.regnet_quantized](qai_hub_models/models/regnet_quantized/README.md) | ✔️ | ✔️ | ✔️ | [ResNeXt101](https://aihub.qualcomm.com/models/resnext101) | [qai_hub_models.models.resnext101](qai_hub_models/models/resnext101/README.md) | ✔️ | ✔️ | ✔️ | [ResNeXt101Quantized](https://aihub.qualcomm.com/models/resnext101_quantized) | [qai_hub_models.models.resnext101_quantized](qai_hub_models/models/resnext101_quantized/README.md) | ✔️ | ✔️ | ✔️ | [ResNeXt50](https://aihub.qualcomm.com/models/resnext50) | [qai_hub_models.models.resnext50](qai_hub_models/models/resnext50/README.md) | ✔️ | ✔️ | ✔️ @@ -295,6 +296,7 @@ Qualcomm® AI Hub Models is licensed under BSD-3. See the [LICENSE file](../LICE | [ResNet18](https://aihub.qualcomm.com/models/resnet18) | [qai_hub_models.models.resnet18](qai_hub_models/models/resnet18/README.md) | ✔️ | ✔️ | ✔️ | [ResNet18Quantized](https://aihub.qualcomm.com/models/resnet18_quantized) | [qai_hub_models.models.resnet18_quantized](qai_hub_models/models/resnet18_quantized/README.md) | ✔️ | ✔️ | ✔️ | [ResNet50](https://aihub.qualcomm.com/models/resnet50) | [qai_hub_models.models.resnet50](qai_hub_models/models/resnet50/README.md) | ✔️ | ✔️ | ✔️ +| [ResNet50Quantized](https://aihub.qualcomm.com/models/resnet50_quantized) | [qai_hub_models.models.resnet50_quantized](qai_hub_models/models/resnet50_quantized/README.md) | ✔️ | ✔️ | ✔️ | [Shufflenet-v2](https://aihub.qualcomm.com/models/shufflenet_v2) | [qai_hub_models.models.shufflenet_v2](qai_hub_models/models/shufflenet_v2/README.md) | ✔️ | ✔️ | ✔️ | [Shufflenet-v2Quantized](https://aihub.qualcomm.com/models/shufflenet_v2_quantized) | [qai_hub_models.models.shufflenet_v2_quantized](qai_hub_models/models/shufflenet_v2_quantized/README.md) | ✔️ | ✔️ | ✔️ | [SqueezeNet-1_1](https://aihub.qualcomm.com/models/squeezenet1_1) | [qai_hub_models.models.squeezenet1_1](qai_hub_models/models/squeezenet1_1/README.md) | ✔️ | ✔️ | ✔️ @@ -368,13 +370,16 @@ Qualcomm® AI Hub Models is licensed under BSD-3. 
See the [LICENSE file](../LICE | | | | | | **Pose Estimation** | [HRNetPose](https://aihub.qualcomm.com/models/hrnet_pose) | [qai_hub_models.models.hrnet_pose](qai_hub_models/models/hrnet_pose/README.md) | ✔️ | ✔️ | ✔️ +| [HRNetPoseQuantized](https://aihub.qualcomm.com/models/hrnet_pose_quantized) | [qai_hub_models.models.hrnet_pose_quantized](qai_hub_models/models/hrnet_pose_quantized/README.md) | ✔️ | ✔️ | ✔️ | [LiteHRNet](https://aihub.qualcomm.com/models/litehrnet) | [qai_hub_models.models.litehrnet](qai_hub_models/models/litehrnet/README.md) | ✔️ | ✔️ | ✔️ | [MediaPipe-Pose-Estimation](https://aihub.qualcomm.com/models/mediapipe_pose) | [qai_hub_models.models.mediapipe_pose](qai_hub_models/models/mediapipe_pose/README.md) | ✔️ | ✔️ | ✔️ | [OpenPose](https://aihub.qualcomm.com/models/openpose) | [qai_hub_models.models.openpose](qai_hub_models/models/openpose/README.md) | ✔️ | ✔️ | ✔️ | [Posenet-Mobilenet](https://aihub.qualcomm.com/models/posenet_mobilenet) | [qai_hub_models.models.posenet_mobilenet](qai_hub_models/models/posenet_mobilenet/README.md) | ✔️ | ✔️ | ✔️ +| [Posenet-Mobilenet-Quantized](https://aihub.qualcomm.com/models/posenet_mobilenet_quantized) | [qai_hub_models.models.posenet_mobilenet_quantized](qai_hub_models/models/posenet_mobilenet_quantized/README.md) | ✔️ | ✔️ | ✔️ | | | | | | **Depth Estimation** | [Midas-V2](https://aihub.qualcomm.com/models/midas) | [qai_hub_models.models.midas](qai_hub_models/models/midas/README.md) | ✔️ | ✔️ | ✔️ +| [Midas-V2-Quantized](https://aihub.qualcomm.com/models/midas_quantized) | [qai_hub_models.models.midas_quantized](qai_hub_models/models/midas_quantized/README.md) | ✔️ | ✔️ | ✔️ ### Audio diff --git a/qai_hub_models/_version.py b/qai_hub_models/_version.py index 49a2e97c..d5abcbec 100644 --- a/qai_hub_models/_version.py +++ b/qai_hub_models/_version.py @@ -2,4 +2,4 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- -__version__ = "0.8.0" +__version__ = "0.9.0" diff --git a/qai_hub_models/evaluators/base_evaluators.py b/qai_hub_models/evaluators/base_evaluators.py index d933680d..51235aee 100644 --- a/qai_hub_models/evaluators/base_evaluators.py +++ b/qai_hub_models/evaluators/base_evaluators.py @@ -135,7 +135,6 @@ def _for_each_batch( The input, output, and (if provided) ground_truth will be passed to this function after each inference. 
""" torch_device = torch.device(device) - model.eval() model.to(torch_device) total_samples = 0 num_samples = num_samples or len(data) diff --git a/qai_hub_models/global_requirements.txt b/qai_hub_models/global_requirements.txt index af284942..b3dc1e52 100644 --- a/qai_hub_models/global_requirements.txt +++ b/qai_hub_models/global_requirements.txt @@ -9,6 +9,7 @@ Deprecated==1.2.11 PySoundFile; sys_platform == 'win32' aimet-torch==1.31.2; sys_platform == "linux" albumentations==0.5.2 +audio2numpy==0.1.2 basicsr==1.4.2 boto3==1.34.119 botocore==1.34.119 @@ -41,6 +42,7 @@ pytorch-lightning==1.6.0 rapidfuzz==3.8.1 regex==2023.10.3 ruamel-yaml==0.18.6 +samplerate==0.2.1 schema==0.7.5 scikit-image==0.21.0 scikit-learn==1.1.3 diff --git a/qai_hub_models/models/_shared/cityscapes_segmentation/app.py b/qai_hub_models/models/_shared/cityscapes_segmentation/app.py index 03325642..e9413727 100644 --- a/qai_hub_models/models/_shared/cityscapes_segmentation/app.py +++ b/qai_hub_models/models/_shared/cityscapes_segmentation/app.py @@ -119,8 +119,7 @@ def predict(self, image: Image, raw_output: bool = False) -> Image | np.ndarray: ) input_tensor = preprocess_cityscapes_image(resized_image) - with torch.no_grad(): - small_res_output = self.model(input_tensor) + small_res_output = self.model(input_tensor) output = F.interpolate( small_res_output, diff --git a/qai_hub_models/models/_shared/convnext_tiny_quantized/model.py b/qai_hub_models/models/_shared/convnext_tiny_quantized/model.py index c098f281..c6f8927f 100644 --- a/qai_hub_models/models/_shared/convnext_tiny_quantized/model.py +++ b/qai_hub_models/models/_shared/convnext_tiny_quantized/model.py @@ -122,5 +122,4 @@ def from_pretrained( aimet_encodings = cls._default_aimet_encodings() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() return cls(sim) diff --git a/qai_hub_models/models/_shared/deeplab/app.py b/qai_hub_models/models/_shared/deeplab/app.py index 2a8b929a..85bcf519 100644 --- a/qai_hub_models/models/_shared/deeplab/app.py +++ b/qai_hub_models/models/_shared/deeplab/app.py @@ -64,9 +64,8 @@ def predict(self, image: Image, raw_output: bool = False) -> Image | np.ndarray: """ input_tensor = preprocess_image(image) - with torch.no_grad(): - output = self.model(input_tensor) - output = output[0] + output = self.model(input_tensor) + output = output[0] predictions = output.argmax(0).byte().cpu().numpy() if raw_output: diff --git a/qai_hub_models/models/_shared/detr/app.py b/qai_hub_models/models/_shared/detr/app.py index 3797f4aa..d14885b4 100644 --- a/qai_hub_models/models/_shared/detr/app.py +++ b/qai_hub_models/models/_shared/detr/app.py @@ -81,8 +81,7 @@ def predict( ) image_array = normalize_image_torchvision(preprocess_PIL_image(image)) - with torch.no_grad(): - outputs = self.model(image_array) + outputs = self.model(image_array) target_sizes = torch.tensor(image.size[::-1]).unsqueeze(0) out_logits, out_bbox = outputs[0], outputs[1] diff --git a/qai_hub_models/models/_shared/detr/model.py b/qai_hub_models/models/_shared/detr/model.py index a92a0a91..c5f812a5 100644 --- a/qai_hub_models/models/_shared/detr/model.py +++ b/qai_hub_models/models/_shared/detr/model.py @@ -27,7 +27,6 @@ def __init__(self, model: nn.Module) -> None: @classmethod def from_pretrained(cls, ckpt_name: str): model = DetrForObjectDetection.from_pretrained(ckpt_name) - model.eval() return cls(model) def forward(self, image: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: diff --git a/qai_hub_models/models/_shared/fastsam/model.py 
b/qai_hub_models/models/_shared/fastsam/model.py index 4342fb72..2ed257d7 100644 --- a/qai_hub_models/models/_shared/fastsam/model.py +++ b/qai_hub_models/models/_shared/fastsam/model.py @@ -22,7 +22,6 @@ def __init__(self, model: nn.Module) -> None: @classmethod def from_pretrained(cls, ckpt_name: str): model = FastSAM(ckpt_name).model - model.eval() return cls(model) def forward(self, image: torch.Tensor): diff --git a/qai_hub_models/models/_shared/ffnet/model.py b/qai_hub_models/models/_shared/ffnet/model.py index e1fb853a..ba54b038 100644 --- a/qai_hub_models/models/_shared/ffnet/model.py +++ b/qai_hub_models/models/_shared/ffnet/model.py @@ -69,7 +69,6 @@ class FFNet(CityscapesSegmentor): @classmethod def from_pretrained(cls: Type[FFNetType], variant_name: str) -> FFNetType: model = _load_ffnet_source_model(variant_name) - model.eval() return cls(model) @@ -121,7 +120,7 @@ def _load_ffnet_source_model(variant_name) -> torch.nn.Module: from models.model_registry import model_entrypoint - model = model_entrypoint(variant_name)().eval() + model = model_entrypoint(variant_name)() return model diff --git a/qai_hub_models/models/_shared/ffnet_quantized/model.py b/qai_hub_models/models/_shared/ffnet_quantized/model.py index 9a7e6522..d9a29dd5 100644 --- a/qai_hub_models/models/_shared/ffnet_quantized/model.py +++ b/qai_hub_models/models/_shared/ffnet_quantized/model.py @@ -78,5 +78,4 @@ def from_pretrained( aimet_encodings = cls.default_aimet_encodings() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() return cls(sim) diff --git a/qai_hub_models/models/_shared/imagenet_classifier/app.py b/qai_hub_models/models/_shared/imagenet_classifier/app.py index bee0e780..2762b89f 100644 --- a/qai_hub_models/models/_shared/imagenet_classifier/app.py +++ b/qai_hub_models/models/_shared/imagenet_classifier/app.py @@ -75,6 +75,5 @@ def predict(self, image: Image) -> torch.Tensor: to a different Imagenet1K class. """ input_tensor = preprocess_image(image, not self.normalization_in_network) - with torch.no_grad(): - output = self.model(input_tensor) + output = self.model(input_tensor) return torch.softmax(output[0], dim=0) diff --git a/qai_hub_models/models/_shared/imagenet_classifier/model.py b/qai_hub_models/models/_shared/imagenet_classifier/model.py index ac4e1b4d..878a6673 100644 --- a/qai_hub_models/models/_shared/imagenet_classifier/model.py +++ b/qai_hub_models/models/_shared/imagenet_classifier/model.py @@ -55,7 +55,6 @@ def __init__( self.normalize_input = normalize_input self.transform_input = transform_input self.net = net - self.eval() # Type annotation on image_tensor causes aimet onnx export failure def forward(self, image_tensor): diff --git a/qai_hub_models/models/_shared/llama/__init__.py b/qai_hub_models/models/_shared/llama/__init__.py new file mode 100644 index 00000000..21a22b31 --- /dev/null +++ b/qai_hub_models/models/_shared/llama/__init__.py @@ -0,0 +1,4 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- diff --git a/qai_hub_models/models/llama_v2_7b_chat_quantized/app.py b/qai_hub_models/models/_shared/llama/app.py similarity index 59% rename from qai_hub_models/models/llama_v2_7b_chat_quantized/app.py rename to qai_hub_models/models/_shared/llama/app.py index f27b7d29..65a95e64 100644 --- a/qai_hub_models/models/llama_v2_7b_chat_quantized/app.py +++ b/qai_hub_models/models/_shared/llama/app.py @@ -2,28 +2,18 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- +from __future__ import annotations + import gc -from typing import Any, Callable, List, Tuple +from abc import abstractmethod +from typing import Any, Callable, Dict, List, Set, Tuple import qai_hub as hub import torch -from qai_hub_models.models.llama_v2_7b_chat_quantized.model import ( - NUM_SPLITS, - Llama2_PromptProcessor_1_Quantized, - Llama2_PromptProcessor_2_Quantized, - Llama2_PromptProcessor_3_Quantized, - Llama2_PromptProcessor_4_Quantized, - Llama2_TokenGenerator_1_Quantized, - Llama2_TokenGenerator_2_Quantized, - Llama2_TokenGenerator_3_Quantized, - Llama2_TokenGenerator_4_Quantized, - get_input_prompt_with_tags, - get_past_keyval_with_shift, - prepare_combined_attention_mask, -) -from qai_hub_models.models.llama_v2_7b_chat_quantized.modeling_llama import ( +from qai_hub_models.models._shared.llama.model import ( RopeEmbedding, + get_past_keyval_with_shift, ) from qai_hub_models.utils.base_model import CollectionModel from qai_hub_models.utils.inference import ExecutableModelProtocol, HubModel @@ -35,36 +25,22 @@ def _get_tokens_from_logits(output: torch.Tensor): return torch.multinomial(probs, num_samples=1).squeeze(1) -def _get_model_class(split_part: int, is_token_generator: bool = False): - if split_part < 1 or split_part > 4: - raise RuntimeError( - "Incorrect index provided to request Model split class." - f" Must be within (1-4), provided ({split_part})." 
- ) - - if is_token_generator: - return [ - Llama2_TokenGenerator_1_Quantized, - Llama2_TokenGenerator_2_Quantized, - Llama2_TokenGenerator_3_Quantized, - Llama2_TokenGenerator_4_Quantized, - ][split_part - 1] - return [ - Llama2_PromptProcessor_1_Quantized, - Llama2_PromptProcessor_2_Quantized, - Llama2_PromptProcessor_3_Quantized, - Llama2_PromptProcessor_4_Quantized, - ][split_part - 1] - - -class Llama2ModelPipelineBase(ExecutableModelProtocol): +class LlamaModelPipelineBase(ExecutableModelProtocol): """ Llama Pipeline to execute model splits one after another """ - def __init__(self, num_splits: int, is_token_generator: bool = False): + def __init__( + self, + num_splits: int, + num_past_key_val_heads: int, + model_split_map: Dict[int, Tuple[int, int]], + is_token_generator: bool = False, + ): self.num_splits = num_splits self.is_token_generator = is_token_generator + self.num_past_key_val_heads = num_past_key_val_heads + self.model_split_map = model_split_map self.model_type = "TokenGenerator" if is_token_generator else "PromptProcessor" def __call__( @@ -94,10 +70,11 @@ def forward( del model gc.collect() input_ids = out[0] - past_key_values.extend(list(out[1:])) + for each in out[1:]: + past_key_values.extend(list(torch.split(each, 1, dim=1))) # Return logits + past_key_values - return (out[0],) + tuple(past_key_values) + return tuple((out[0], *past_key_values)) def forward_tg( self, @@ -108,13 +85,18 @@ def forward_tg( *past_key_values, ): past_key_values_new = [] - n = 512 + start_past_key_offset = 0 for i in range(1, self.num_splits + 1): with suppress_warnings(): model = self.load_model_part(i) print(f"Running {self.model_type} {i}/{self.num_splits}") - split_offset = n * (i - 1) - past_values = past_key_values[split_offset : split_offset + n] + layer_start, layer_end = self.model_split_map[i] + num_of_key_vals = ( + self.num_past_key_val_heads * 2 * (layer_end - layer_start) + ) + + end_past_key_offset = start_past_key_offset + num_of_key_vals + past_values = past_key_values[start_past_key_offset:end_past_key_offset] out = model( input_ids, attention_mask, @@ -131,7 +113,7 @@ def forward_tg( for j, new_cache_j in enumerate(out[1:]): # Construct new past entries by concatenating old and new - past_j = past_key_values[split_offset + j] + past_j = past_key_values[start_past_key_offset + j] # Concatenation is not always along the same dimension if new_cache_j.shape[3] == 1: @@ -151,12 +133,17 @@ def forward_tg( dim=dim, ) ) + start_past_key_offset = end_past_key_offset # Return logits + past_key_values - return (out[0],) + tuple(past_key_values_new) + return tuple((out[0], *past_key_values_new)) + @abstractmethod + def load_model_part(self, model_part: int): + pass -class HubLlama2ModelPipeline(Llama2ModelPipelineBase): + +class HubLlamaModelPipeline(LlamaModelPipelineBase): """ Pipeline wrapper for HubModels """ @@ -166,21 +153,29 @@ def __init__( hub_model_ids: List[str], hub_device: hub.Device, inference_options: str, + get_model_class: Callable, + num_past_key_val_heads: int, + model_split_map: Dict[int, Tuple[int, int]], is_token_generator: bool = False, ): - super().__init__(len(hub_model_ids), is_token_generator=is_token_generator) + super().__init__( + len(hub_model_ids), + num_past_key_val_heads, + model_split_map, + is_token_generator=is_token_generator, + ) self.models = [] for i, model_id in enumerate(hub_model_ids): hub_model = HubModel( hub.get_model(model_id), - input_names=_get_model_class( + input_names=get_model_class( i + 1, is_token_generator=is_token_generator 
) .get_input_spec() .keys(), device=hub_device, inference_options=inference_options, - output_names=_get_model_class( + output_names=get_model_class( i + 1, is_token_generator=is_token_generator ).get_output_names(), ) @@ -190,95 +185,122 @@ def load_model_part(self, model_part: int): model_index = model_part - 1 if model_index < 0 or model_index > len(self.models): raise RuntimeError( - f"HubLlama2ModelPipeline does not have requested model_part {model_part}." + f"HubLlamaModelPipeline does not have requested model_part {model_part}." ) - return self.models[model_index] -class Llama2ModelPipeline(Llama2ModelPipelineBase): +class LlamaModelPipeline(LlamaModelPipelineBase): """ Pipeline wrapper for PyTorch base model """ def __init__( - self, prompt_processor: CollectionModel, is_token_generator: bool = False + self, + models: CollectionModel, + num_splits: int, + num_past_key_val_heads: int, + model_split_map: Dict[int, Tuple[int, int]], + is_token_generator: bool = False, ): - self.prompt_processor = prompt_processor + self.models = models + self.num_splits = num_splits self.model_type = "TokenGenerator" if is_token_generator else "PromptProcessor" - super().__init__(NUM_SPLITS, is_token_generator=is_token_generator) + super().__init__( + num_splits, + num_past_key_val_heads=num_past_key_val_heads, + model_split_map=model_split_map, + is_token_generator=is_token_generator, + ) def load_model_part(self, model_part: int): - if model_part < 1 or model_part > NUM_SPLITS: + if model_part < 1 or model_part > self.num_splits: raise RuntimeError( - f"ModelLlama2ModelPipeline does not have requested model_part {model_part}." + f"ModelLlamaModelPipeline does not have requested model_part {model_part}." ) - return self.prompt_processor.load_model_part( - f"Llama2_{self.model_type}_{model_part}_Quantized" - ) + return self.models.load_model_part(f"{self.model_type}_{model_part}_Quantized") class ChatApp: + """ + This class is a demonstration of how one can use a Llama model to build a basic ChatApp. + This app uses two models: + * Prompt Processor + - Processes user input prompt to generate first token and KV-cache + * Token Generator + - Generates output tokens one at a time + - Uses KV-cache to speed up token generation + """ + def __init__( - self, prompt_processor: Callable, token_generator: Callable, tokenizer: Any + self, + prompt_processor: Callable, + token_generator: Callable, + get_input_prompt_with_tags: Callable, + prepare_combined_attention_mask: Callable, + tokenizer: Any, + end_tokens: Set[str], + num_past_key_val_heads: int, ): + """ + Base ChatApp that generates one response for a given input prompt.
+ + prompt_processor: Prompt Processor collection model + token_generator: Token Generator collection model + get_input_prompt_with_tags: Function to wrap input prompt with appropriate tags + prepare_combined_attention_mask: Function to combine and build attention mask, + tokenizer: Tokenizer to use, + end_tokens: Set of end tokens to convey end of token generation, + num_past_key_val_heads: Number of heads in past-key value, + """ self.prompt_processor = prompt_processor self.token_generator = token_generator + self.get_input_prompt_with_tags = get_input_prompt_with_tags + self.prepare_combined_attention_mask = prepare_combined_attention_mask self.tokenizer = tokenizer + self.end_tokens = end_tokens + self.num_past_key_val_heads = num_past_key_val_heads def generate_output_prompt( self, input_prompt: str, max_seq_len: int, max_output_tokens: int ): - input_prompt_processed = get_input_prompt_with_tags( + input_prompt_processed = self.get_input_prompt_with_tags( user_input_prompt=input_prompt ) - input_tokens = self.tokenizer(input_prompt_processed, return_tensors="pt") - token_size = input_tokens["input_ids"].shape[-1] - padding_size = max_seq_len - token_size - - input_ids = torch.cat( - ( - torch.Tensor([self.tokenizer.unk_token_id] * padding_size).reshape( - 1, padding_size - ), - input_tokens["input_ids"], - ), - dim=-1, - ).type(torch.int32) - attention_mask = torch.cat( - ( - torch.Tensor([0] * padding_size).reshape(1, padding_size), - input_tokens["attention_mask"], - ), - dim=-1, - ).type(torch.int32) - cm_attention_masks = prepare_combined_attention_mask( - attention_mask=attention_mask + + input_tokens = self.tokenizer( + input_prompt_processed, + return_tensors="pt", + padding="max_length", + max_length=max_seq_len, ) + input_ids = input_tokens["input_ids"].type(torch.long) + num_tokens = torch.sum(input_tokens["attention_mask"]).item() + padding_size = max_seq_len - num_tokens + position_ids = [0] * (padding_size) + list(range(0, num_tokens)) position_ids = ( - torch.cat( - ( - torch.zeros( - padding_size, - ), - torch.arange(token_size), - ) - ) - .reshape(1, max_seq_len) - .type(torch.int32) + torch.Tensor(position_ids).type(torch.long).reshape(1, max_seq_len) + ) + attention_mask = input_tokens["attention_mask"].type(torch.float32) + cm_attention_masks = self.prepare_combined_attention_mask( + attention_mask=attention_mask, + input_shape=input_tokens["attention_mask"].shape, ) - position_ids = ( torch.Tensor(position_ids).type(torch.long).reshape(1, max_seq_len) ) position_ids_cos, position_ids_sin = RopeEmbedding( max_length=max_seq_len ).get_embedding(position_ids) + + # Process input prompt output = self.prompt_processor( input_ids, cm_attention_masks, position_ids_cos, position_ids_sin ) output_token = _get_tokens_from_logits(output) - past_key_values = get_past_keyval_with_shift(output[1:]).values() + past_key_values = get_past_keyval_with_shift( + output[1:], num_of_past_key_heads=self.num_past_key_val_heads + ).values() output_prompt = self.tokenizer.decode(output_token) print() print(f"Text generated by Prompt Processor: {output_prompt}") @@ -286,10 +308,8 @@ def generate_output_prompt( # Collect output prompt to summarize later hub_tokens = output_token - num_of_tokens_processed = token_size + 1 + num_of_tokens_processed = num_tokens + 1 - # TODO: Revisiting demo and app to refactor like a chat-bot - # This is just a place-holder to show how both models work together for _ in range(max_output_tokens - 1): # TODO: check if previous generated token is EOS if 
num_of_tokens_processed >= max_seq_len: @@ -300,7 +320,7 @@ def generate_output_prompt( attention_mask = torch.cat( (attention_mask[:, 1:], torch.Tensor([[1]])), dim=-1 ) - cm_attention_masks = prepare_combined_attention_mask( + cm_attention_masks = self.prepare_combined_attention_mask( attention_mask=attention_mask, input_shape=(1, 1), past_key_values_length=max_seq_len - 1, @@ -311,6 +331,8 @@ def generate_output_prompt( position_ids_cos, position_ids_sin = RopeEmbedding( max_length=max_seq_len ).get_embedding(position_ids) + + # Generate output token output = self.token_generator( input_ids, cm_attention_masks, @@ -322,6 +344,11 @@ def generate_output_prompt( del input_ids output_token = _get_tokens_from_logits(output) output_prompt = self.tokenizer.decode(output_token) + + # Assistant generating end of token + if output_prompt in self.end_tokens: + break + past_key_values = output[1:] hub_tokens = torch.cat((hub_tokens, output_token), dim=-1) print() @@ -333,4 +360,3 @@ def generate_output_prompt( print("-------- Response Summary --------") print(f"Prompt: {input_prompt}") print(f"Response: {self.tokenizer.decode(hub_tokens)}") - return output_prompt diff --git a/qai_hub_models/models/_shared/llama/demo.py b/qai_hub_models/models/_shared/llama/demo.py new file mode 100644 index 00000000..d9c4fb09 --- /dev/null +++ b/qai_hub_models/models/_shared/llama/demo.py @@ -0,0 +1,185 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from __future__ import annotations + +from typing import Any, Callable, Dict, List, Set, Tuple, Type + +import qai_hub as hub + +from qai_hub_models.models._shared.llama.app import ChatApp as App +from qai_hub_models.models._shared.llama.app import ( + HubLlamaModelPipeline, + LlamaModelPipeline, +) +from qai_hub_models.models._shared.llama.model import DEFAULT_INPUT_SEQ_LEN +from qai_hub_models.utils.args import ( + get_model_cli_parser, + get_on_device_demo_parser, + validate_on_device_demo_args, +) +from qai_hub_models.utils.base_model import BaseModel, TargetRuntime +from qai_hub_models.utils.huggingface import has_model_access + +# Max output tokens to generate +# You can override this with cli argument. +# Keeping this short as on-device demo takes time to converge. +MAX_OUTPUT_TOKENS = 10 +DEFAULT_DEVICE = "Samsung Galaxy S24 (Family)" +DEFAULT_USER_PROMPT = "Hi! What is 2+3?" 
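The ChatApp flow above hinges on a fixed-length KV cache: after the prompt processor runs, `get_past_keyval_with_shift` drops the oldest sequence position from every cached key (`[:, :, :, 1:]`) and value (`[:, :, 1:, :]`), and the token generator then appends the key/value produced for the newest token so the cache length stays constant across autoregressive steps. The sketch below illustrates that bookkeeping only; the shapes are made up and it is not the repository's actual cache-handling code.

```python
# Illustrative sketch (not the repo's implementation) of the fixed-length
# KV-cache update between token-generation steps. Shape convention follows
# get_past_keyval_with_shift: keys are (batch, heads, head_dim, seq) and
# values are (batch, heads, seq, head_dim). All sizes here are invented.
import torch

batch, heads, head_dim, seq = 1, 32, 128, 1023
key_cache = torch.zeros(batch, heads, head_dim, seq)
value_cache = torch.zeros(batch, heads, seq, head_dim)


def shift_and_append(key_cache, value_cache, new_key, new_value):
    # Drop the oldest cached position along the sequence axis...
    key_cache = key_cache[:, :, :, 1:]
    value_cache = value_cache[:, :, 1:, :]
    # ...then append the key/value computed for the newest token, keeping
    # the overall cache length constant.
    key_cache = torch.cat((key_cache, new_key), dim=3)
    value_cache = torch.cat((value_cache, new_value), dim=2)
    return key_cache, value_cache


new_key = torch.randn(batch, heads, head_dim, 1)    # key for the new token
new_value = torch.randn(batch, heads, 1, head_dim)  # value for the new token
key_cache, value_cache = shift_and_append(key_cache, value_cache, new_key, new_value)
assert key_cache.shape == (batch, heads, head_dim, seq)
assert value_cache.shape == (batch, heads, seq, head_dim)
```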
+ + +def llama_chat_demo( + model_cls: Type[BaseModel], + model_id: str, + get_model_class: Callable, + get_input_prompt_with_tags: Callable, + prepare_combined_attention_mask: Callable, + tokenizer: Any, + num_splits: int, + num_key_val_heads: int, + model_split_map: Dict[int, Tuple[int, int]], + end_tokens: Set[str], + hf_repo_name: str, + hf_repo_url: str, + default_prompt: str = DEFAULT_USER_PROMPT, + is_test: bool = False, + available_target_runtimes: List[TargetRuntime] = [TargetRuntime.QNN], +): + """ + Shared Chat Demo App to generate output for provided input prompt + model_cls: Model base class (either Prompt Processor or Token Generator) + model_id: Model ID from hub, + get_model_class: Function to get initialize model class, + get_input_prompt_with_tags: Function to wrap input prompt with appropriate tags, + prepare_combined_attention_mask: Function to combine attention mask, + tokenizer: Tokenizer to encode-decode prompt, + num_splits: Number of model splits, + num_key_val_heads: Number of heads in past key-value cache, + model_split_map: Map for split number to decoder layer ranges, + end_tokens: Set of end tokens to use for end of output generation, + hf_repo_name: HF repo name, + hf_repo_url: HF repo url, + default_prompt: Default prompt to set, + is_test: If test, no options required, + available_target_runtimes: Default availble runtime in options, + """ + # Demo parameters + parser = get_model_cli_parser(model_cls) + parser = get_on_device_demo_parser( + parser, + add_output_dir=True, + available_target_runtimes=available_target_runtimes, + default_device=DEFAULT_DEVICE, + ) + parser.add_argument( + "--prompt", + type=str, + default=default_prompt, + help="input prompt.", + ) + parser.add_argument( + "--prompt-processor-input-seq-len", + type=int, + default=DEFAULT_INPUT_SEQ_LEN, + help="input sequence length for prompt-processor. This must be less than `max_position_embeddings` set for model.", + ) + parser.add_argument( + "--max-output-tokens", + type=int, + default=MAX_OUTPUT_TOKENS, + help="max output tokens to generate.", + ) + args = parser.parse_args([] if is_test else None) + validate_on_device_demo_args(args, model_id) + + if not is_test: + print(f"\n{'-' * 85}") + print(f"** Generating response via {model_id} **") + print() + print("Prompt:", args.prompt) + print("Max number of output tokens to generate:", args.max_output_tokens) + print("Please pass `--max-output-tokens ` to generate longer responses.") + print() + print( + """NOTE: Each token generation takes around 15 mins on-device: + 1. Model is divided into multiple parts to fit into device constraints + 2. Each model requires separate execution on-device via AI Hub + 3. Due to autoregressive nature, we cannot run step 2 in parallel + 4. Device procurement is subject to device availability and might take longer to run demo on-device + +Alternative: + 1. Run demo on host (with PyTorch) to verify e2e result for longer responses + 2. Run demo on-device for shorter responses (--max-output-tokens 10 or 20) + 3. [Optional] Can run demo on-device to generate long sentence (takes longer) + +We are actively working on to improve UX and reduce turn-around time for these models. 
+""" + ) + print(f"{'-' * 85}\n") + + if not args.on_device: + prompt_processor = LlamaModelPipeline( + model_cls.from_pretrained(), + num_splits=num_splits, + num_past_key_val_heads=num_key_val_heads, + model_split_map=model_split_map, + ) + token_generator = LlamaModelPipeline( + model_cls.from_pretrained(), + num_splits=num_splits, + num_past_key_val_heads=num_key_val_heads, + model_split_map=model_split_map, + is_token_generator=True, + ) + else: + hub_model_ids = args.hub_model_id.split(",") + # First four models are Prompt Processor + # Last four models are Token Generator + if len(hub_model_ids) != num_splits * 2: + model_id_lists = ",".join( + [f"" for i in range(1, num_splits * 2 + 1)] + ) + raise RuntimeError( + "Please provide comma separated hub-model-ids for Llama Prompt Processor and Token Generator," + f" e.g. --hub-model-id {model_id_lists}.\n" + "Specify model-ids for four Prompt Processor models first, then Token Generator models.\n" + "If you run export.py it will print out command to run on-device demo with ordered model-ids." + ) + + hub_device = hub.Device(args.device) + prompt_processor = HubLlamaModelPipeline( + hub_model_ids[:num_splits], + hub_device=hub_device, + inference_options=args.inference_options, + get_model_class=get_model_class, + num_past_key_val_heads=num_key_val_heads, + model_split_map=model_split_map, + ) + token_generator = HubLlamaModelPipeline( + hub_model_ids[num_splits:], + hub_device=hub_device, + inference_options=args.inference_options, + get_model_class=get_model_class, + num_past_key_val_heads=num_key_val_heads, + model_split_map=model_split_map, + is_token_generator=True, + ) + + has_model_access(hf_repo_name, hf_repo_url) + + app = App( + prompt_processor, + token_generator, + get_input_prompt_with_tags=get_input_prompt_with_tags, + prepare_combined_attention_mask=prepare_combined_attention_mask, + tokenizer=tokenizer, + end_tokens=end_tokens, + num_past_key_val_heads=num_key_val_heads, + ) + app.generate_output_prompt( + args.prompt, + max_seq_len=args.prompt_processor_input_seq_len, + max_output_tokens=args.max_output_tokens, + ) diff --git a/qai_hub_models/models/_shared/llama/model.py b/qai_hub_models/models/_shared/llama/model.py new file mode 100644 index 00000000..d7e70ed1 --- /dev/null +++ b/qai_hub_models/models/_shared/llama/model.py @@ -0,0 +1,261 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from __future__ import annotations + +import os +import pickle +from typing import List, Optional + +import torch +from qai_hub.client import Device + +from qai_hub_models.models.common import ( + SampleInputsType, + SourceModelFormat, + TargetRuntime, +) +from qai_hub_models.utils.aimet.aimet_dummy_model import AimetEncodingLoaderMixin +from qai_hub_models.utils.asset_loaders import ASSET_CONFIG, CachedWebModelAsset +from qai_hub_models.utils.base_model import BaseModel, TargetRuntime +from qai_hub_models.utils.input_spec import InputSpec + +DEFAULT_INPUT_SEQ_LEN = 1024 + + +def get_hidden_layer_range_from_split(split_part: int, model_split_map: dict): + hidden_layers_start, hidden_layers_end = model_split_map[split_part] + return hidden_layers_start, hidden_layers_end + + +def get_past_key_names( + start: int = 0, end: int = 8, num_of_past_key_heads=32, suffix="" +): + past_key_val_name = [] + for i in range(start, end): + cache_names = [ + f"past_key_{i}_h{j}{suffix}" for j in range(num_of_past_key_heads) + ] + [f"past_value_{i}_h{j}{suffix}" for j in range(num_of_past_key_heads)] + past_key_val_name.extend(cache_names) + return past_key_val_name + + +def save_input_cached_data( + data: dict, + split_part: int, + data_dir: str, + model_name: str, + model_id: str, + model_asset_version: str, + model_type: str = "pp", + input_seq_len: int = DEFAULT_INPUT_SEQ_LEN, +): + data_path = ( + f"{data_dir}/{input_seq_len}/{model_name}_{split_part}_{model_type}_inputs.pkl" + ) + + inputs_pkl_path = ASSET_CONFIG.get_local_store_model_path( + model_id, + model_asset_version, + f"{data_path}", + ) + + # if already exists, no need to re-serialize. + if os.path.exists(inputs_pkl_path): + return + + os.makedirs(os.path.dirname(inputs_pkl_path), exist_ok=True) + with open(f"{inputs_pkl_path}", "wb") as f: + pickle.dump(data, f, pickle.HIGHEST_PROTOCOL) + + +def load_input_cached_data( + split_part: int, + data_dir: str, + model_name: str, + model_id: str, + model_asset_version: str, + model_type: str = "pp", + input_seq_len: int = DEFAULT_INPUT_SEQ_LEN, +): + data_path = ( + f"{data_dir}/{input_seq_len}/{model_name}_{split_part}_{model_type}_inputs.pkl" + ) + try: + + # Load local data path if already generated + inputs_pkl_path = ASSET_CONFIG.get_local_store_model_path( + model_id, + model_asset_version, + f"{data_path}", + ) + + # If local data path not found, fetch from server if available + if not os.path.exists(inputs_pkl_path): + inputs_pkl_path = CachedWebModelAsset.from_asset_store( + model_id, + model_asset_version, + data_path, + ).fetch() + + with open(f"{inputs_pkl_path}", "rb") as f: + return pickle.load(f) + except Exception: + # Delete intermediate data file if error occurs + if os.path.exists(inputs_pkl_path): + os.remove(inputs_pkl_path) + print( + f"Unable to load cached data for {data_path}, creating data using PyTorch models." 
+ ) + # Unable to load cached data, return None + return None + + +def get_past_keyval_with_shift( + past_key_vals: List[torch.Tensor], num_of_past_key_heads: int = 32 +): + """ + Clip past key value to feed next iteration + """ + tg_inputs = {} + total_key_val = num_of_past_key_heads * 2 + for i in range(0, len(past_key_vals), total_key_val): + l_num = i // total_key_val + for j, key in enumerate(past_key_vals[i : i + num_of_past_key_heads]): + tg_inputs[f"past_key_{l_num}_h{j}"] = key[:, :, :, 1:].detach() + + for j, val in enumerate( + past_key_vals[i + num_of_past_key_heads : i + total_key_val] + ): + tg_inputs[f"past_value_{l_num}_h{j}"] = val[:, :, 1:, :].detach() + + return tg_inputs + + +def make_torch_compatible_past_key_values( + decode_layers, past_key_val_per_layer, *past_values_flattened +): + past_key_values = [] + total_past_entries = len(past_values_flattened) + + # past values consists of + # 1. k decode/hidden layers + # 2. each decode layer has 2 entries: key and value + # 3. each key-value entry is has layer + if total_past_entries != decode_layers * past_key_val_per_layer * 2: + raise RuntimeError( + "Incorrect number of past key-values provided for model." + f"Expecting {decode_layers * past_key_val_per_layer * 2}, got {total_past_entries}." + ) + + for i in range(0, decode_layers * 2, 2): + keys = past_values_flattened[ + i * past_key_val_per_layer : (i + 1) * past_key_val_per_layer + ] + values = past_values_flattened[ + (i + 1) * past_key_val_per_layer : (i + 2) * past_key_val_per_layer + ] + + past_key_values.append((keys, values)) + return tuple(past_key_values) + + +class RopeEmbedding: + """ + Compute Rotary Position Embedding + Ref: https://arxiv.org/pdf/2104.09864 + + Compute RopeEmbedding outside model to simplify model quantization + """ + + def __init__(self, head_dim: int = 128, max_length: int = 1024): + """ + head_dim: dimension size of head + max_length: max sequence length to expect + """ + self.max_length = max_length + self.cos, self.sin = self.precompute_freqs_cis(head_dim, max_length * 2) + + def precompute_freqs_cis(self, dim: int, end: int, theta: float = 10000.0): + """ + Precompute embeeding matrix + """ + freqs = 1.0 / (theta ** (torch.arange(0, dim, 2)[: (dim // 2)].float() / dim)) + t = torch.arange(end) + freqs = torch.outer(t, freqs).float() + freqs_cis = torch.polar(torch.ones_like(freqs), freqs) + freqs_cis = freqs_cis[0 : self.max_length] + freqs_real = torch.view_as_real(freqs_cis) + freqs_real = freqs_real.unsqueeze(0).unsqueeze(0) + + freqs_cos = freqs_real[:, :, :, :, 0] # extract even elements + freqs_sin = freqs_real[:, :, :, :, 1] # extract odd elements + return freqs_cos, freqs_sin + + def get_embedding(self, position_ids: torch.Tensor): + """ + position_ids: [batch_size, sequence_length] + return [batch_size, 1, sequence_length, head_sim//2][2] + """ + cos = self.cos[0, 0, :, :] # [seq_len, dim] + sin = self.sin[0, 0, :, :] # [seq_len, dim] + cos = cos[position_ids].unsqueeze(1) + sin = sin[position_ids].unsqueeze(1) + return cos, sin + + +class Llama_QuantizedMixin(AimetEncodingLoaderMixin, BaseModel): + def __init__(self, model, encoding_path, is_token_generator=False): + AimetEncodingLoaderMixin.__init__(self, model, encoding_path) + BaseModel.__init__(self) + self.model = model + self.split_part = 1 + self.is_token_generator = is_token_generator + + def get_hub_compile_options( + self, + target_runtime: TargetRuntime, + other_compile_options: str = "", + device: Optional[Device] = None, + ) -> str: + if target_runtime != 
TargetRuntime.QNN: + raise RuntimeError( + f"Unsupported target_runtime provided: {target_runtime}." + " Only QNN runtime is supported for Llama for now." + ) + + return " --target_runtime qnn_context_binary --quantize_full_type w8a16 --quantize_io" + + @staticmethod + def get_output_names( + start: int = 0, + end: int = 8, + past_key_val_heads: int = 32, + output_name: str = "", + ): + # Clipped hidden layers are named same as first part for all parts + # Eventually, each split should have respective names. + # layer_start, layer_end = get_hidden_layer_range_from_split(split_part=split_part, model_split_map=model_split_map) + layer_range = end - start + output_list = [ + output_name if output_name else f"layers_{layer_range - 1}_add_out_0" + ] + output_list += get_past_key_names( + 0, layer_range, num_of_past_key_heads=past_key_val_heads, suffix="_out" + ) + return output_list + + def sample_inputs(self, input_spec: Optional[InputSpec] = None) -> SampleInputsType: + data = self.get_calibration_data(input_spec=input_spec) + for key, val in data.items(): + data[key] = [val.detach().numpy()] + return data + + def preferred_hub_source_model_format( + self, target_runtime: TargetRuntime + ) -> SourceModelFormat: + """ + Source model format preferred for conversion on AI Hub. + """ + return SourceModelFormat.ONNX diff --git a/qai_hub_models/models/_shared/video_classifier/app.py b/qai_hub_models/models/_shared/video_classifier/app.py index 8c92fe2e..13ed6312 100644 --- a/qai_hub_models/models/_shared/video_classifier/app.py +++ b/qai_hub_models/models/_shared/video_classifier/app.py @@ -153,7 +153,7 @@ class KineticsClassifierApp: """ def __init__(self, model: KineticsClassifier): - self.model = model.eval() + self.model = model def predict(self, path: str | Path) -> List[str]: """ diff --git a/qai_hub_models/models/_shared/whisper/app.py b/qai_hub_models/models/_shared/whisper/app.py index 27deac3d..fb3f9978 100644 --- a/qai_hub_models/models/_shared/whisper/app.py +++ b/qai_hub_models/models/_shared/whisper/app.py @@ -7,21 +7,22 @@ from typing import List, Tuple import numpy as np +import samplerate import torch import whisper # type: ignore from scipy import special as scipy_special # type: ignore -from qai_hub_models.models._shared.whisper.model import Whisper +from qai_hub_models.models._shared.whisper.model import ( + CHUNK_LENGTH, + HOP_LENGTH, + MEL_FILTER_PATH, + N_FFT, + N_MELS, + SAMPLE_RATE, + Whisper, +) from qai_hub_models.utils.model_adapters import TorchNumpyAdapter -# hard-coded audio hyperparameters -SAMPLE_RATE = 16000 -N_FFT = 400 -N_MELS = 80 -HOP_LENGTH = 160 -CHUNK_LENGTH = 30 -N_SAMPLES = CHUNK_LENGTH * SAMPLE_RATE # 480000 samples in a 30-second chunk - class WhisperApp: """ @@ -30,7 +31,15 @@ class WhisperApp: OpenAI Whisper. 
""" - def __init__(self, whisper: Whisper): + def __init__( + self, + whisper: Whisper, + mel_filter: np.ndarray | None = None, + sample_rate: int = SAMPLE_RATE, + max_audio_seconds: int = CHUNK_LENGTH, + n_fft: int = N_FFT, + hop_length: int = HOP_LENGTH, + ): decoder = whisper.decoder.to("cpu") encoder = whisper.encoder.to("cpu") self.num_decoder_blocks = whisper.num_decoder_blocks @@ -38,13 +47,25 @@ def __init__(self, whisper: Whisper): self.attention_dim = whisper.attention_dim self.mean_decode_len = whisper.mean_decode_len + self.mel_filter = mel_filter + if not self.mel_filter: + MEL_FILTER_PATH.fetch() + with np.load(MEL_FILTER_PATH.path()) as f: + self.mel_filter = f[f"mel_{N_MELS}"] + + self.hop_length = hop_length + self.sample_rate = sample_rate + self.max_audio_seconds = max_audio_seconds + self.n_fft = n_fft + self.max_audio_samples = self.max_audio_seconds * self.sample_rate + # Wraps torch Module so it takes np ndarray as input and outputs if isinstance(encoder, torch.nn.Module): self.encoder = TorchNumpyAdapter(encoder) else: self.encoder = encoder if isinstance(decoder, torch.nn.Module): - self.decoder = TorchNumpyAdapter(decoder.eval()) + self.decoder = TorchNumpyAdapter(decoder) else: self.decoder = decoder @@ -52,18 +73,57 @@ def predict(self, *args, **kwargs): # See transcribe. return self.transcribe(*args, **kwargs) - def transcribe(self, mel_input: np.ndarray) -> str: + def transcribe( + self, audio: np.ndarray | str, audio_sample_rate: int | None = None + ) -> str: + """ + Transcribe the provided audio to text. + + Parameters + ---------- + audio: numpy array | str + Path to audio file if a string. + Raw audio array of shape (# of samples) if a numpy array. + + audio_sample_rate: int | None + The sample rate of the provided audio, in samples / second. + If audio is a numpy array, this must be provided. + If audio is a file and audio_sample_rate is None, this is ignored and the sample rate will be derived from the audio file. + + Returns + ------- + List of audio arrays, chunked into N arrays of model_chunk_seconds seconds. """ - Transcribe an audio to text. + if isinstance(audio, str): + import audio2numpy as a2n # import here, as this requires ffmpeg to be installed on host machine + + audio, audio_sample_rate = a2n.audio_from_file(audio) + else: + assert audio_sample_rate is not None + + return " ".join( + self._transcribe_single_chunk(x) + for x in chunk_and_resample_audio(audio, audio_sample_rate) + ) + + def _transcribe_single_chunk(self, audio: np.ndarray) -> str: + """ + Transcribe an audio chunk to text. Parameters: - - mel_input: of shape (1, 80, 3000). Mel spectrogram of 30s audio. + audio: numpy array + A numpy array of audio of shape (number of samples). + The sample rate of this audio must be self.sample_rate. + The maximum length of this audio must be self.max_audio_samples. Returns: - transcribed texts """ + mel_input = log_mel_spectrogram( + self.mel_filter, audio, self.max_audio_samples, self.n_fft, self.hop_length + ) k_cache_cross, v_cache_cross = self.encoder(mel_input) # Start decoding # coreml only takes float tensors @@ -307,31 +367,13 @@ def apply_timestamp_rules( return logits, logprobs -def load_audio(mel_filter: np.ndarray, audio_path: str) -> np.ndarray: - """ - Load audio to a mel spectrogram. 
- """ - with np.load(audio_path) as f: - audio_np = f["audio"] - # Pad 30-seconds of silence to the input audio, for slicing - input_feature = log_mel_spectrogram(mel_filter, audio_np, pad_to_length=N_SAMPLES) - # input_feature has fixed shape [1, 80, 3000]. 80 is - # spectrogram feature dim, 3000 is due to Whisper only takes - # 30 seconds input represented as 10ms spectrogram segments - assert input_feature.shape == (1, 80, 3000) - return input_feature - - -def load_mel_filter(mel_filter_path: str) -> np.ndarray: - with np.load(mel_filter_path) as f: - return f["mel_80"] - - # Adopted from https://github.com/openai/whisper/blob/main/whisper/audio.py def log_mel_spectrogram( mel_filter: np.ndarray, audio_np: np.ndarray, pad_to_length: int, + n_fft: int, + hop_length: int, ) -> np.ndarray: """ Compute the log-Mel spectrogram of @@ -356,8 +398,8 @@ def log_mel_spectrogram( padding = pad_to_length - len(audio) if padding > 0: audio = torch.nn.functional.pad(audio, (0, padding)) - window = torch.hann_window(N_FFT) - stft = torch.stft(audio, N_FFT, HOP_LENGTH, window=window, return_complex=True) + window = torch.hann_window(n_fft) + stft = torch.stft(audio, n_fft, hop_length, window=window, return_complex=True) magnitudes = stft[..., :-1].abs() ** 2 mel_spec = torch.from_numpy(mel_filter) @ magnitudes @@ -366,3 +408,53 @@ def log_mel_spectrogram( log_spec = torch.maximum(log_spec, log_spec.max() - 8.0) log_spec = (log_spec + 4.0) / 4.0 return log_spec.unsqueeze(0).detach().float().numpy() + + +def chunk_and_resample_audio( + audio: np.ndarray, + audio_sample_rate: int, + model_sample_rate=SAMPLE_RATE, + model_chunk_seconds=CHUNK_LENGTH, +) -> List[np.ndarray]: + """ + Parameters + ---------- + audio: str + Raw audio numpy array of shape [# of samples] + + audio_sample_rate: int + Sample rate of audio array, in samples / sec. + + model_sample_rate: int + Sample rate (samples / sec) required to run Whisper. The audio file + will be resampled to use this rate. + + model_chunk_seconds: int + Split the audio in to N sequences of this many seconds. + The final split may be shorter than this many seconds. + + Returns + ------- + List of audio arrays, chunked into N arrays of model_chunk_seconds seconds. + """ + if audio_sample_rate != model_sample_rate: + audio = samplerate.resample(audio, model_sample_rate / audio_sample_rate) + audio_sample_rate = model_sample_rate + + number_of_full_length_audio_chunks = ( + audio.shape[0] // audio_sample_rate // model_chunk_seconds + ) + last_sample_in_full_length_audio_chunks = ( + audio_sample_rate * number_of_full_length_audio_chunks * model_chunk_seconds + ) + + if number_of_full_length_audio_chunks == 0: + return [audio] + + return [ + *np.array_split( + audio[:last_sample_in_full_length_audio_chunks], + number_of_full_length_audio_chunks, + ), + audio[last_sample_in_full_length_audio_chunks:], + ] diff --git a/qai_hub_models/models/_shared/whisper/demo.py b/qai_hub_models/models/_shared/whisper/demo.py index bd9a4fa8..232f3750 100644 --- a/qai_hub_models/models/_shared/whisper/demo.py +++ b/qai_hub_models/models/_shared/whisper/demo.py @@ -2,19 +2,18 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
# SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- -from typing import Type +from typing import Tuple, Type -from qai_hub_models.models._shared.whisper.app import ( - WhisperApp, - load_audio, - load_mel_filter, -) +import numpy as np + +from qai_hub_models.models._shared.whisper.app import WhisperApp from qai_hub_models.models._shared.whisper.model import ( - MEL_FILTER_PATH, MODEL_ASSET_VERSION, MODEL_ID, + SAMPLE_RATE, Whisper, ) +from qai_hub_models.utils.args import get_model_cli_parser from qai_hub_models.utils.asset_loaders import CachedWebModelAsset TEST_AUDIO_PATH = CachedWebModelAsset.from_asset_store( @@ -22,19 +21,31 @@ ) -def whisper_demo(model_cls: Type[Whisper]): - # For other model sizes, see https://github.com/openai/whisper/blob/main/whisper/__init__.py#L17 - app = WhisperApp(model_cls.from_pretrained()) +def load_demo_audio() -> Tuple[np.ndarray, int]: TEST_AUDIO_PATH.fetch() - MEL_FILTER_PATH.fetch() + with np.load(TEST_AUDIO_PATH.path()) as f: + return f["audio"], SAMPLE_RATE - # Load audio into mel spectrogram - mel_filter_path = MEL_FILTER_PATH.path() - mel_filter = load_mel_filter(mel_filter_path) - audio_path = TEST_AUDIO_PATH.path() - mel_input = load_audio(mel_filter, audio_path) +def whisper_demo(model_cls: Type[Whisper], is_test: bool = False): + parser = get_model_cli_parser(model_cls) + parser.add_argument( + "--audio_file", + type=str, + default=None, + help="Audio file path or URL", + ) + args = parser.parse_args([] if is_test else None) + + # For other model sizes, see https://github.com/openai/whisper/blob/main/whisper/__init__.py#L17 + app = WhisperApp(model_cls.from_pretrained()) + + # Load default audio if file not provided + audio = args.audio_file + audio_sample_rate = None + if not audio: + audio, audio_sample_rate = load_demo_audio() # Perform transcription - transcription = app.transcribe(mel_input) + transcription = app.transcribe(audio, audio_sample_rate) print("Transcription:", transcription) diff --git a/qai_hub_models/models/_shared/whisper/model.py b/qai_hub_models/models/_shared/whisper/model.py index 1ea0acb4..cdc94c98 100644 --- a/qai_hub_models/models/_shared/whisper/model.py +++ b/qai_hub_models/models/_shared/whisper/model.py @@ -13,20 +13,40 @@ from qai_hub_models.utils.base_model import BaseModel, CollectionModel, TargetRuntime from qai_hub_models.utils.input_spec import InputSpec +MODEL_ID = "whisper_asr_shared" +MODEL_ASSET_VERSION = 1 + +# 20ms sample rate +SAMPLE_RATE = 16000 + +# Length of the Hann window signal used when applying a FFT to the audio. +N_FFT = 400 + +# Number of audio samples between adjacent STFT columns when applying FFT to the audio. +HOP_LENGTH = 160 + +# Audio chunk length in seconds +CHUNK_LENGTH = 30 + +# Samples per chunk +N_SAMPLES = CHUNK_LENGTH * SAMPLE_RATE # 480000 20ms samples in a 30-second chunk + # The official default max decoded length is 448. 
We use mean decoded length 224 for benchmarking purpose MEAN_DECODE_LEN = 224 -# The number of 20ms audio contexts in 30 seconds of audio -AUDIO_EMB_LEN = 1500 +# MEL filter to be applied to audio after applying FFT +MEL_FILTER_PATH = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, "openai_assets/mel_filters.npz" +) # The number of Mel features per audio context N_MELS = 80 -MODEL_ID = "whisper_asr_shared" -MODEL_ASSET_VERSION = 1 -MEL_FILTER_PATH = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, "openai_assets/mel_filters.npz" -) +# Audio embedding length +AUDIO_EMB_LEN = int(N_SAMPLES / N_MELS / 4) + +# Audio length per MEL feature +MELS_AUDIO_LEN = AUDIO_EMB_LEN * 2 class Whisper(CollectionModel): @@ -111,7 +131,7 @@ def get_input_spec() -> InputSpec: Returns the input specification (name -> (shape, type). This can be used to submit profiling job on Qualcomm AI Hub. """ - return dict(audio=((1, N_MELS, AUDIO_EMB_LEN * 2), "float32")) + return dict(audio=((1, N_MELS, MELS_AUDIO_LEN), "float32")) @classmethod def from_pretrained(cls): diff --git a/qai_hub_models/models/_shared/whisper/test_utils.py b/qai_hub_models/models/_shared/whisper/test_utils.py index 9b4fb089..f3bef8b1 100644 --- a/qai_hub_models/models/_shared/whisper/test_utils.py +++ b/qai_hub_models/models/_shared/whisper/test_utils.py @@ -2,30 +2,26 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- +from typing import Tuple + import numpy as np import torch import whisper -from qai_hub_models.models._shared.whisper.app import ( - WhisperApp, - load_audio, - load_mel_filter, -) -from qai_hub_models.models._shared.whisper.demo import TEST_AUDIO_PATH -from qai_hub_models.models._shared.whisper.model import ( - MEAN_DECODE_LEN, - MEL_FILTER_PATH, - Whisper, - WhisperDecoderInf, - WhisperEncoderInf, -) +from qai_hub_models.models._shared.whisper.app import WhisperApp, log_mel_spectrogram +from qai_hub_models.models._shared.whisper.demo import load_demo_audio +from qai_hub_models.models._shared.whisper.model import MEAN_DECODE_LEN, Whisper -def load_mel_input() -> np.ndarray: - mel_filter_path = MEL_FILTER_PATH.fetch() - mel_filter = load_mel_filter(mel_filter_path) - audio_path = TEST_AUDIO_PATH.fetch() - return load_audio(mel_filter, audio_path) +def load_sample_audio_input(app: WhisperApp) -> Tuple[np.ndarray, np.ndarray, int]: + audio, sample_rate = load_demo_audio() + return ( + audio, + log_mel_spectrogram( + app.mel_filter, audio, app.max_audio_samples, app.n_fft, app.hop_length + ), + sample_rate, + ) def run_test_wrapper_numerics(whisper_version): @@ -35,20 +31,23 @@ def run_test_wrapper_numerics(whisper_version): processing) that matches with the original model's. 
""" + app = WhisperApp(Whisper.from_source_model(whisper.load_model(whisper_version))) + + # Load inputs + _, mel_input, _ = load_sample_audio_input(app) + # OpenAI - mel_input = load_mel_input() with torch.no_grad(): + model = whisper.load_model(whisper_version) mel_input = torch.from_numpy(mel_input) - model = whisper.load_model("tiny.en") audio_features = model.encoder(mel_input) tokens = torch.LongTensor([[50257]]) logits_orig = model.decoder(tokens, audio_features).detach().numpy() # QAIHM - encoder = WhisperEncoderInf(model) - decoder = WhisperDecoderInf(model.decoder) - + encoder = app.encoder.base_model + decoder = app.decoder.base_model k_cache_cross, v_cache_cross = encoder(mel_input) sample_len = MEAN_DECODE_LEN @@ -86,7 +85,8 @@ def run_test_transcribe(whisper_version): Test that WhisperApp produces end to end transcription results that matches with the original model """ - mel_input = load_mel_input() + app = WhisperApp(Whisper.from_source_model(whisper.load_model(whisper_version))) + audio, mel_input, sample_rate = load_sample_audio_input(app) # Run inference with OpenAI whisper with torch.no_grad(): @@ -97,8 +97,6 @@ def run_test_transcribe(whisper_version): results = model.decode(torch.from_numpy(mel_input).float(), options) text_orig = results[0].text - app = WhisperApp(Whisper.from_source_model(model)) - # Perform transcription - transcription = app.transcribe(mel_input) + transcription = app.transcribe(audio, sample_rate) assert transcription == text_orig diff --git a/qai_hub_models/models/_shared/yolo/app.py b/qai_hub_models/models/_shared/yolo/app.py index aea01975..12e28009 100644 --- a/qai_hub_models/models/_shared/yolo/app.py +++ b/qai_hub_models/models/_shared/yolo/app.py @@ -126,18 +126,15 @@ class scores per batch multiplied by confidence: List element shape is [num_pred self.check_image_size(NCHW_fp32_torch_frames) # Run prediction - with torch.no_grad(): - if self.model_includes_postprocessing: - pred_boxes, pred_scores, pred_class_idx = self.model( - NCHW_fp32_torch_frames - ) - else: - model_output = self.model(NCHW_fp32_torch_frames) - if isinstance(model_output, torch.Tensor): - model_output = (model_output,) - pred_boxes, pred_scores, pred_class_idx = self.pre_nms_postprocess( - *model_output - ) + if self.model_includes_postprocessing: + pred_boxes, pred_scores, pred_class_idx = self.model(NCHW_fp32_torch_frames) + else: + model_output = self.model(NCHW_fp32_torch_frames) + if isinstance(model_output, torch.Tensor): + model_output = (model_output,) + pred_boxes, pred_scores, pred_class_idx = self.pre_nms_postprocess( + *model_output + ) # Non Maximum Suppression on each batch pred_boxes, pred_scores, pred_class_idx = batched_nms( diff --git a/qai_hub_models/models/aotgan/export.py b/qai_hub_models/models/aotgan/export.py index ab8656d4..010d04b4 100644 --- a/qai_hub_models/models/aotgan/export.py +++ b/qai_hub_models/models/aotgan/export.py @@ -38,7 +38,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,14 +117,13 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image,mask" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if 
target_runtime != TargetRuntime.ONNX else "" ) @@ -170,7 +169,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image,mask", sample_inputs, target_runtime ) @@ -190,7 +189,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -212,7 +211,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) @@ -227,7 +226,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_ort=False) + parser = export_parser(model_cls=Model, supports_onnx=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/aotgan/model.py b/qai_hub_models/models/aotgan/model.py index d5d33563..f08c7afd 100644 --- a/qai_hub_models/models/aotgan/model.py +++ b/qai_hub_models/models/aotgan/model.py @@ -14,6 +14,7 @@ CachedWebModelAsset, SourceAsRoot, load_image, + wipe_sys_modules, ) from qai_hub_models.utils.base_model import BaseModel from qai_hub_models.utils.input_spec import InputSpec @@ -66,6 +67,10 @@ def from_pretrained(cls, ckpt_name: str = DEFAULT_WEIGHTS): MODEL_ASSET_VERSION, source_repo_patches=AOTGAN_SOURCE_PATCHES, ): + import src + + wipe_sys_modules(src) + from src.model.aotgan import InpaintGenerator # AOT-GAN InpaintGenerator uses ArgParser to diff --git a/qai_hub_models/models/aotgan/perf.yaml b/qai_hub_models/models/aotgan/perf.yaml index 98f6395e..44744ef1 100644 --- a/qai_hub_models/models/aotgan/perf.yaml +++ b/qai_hub_models/models/aotgan/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: AOT-GAN performance_metrics: - torchscript_onnx_tflite: - inference_time: 164177.0 - throughput: 6.0909871662900406 + inference_time: 152887.0 + throughput: 6.540778483455101 estimated_peak_memory_range: - min: 3293184 - max: 6670400 + min: 16384 + max: 10328880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: j1gle2z8p + job_id: jmg986lmp job_status: Passed torchscript_onnx_qnn: - inference_time: 165278.0 - throughput: 6.050412033059452 + inference_time: 152765.0 + throughput: 6.546002029260629 estimated_peak_memory_range: - min: 4321280 - max: 32279608 + min: 4263936 + max: 23819360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +65,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 275 - job_id: jwgoen0xp + job_id: jqp48zl2g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +74,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 
8 Gen 2 - timestamp: '2024-06-08T22:15:16Z' + timestamp: '2024-06-22T22:16:41Z' - torchscript_onnx_tflite: - inference_time: 120342.0 - throughput: 8.309650828472188 + inference_time: 112110.0 + throughput: 8.919810900008919 estimated_peak_memory_range: - min: 2510848 - max: 224329120 + min: 2555904 + max: 193280688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -86,14 +88,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jw56qzj0g + job_id: jnp13r4n5 job_status: Passed torchscript_onnx_qnn: - inference_time: 121373.0 - throughput: 8.2390647013751 + inference_time: 112558.0 + throughput: 8.884308534266777 estimated_peak_memory_range: - min: 0 - max: 141486816 + min: 4222976 + max: 114497360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +103,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 275 - job_id: j1pvzrojg + job_id: j0pxmwk8g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +112,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:15:17Z' + timestamp: '2024-06-22T22:16:42Z' - torchscript_onnx_tflite: - inference_time: 164129.0 - throughput: 6.092768493075568 + inference_time: 152465.0 + throughput: 6.558882366444758 estimated_peak_memory_range: - min: 12288 - max: 2291528 + min: 3207168 + max: 6406512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -124,14 +126,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: j1p3q13l5 + job_id: jvgd0jx6p job_status: Passed torchscript_onnx_qnn: - inference_time: 164665.0 - throughput: 6.072935960890292 + inference_time: 152929.0 + throughput: 6.53898214203977 estimated_peak_memory_range: - min: 4337664 - max: 28704480 + min: 4329472 + max: 24015168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -139,7 +141,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 275 - job_id: jlpe4w115 + job_id: jegnxj6j5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -148,10 +150,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:15:19Z' + timestamp: '2024-06-22T22:16:45Z' + - torchscript_onnx_tflite: + inference_time: 152823.0 + throughput: 6.543517664225934 + estimated_peak_memory_range: + min: 3313664 + max: 6413032 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 235 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 235 + job_id: jz576qyng + job_status: Passed + torchscript_onnx_qnn: + inference_time: 153171.0 + throughput: 6.528650984847001 + estimated_peak_memory_range: + min: 3194880 + max: 22733104 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 275 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 275 + job_id: jopr9zvkp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:16:46Z' - torchscript_onnx_qnn: - inference_time: 145570.0 - throughput: 6.869547296833138 + inference_time: 102536.0 + throughput: 9.752672232191621 estimated_peak_memory_range: min: 4202496 max: 4202496 @@ -162,7 +202,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 275 - job_id: j7gjk2mx5 + job_id: jo5m4jn75 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -171,4 +211,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:15:18Z' 
+ timestamp: '2024-06-22T22:16:44Z' diff --git a/qai_hub_models/models/common.py b/qai_hub_models/models/common.py index fc75c06c..c76a0f25 100644 --- a/qai_hub_models/models/common.py +++ b/qai_hub_models/models/common.py @@ -11,16 +11,18 @@ class TargetRuntime(Enum): TFLITE = 0 QNN = 1 - ORT = 2 - PRECOMPILED_ORT = 3 + ONNX = 2 + PRECOMPILED_QNN_ONNX = 3 def __str__(self): return self.name.lower() @property def long_name(self): - if "precompiled" not in self.name.lower(): + if self.name.lower() in {"tflite", "qnn"}: return f"torchscript_onnx_{self.name.lower()}" + elif self.name.lower() == "onnx": + return f"torchscript_{self.name.lower()}" return f"{self.name.lower()}" diff --git a/qai_hub_models/models/controlnet_quantized/export.py b/qai_hub_models/models/controlnet_quantized/export.py index fedb6349..022f7297 100644 --- a/qai_hub_models/models/controlnet_quantized/export.py +++ b/qai_hub_models/models/controlnet_quantized/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, components: Optional[List[str]] = None, skip_profiling: bool = False, diff --git a/qai_hub_models/models/convnext_tiny/export.py b/qai_hub_models/models/convnext_tiny/export.py index 3e52ca90..5b67cefd 100644 --- a/qai_hub_models/models/convnext_tiny/export.py +++ b/qai_hub_models/models/convnext_tiny/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/convnext_tiny/perf.yaml b/qai_hub_models/models/convnext_tiny/perf.yaml index 70048eca..e16e930b 100644 --- a/qai_hub_models/models/convnext_tiny/perf.yaml +++ b/qai_hub_models/models/convnext_tiny/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: ConvNext-Tiny performance_metrics: - torchscript_onnx_tflite: - inference_time: 5717.0 - throughput: 174.91691446562882 + inference_time: 5590.0 + throughput: 178.89087656529517 estimated_peak_memory_range: - min: 45056 - max: 
2631376 + min: 28672 + max: 2570872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 328 - job_id: jvgd7qleg + job_id: jogkdjvvp job_status: Passed torchscript_onnx_qnn: - inference_time: 3769.0 - throughput: 265.32236667551075 + inference_time: 3772.0 + throughput: 265.11134676564154 estimated_peak_memory_range: - min: 81920 - max: 202159384 + min: 86016 + max: 202074888 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 223 - job_id: j0pxe6215 + job_id: j1p38ynm5 job_status: Passed - torchscript_onnx_ort: - inference_time: 16427.0 - throughput: 60.875388080599016 + torchscript_onnx: + inference_time: 16307.0 + throughput: 61.32335806708775 estimated_peak_memory_range: - min: 110592 - max: 152489568 + min: 12288 + max: 154338792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jep239n4g + job_id: jygzw124g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:16:04Z' + timestamp: '2024-06-22T22:17:35Z' - torchscript_onnx_tflite: - inference_time: 3988.0 - throughput: 250.75225677031094 + inference_time: 3967.0 + throughput: 252.07965717166624 estimated_peak_memory_range: min: 16384 - max: 212477920 + max: 216083312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 328 - job_id: jz57vl3l5 + job_id: jn5qwj0e5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2732.0 - throughput: 366.03221083455344 + inference_time: 2744.0 + throughput: 364.4314868804665 estimated_peak_memory_range: - min: 741376 - max: 87297136 + min: 618496 + max: 80298128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 223 - job_id: jo5mv6yw5 + job_id: jwgomjz15 job_status: Passed - torchscript_onnx_ort: - inference_time: 11884.0 - throughput: 84.14675193537529 + torchscript_onnx: + inference_time: 11827.0 + throughput: 84.5522955948254 estimated_peak_memory_range: - min: 139571200 - max: 200346752 + min: 643072 + max: 54405632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jqpyvj07p + job_id: jz5wxjw4p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:16:05Z' + timestamp: '2024-06-22T22:17:36Z' - torchscript_onnx_tflite: - inference_time: 5701.0 - throughput: 175.40782318891422 + inference_time: 5622.0 + throughput: 177.87264318747776 estimated_peak_memory_range: - min: 49152 - max: 2985728 + min: 53248 + max: 2296200 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 328 - job_id: jqp4jd0vp + job_id: j1gl7j425 job_status: Passed torchscript_onnx_qnn: - inference_time: 3779.0 - throughput: 264.6202699126753 + inference_time: 3788.0 + throughput: 263.99155227032736 estimated_peak_memory_range: - min: 94208 - max: 182002576 + min: 86016 + max: 160277224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 223 - job_id: jopr12j9g + job_id: j7gj1jd1g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:16:03Z' + timestamp: '2024-06-22T22:17:32Z' + - torchscript_onnx_tflite: + inference_time: 5592.0 + throughput: 178.826895565093 + estimated_peak_memory_range: + min: 28672 + max: 2290704 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 328 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 328 + job_id: jw56vk2np + job_status: Passed + torchscript_onnx_qnn: + inference_time: 3785.0 + throughput: 264.2007926023778 + estimated_peak_memory_range: + min: 32768 + max: 202380832 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 223 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 223 + job_id: jlpe2jo8p + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:17:34Z' - torchscript_onnx_qnn: - inference_time: 3907.0 - throughput: 255.9508574353724 + inference_time: 3768.0 + throughput: 265.3927813163482 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 223 - job_id: jegnrm8r5 + job_id: j1pv4jqzp job_status: Passed - torchscript_onnx_ort: - inference_time: 16908.0 - throughput: 59.143600662408325 + torchscript_onnx: + inference_time: 16939.0 + throughput: 59.03536218194699 estimated_peak_memory_range: - min: 294563840 - max: 294563840 + min: 294608896 + max: 294608896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: j2p0e2765 + job_id: jmg9860mp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:16:06Z' + timestamp: '2024-06-22T22:17:37Z' diff --git a/qai_hub_models/models/convnext_tiny_w8a16_quantized/export.py b/qai_hub_models/models/convnext_tiny_w8a16_quantized/export.py index 39153360..e5aac917 100644 --- a/qai_hub_models/models/convnext_tiny_w8a16_quantized/export.py +++ b/qai_hub_models/models/convnext_tiny_w8a16_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: 
target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -227,7 +227,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_tflite=False, supports_ort=False) + parser = export_parser(model_cls=Model, supports_tflite=False, supports_onnx=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/convnext_tiny_w8a16_quantized/perf.yaml b/qai_hub_models/models/convnext_tiny_w8a16_quantized/perf.yaml index 431c0b4f..08823d30 100644 --- a/qai_hub_models/models/convnext_tiny_w8a16_quantized/perf.yaml +++ b/qai_hub_models/models/convnext_tiny_w8a16_quantized/perf.yaml @@ -11,6 +11,8 @@ aggregated: - QCS8250 (Proxy) - QCS8550 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -30,6 +32,8 @@ aggregated: supported_chipsets: - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -39,11 +43,11 @@ models: - name: ConvNext-Tiny-w8a16-Quantized performance_metrics: - torchscript_onnx_qnn: - inference_time: 3272.0 - throughput: 305.6234718826406 + inference_time: 3120.0 + throughput: 320.5128205128205 estimated_peak_memory_range: - min: 323584 - max: 8383168 + min: 16384 + max: 118146192 primary_compute_unit: NPU precision: int8 layer_info: @@ -51,7 +55,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jogkrqm25 + job_id: jvgd0jn6p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -60,13 +64,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:16:49Z' + timestamp: '2024-06-22T22:18:20Z' - torchscript_onnx_qnn: - inference_time: 2286.0 - throughput: 437.4453193350831 + inference_time: 2222.0 + throughput: 450.04500450045003 estimated_peak_memory_range: - min: 0 - max: 90112528 + min: 315392 + max: 80248880 primary_compute_unit: NPU precision: int8 layer_info: @@ -74,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jn5q9ro4p + job_id: jz5wxjwzp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -83,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:16:50Z' + timestamp: '2024-06-22T22:18:21Z' - torchscript_onnx_qnn: - inference_time: 3255.0 - throughput: 307.21966205837174 + inference_time: 3133.0 + throughput: 319.1828917969997 estimated_peak_memory_range: - min: 16384 - max: 11232112 + min: 28672 + max: 97061744 primary_compute_unit: NPU precision: int8 layer_info: @@ -97,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jw56qzl0g + job_id: jnp13r2k5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -106,13 +110,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:16:52Z' + timestamp: '2024-06-22T22:18:23Z' + - torchscript_onnx_qnn: + inference_time: 3120.0 + throughput: 320.5128205128205 + estimated_peak_memory_range: + min: 192512 + max: 129273744 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 215 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 215 + job_id: jvgd0jnkp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:18:25Z' - torchscript_onnx_qnn: - inference_time: 
3567.0 - throughput: 280.3476310625175 + inference_time: 3257.0 + throughput: 307.0310101320233 estimated_peak_memory_range: - min: 221184 - max: 221184 + min: 331776 + max: 331776 primary_compute_unit: NPU precision: int8 layer_info: @@ -120,7 +147,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: j1gle2r8p + job_id: jmg9860qp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -129,4 +156,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:16:51Z' + timestamp: '2024-06-22T22:18:22Z' diff --git a/qai_hub_models/models/convnext_tiny_w8a8_quantized/export.py b/qai_hub_models/models/convnext_tiny_w8a8_quantized/export.py index 54c73379..66fc1288 100644 --- a/qai_hub_models/models/convnext_tiny_w8a8_quantized/export.py +++ b/qai_hub_models/models/convnext_tiny_w8a8_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -227,7 +227,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_tflite=False, supports_ort=False) + parser = export_parser(model_cls=Model, supports_tflite=False, supports_onnx=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/convnext_tiny_w8a8_quantized/perf.yaml b/qai_hub_models/models/convnext_tiny_w8a8_quantized/perf.yaml index cc741e4a..d1248c52 100644 --- a/qai_hub_models/models/convnext_tiny_w8a8_quantized/perf.yaml +++ b/qai_hub_models/models/convnext_tiny_w8a8_quantized/perf.yaml @@ -11,6 +11,8 @@ aggregated: - QCS8250 (Proxy) - QCS8550 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -30,6 +32,8 @@ aggregated: supported_chipsets: - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -39,11 +43,11 @@ models: - name: ConvNext-Tiny-w8a8-Quantized performance_metrics: - torchscript_onnx_qnn: - inference_time: 1723.0 - throughput: 580.3830528148578 + inference_time: 1711.0 + throughput: 584.4535359438925 estimated_peak_memory_range: - min: 12288 - max: 127334120 + min: 16384 + max: 126900200 primary_compute_unit: NPU precision: int8 layer_info: @@ -51,7 +55,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jygzvjkkp + job_id: jqpyn9rrg job_status: Passed 
reference_device_info: name: Samsung Galaxy S23 @@ -60,13 +64,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:17:38Z' + timestamp: '2024-06-22T22:19:13Z' - torchscript_onnx_qnn: - inference_time: 1207.0 - throughput: 828.5004142502071 + inference_time: 1194.0 + throughput: 837.5209380234506 estimated_peak_memory_range: - min: 12288 - max: 87553664 + min: 0 + max: 78843680 primary_compute_unit: NPU precision: int8 layer_info: @@ -74,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jz5wmq66g + job_id: j2p0kn325 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -83,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:17:39Z' + timestamp: '2024-06-22T22:19:15Z' - torchscript_onnx_qnn: - inference_time: 1724.0 - throughput: 580.046403712297 + inference_time: 1730.0 + throughput: 578.0346820809249 estimated_peak_memory_range: - min: 20480 - max: 10474536 + min: 167936 + max: 9009184 primary_compute_unit: NPU precision: int8 layer_info: @@ -97,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jnp1qez2g + job_id: jogkdj7yp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -106,13 +110,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:17:41Z' + timestamp: '2024-06-22T22:19:17Z' + - torchscript_onnx_qnn: + inference_time: 1732.0 + throughput: 577.3672055427252 + estimated_peak_memory_range: + min: 12288 + max: 116157904 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 215 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 215 + job_id: jn5qwje75 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:19:18Z' - torchscript_onnx_qnn: - inference_time: 1917.0 - throughput: 521.6484089723526 + inference_time: 1814.0 + throughput: 551.2679162072767 estimated_peak_memory_range: - min: 503808 - max: 503808 + min: 442368 + max: 442368 primary_compute_unit: NPU precision: int8 layer_info: @@ -120,7 +147,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jmg99wnlg + job_id: j1p88l0zp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -129,4 +156,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:17:40Z' + timestamp: '2024-06-22T22:19:16Z' diff --git a/qai_hub_models/models/ddrnet23_slim/app.py b/qai_hub_models/models/ddrnet23_slim/app.py index 50ecac58..ee216629 100644 --- a/qai_hub_models/models/ddrnet23_slim/app.py +++ b/qai_hub_models/models/ddrnet23_slim/app.py @@ -77,9 +77,8 @@ def segment_image( input_transform = normalize_image_transform() NCHW_fp32_torch_frames = input_transform(NCHW_fp32_torch_frames) - with torch.no_grad(): - # pred_mask is 8x downsampled - pred_masks = self.model(NCHW_fp32_torch_frames) + # pred_mask is 8x downsampled + pred_masks = self.model(NCHW_fp32_torch_frames) # Upsample pred mask to original image size # Need to upsample in the probability space, not in class labels diff --git a/qai_hub_models/models/ddrnet23_slim/export.py b/qai_hub_models/models/ddrnet23_slim/export.py index 6f64655c..4d06a440 100644 --- a/qai_hub_models/models/ddrnet23_slim/export.py +++ b/qai_hub_models/models/ddrnet23_slim/export.py @@ -38,7 +38,7 @@ def 
export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,13 +117,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -169,7 +168,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -187,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -209,7 +208,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) @@ -225,7 +224,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/ddrnet23_slim/model.py b/qai_hub_models/models/ddrnet23_slim/model.py index b9e0d9c8..5fbcac67 100644 --- a/qai_hub_models/models/ddrnet23_slim/model.py +++ b/qai_hub_models/models/ddrnet23_slim/model.py @@ -74,7 +74,7 @@ def from_pretrained(cls, checkpoint_path: str | None = None): model_dict.update(pretrained_dict) ddrnetslim_model.load_state_dict(model_dict) - ddrnetslim_model.to(torch.device("cpu")).eval() + ddrnetslim_model.to(torch.device("cpu")) return cls(ddrnetslim_model) diff --git a/qai_hub_models/models/ddrnet23_slim/perf.yaml b/qai_hub_models/models/ddrnet23_slim/perf.yaml index 0bb7c379..3c1376d8 100644 --- a/qai_hub_models/models/ddrnet23_slim/perf.yaml +++ b/qai_hub_models/models/ddrnet23_slim/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: DDRNet23-Slim performance_metrics: - torchscript_onnx_tflite: - inference_time: 6650.0 - throughput: 150.37593984962405 + inference_time: 5158.0 + throughput: 193.87359441644048 estimated_peak_memory_range: - min: 57344 - max: 27662296 + min: 1024000 + max: 2977832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 131 - job_id: jo5mv3xw5 + job_id: jlpe2je7p job_status: Passed 
- torchscript_onnx_ort: - inference_time: 9735.0 - throughput: 102.7221366204417 + torchscript_onnx: + inference_time: 9639.0 + throughput: 103.74520178441747 estimated_peak_memory_range: - min: 12599296 - max: 48937112 + min: 11792384 + max: 39932640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +65,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 155 - job_id: jep23ly4g + job_id: jo5m4jqy5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +74,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:18:08Z' + timestamp: '2024-06-22T22:19:54Z' - torchscript_onnx_tflite: - inference_time: 4742.0 - throughput: 210.88148460565162 + inference_time: 3672.0 + throughput: 272.33115468409585 estimated_peak_memory_range: - min: 16384 - max: 73234384 + min: 32768 + max: 70885712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -86,14 +88,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 131 - job_id: jegnr3vr5 + job_id: jygzw1ozg job_status: Passed - torchscript_onnx_ort: - inference_time: 6012.0 - throughput: 166.333998669328 + torchscript_onnx: + inference_time: 6290.0 + throughput: 158.9825119236884 estimated_peak_memory_range: - min: 524288 - max: 42757008 + min: 11943936 + max: 50404560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +103,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 155 - job_id: jqpyv637p + job_id: jegnxjmv5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +112,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:18:09Z' + timestamp: '2024-06-22T22:19:55Z' - torchscript_onnx_tflite: - inference_time: 6672.0 - throughput: 149.8800959232614 + inference_time: 5176.0 + throughput: 193.19938176197837 estimated_peak_memory_range: - min: 991232 - max: 15704000 + min: 1015808 + max: 16060832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -124,7 +126,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 131 - job_id: jopr1e39g + job_id: jz5wxj2zp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,10 +135,33 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:18:06Z' - - torchscript_onnx_ort: - inference_time: 9609.0 - throughput: 104.06910188365075 + timestamp: '2024-06-22T22:19:46Z' + - torchscript_onnx_tflite: + inference_time: 5150.0 + throughput: 194.1747572815534 + estimated_peak_memory_range: + min: 1003520 + max: 2689176 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 131 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 131 + job_id: jmg986jqp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:19:47Z' + - torchscript_onnx: + inference_time: 9690.0 + throughput: 103.19917440660474 estimated_peak_memory_range: min: 9854976 max: 9854976 @@ -147,7 +172,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 155 - job_id: j2p0el065 + job_id: jopr9z2vp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +181,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:18:10Z' + timestamp: '2024-06-22T22:19:56Z' diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet/export.py 
b/qai_hub_models/models/deeplabv3_plus_mobilenet/export.py index e1a7394c..bcb43a12 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet/export.py +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet/export.py @@ -38,7 +38,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,13 +117,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -169,7 +168,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -187,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -209,7 +208,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet/model.py b/qai_hub_models/models/deeplabv3_plus_mobilenet/model.py index 29b2cacb..35adb487 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet/model.py +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet/model.py @@ -29,7 +29,6 @@ def from_pretrained(cls, normalize_input: bool = True) -> DeepLabV3PlusMobilenet ).fetch() checkpoint = torch.load(dst, map_location=torch.device("cpu")) model.load_state_dict(checkpoint["state_dict"]) - model.eval() return cls(model, normalize_input) diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet/perf.yaml b/qai_hub_models/models/deeplabv3_plus_mobilenet/perf.yaml index dc3430eb..a9ad61d7 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet/perf.yaml +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: DeepLabV3-Plus-MobileNet performance_metrics: - torchscript_onnx_tflite: - inference_time: 13047.0 - throughput: 76.64597225415804 + inference_time: 13087.0 + throughput: 76.41170627340108 estimated_peak_memory_range: - min: 21032960 - max: 22679264 + min: 19292160 + max: 21253792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 98 - job_id: jogkr3x25 + job_id: 
jqpyn9jrg job_status: Passed torchscript_onnx_qnn: - inference_time: 12852.0 - throughput: 77.8089013383131 + inference_time: 12849.0 + throughput: 77.82706825433885 estimated_peak_memory_range: - min: 4210688 - max: 20359032 + min: 3178496 + max: 18335232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 124 - job_id: jw56qn40g + job_id: jn5qwjr75 job_status: Passed - torchscript_onnx_ort: - inference_time: 17763.0 - throughput: 56.296796712267074 + torchscript_onnx: + inference_time: 18989.0 + throughput: 52.66206751277055 estimated_peak_memory_range: - min: 40357888 - max: 70272240 + min: 38780928 + max: 77536960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j7gjkenx5 + job_id: j1pv4jr7p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:18:39Z' + timestamp: '2024-06-22T22:20:29Z' - torchscript_onnx_tflite: - inference_time: 9612.0 - throughput: 104.03662089055348 + inference_time: 9601.0 + throughput: 104.15581710238517 estimated_peak_memory_range: - min: 32768 - max: 69905408 + min: 36864 + max: 74388512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 98 - job_id: jn5q93q4p + job_id: j2p0kn225 job_status: Passed torchscript_onnx_qnn: - inference_time: 9482.0 - throughput: 105.4629824931449 + inference_time: 9463.0 + throughput: 105.67473317129874 estimated_peak_memory_range: - min: 3174400 - max: 58616848 + min: 3194880 + max: 52667232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 124 - job_id: j1p3qe0l5 + job_id: j1gl7j2e5 job_status: Passed - torchscript_onnx_ort: - inference_time: 13976.0 - throughput: 71.55123068116771 + torchscript_onnx: + inference_time: 14165.0 + throughput: 70.5965407695023 estimated_peak_memory_range: - min: 53886976 - max: 88707568 + min: 50692096 + max: 83221760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jlpe4km15 + job_id: j7gj1j27g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:18:40Z' + timestamp: '2024-06-22T22:20:30Z' - torchscript_onnx_tflite: - inference_time: 13150.0 - throughput: 76.04562737642586 + inference_time: 13215.0 + throughput: 75.67158531971245 estimated_peak_memory_range: - min: 22147072 - max: 24149720 + min: 22130688 + max: 24019416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 98 - job_id: j1gle3m8p + job_id: j1p88lmzp job_status: Passed torchscript_onnx_qnn: - inference_time: 12879.0 - throughput: 77.64577995185962 + inference_time: 12920.0 + throughput: 77.39938080495357 estimated_peak_memory_range: - min: 3198976 - max: 19885424 + min: 3182592 + max: 18873576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 124 - job_id: j1pvzvkjg + job_id: j1p38y1x5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: 
os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:18:38Z' + timestamp: '2024-06-22T22:20:26Z' + - torchscript_onnx_tflite: + inference_time: 13135.0 + throughput: 76.13247049866769 + estimated_peak_memory_range: + min: 22151168 + max: 38548096 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 98 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 98 + job_id: jogkdjqyp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 12927.0 + throughput: 77.35746886361878 + estimated_peak_memory_range: + min: 3198976 + max: 17945496 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 124 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 124 + job_id: jwgomjn45 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:20:27Z' - torchscript_onnx_qnn: - inference_time: 16510.0 - throughput: 60.56935190793458 + inference_time: 12858.0 + throughput: 77.77259293824856 estimated_peak_memory_range: min: 3170304 max: 3170304 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 124 - job_id: jwgoe36xp + job_id: jw56vkzvp job_status: Passed - torchscript_onnx_ort: - inference_time: 16653.0 - throughput: 60.04924037710923 + torchscript_onnx: + inference_time: 16624.0 + throughput: 60.15399422521656 estimated_peak_memory_range: - min: 105144320 - max: 105144320 + min: 104112128 + max: 104112128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jygzvrdkp + job_id: jlpe2jw7p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:18:41Z' + timestamp: '2024-06-22T22:20:31Z' diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/export.py b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/export.py index 88702832..9aa372f3 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/export.py +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -127,7 +127,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -177,7 +177,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -195,7 +195,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, 
exist_ok=True) @@ -217,7 +217,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/model.py b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/model.py index 88e47bf8..5965fb3d 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/model.py +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/model.py @@ -79,6 +79,5 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() final_model = cls(sim) return final_model diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/perf.yaml b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/perf.yaml index d4f699a6..368425e4 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/perf.yaml +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: DeepLabV3-Plus-MobileNet-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 3596.0 - throughput: 278.08676307007784 + inference_time: 3613.0 + throughput: 276.7783005812344 estimated_peak_memory_range: - min: 16384 - max: 1830768 + min: 24576 + max: 2019400 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 102 - job_id: jmg99wllg + job_id: jz5wxj3zp job_status: Passed torchscript_onnx_qnn: - inference_time: 5322.0 - throughput: 187.89928598271325 + inference_time: 5334.0 + throughput: 187.47656542932134 estimated_peak_memory_range: - min: 806912 - max: 7295144 + min: 811008 + max: 7181872 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 100 - job_id: jnp1qe48g - job_status: Passed - torchscript_onnx_ort: - inference_time: 16175.0 - throughput: 61.82380216383308 - estimated_peak_memory_range: - min: 42803200 - max: 54255496 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 120 - layers_on_gpu: 0 - layers_on_cpu: 51 - total_layers: 171 - job_id: j0pxeyk35 + job_id: j0pxmw6jg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:19:29Z' + timestamp: '2024-06-22T22:21:16Z' - torchscript_onnx_tflite: - inference_time: 2668.0 - throughput: 374.8125937031484 + inference_time: 2688.0 + throughput: 372.0238095238095 estimated_peak_memory_range: min: 12288 - max: 60104416 + max: 61340080 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 102 - job_id: jnp1qe42g + job_id: jmg986yqp job_status: Passed torchscript_onnx_qnn: - inference_time: 3937.0 - throughput: 254.00050800101602 + inference_time: 3901.0 + throughput: 256.3445270443476 estimated_peak_memory_range: min: 802816 - max: 61474288 + max: 57510912 
primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 100 - job_id: jvgd7oxrg - job_status: Passed - torchscript_onnx_ort: - inference_time: 12210.0 - throughput: 81.9000819000819 - estimated_peak_memory_range: - min: 33013760 - max: 87227648 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 120 - layers_on_gpu: 0 - layers_on_cpu: 51 - total_layers: 171 - job_id: jo5mv3nd5 + job_id: jo5m4j6y5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +120,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:19:30Z' + timestamp: '2024-06-22T22:21:18Z' - torchscript_onnx_tflite: inference_time: 3596.0 throughput: 278.08676307007784 estimated_peak_memory_range: min: 12288 - max: 8750088 + max: 9579064 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 102 - job_id: jvgd7oxeg + job_id: jnp13rwk5 job_status: Passed torchscript_onnx_qnn: - inference_time: 5333.0 - throughput: 187.51171948246764 + inference_time: 5340.0 + throughput: 187.26591760299627 estimated_peak_memory_range: - min: 20480 - max: 12661968 + min: 0 + max: 156467336 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 100 - job_id: jqp4jvl8p + job_id: jopr9zevp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +158,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:19:28Z' + timestamp: '2024-06-22T22:21:20Z' - torchscript_onnx_tflite: - inference_time: 14989.0 - throughput: 66.71559143371806 + inference_time: 3607.0 + throughput: 277.2387025228722 estimated_peak_memory_range: - min: 12288 - max: 39155056 + min: 16384 + max: 2009088 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,7 +172,45 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 102 - job_id: jz5wmqe3g + job_id: jvgd0jqkp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 5336.0 + throughput: 187.4062968515742 + estimated_peak_memory_range: + min: 24576 + max: 11405976 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 100 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 100 + job_id: jep2j2lx5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:21:21Z' + - torchscript_onnx_tflite: + inference_time: 14963.0 + throughput: 66.83151774376796 + estimated_peak_memory_range: + min: 40960 + max: 41957392 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 102 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 102 + job_id: jz576qlqg job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -207,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-08T22:19:23Z' + timestamp: '2024-06-22T22:21:14Z' - torchscript_onnx_tflite: - inference_time: 126163.0 - throughput: 7.926254131559966 + inference_time: 121839.0 + throughput: 8.20755258989322 estimated_peak_memory_range: - min: 11575296 - max: 14463800 + min: 11517952 + max: 28744328 primary_compute_unit: NPU precision: int8 layer_info: @@ -221,7 +233,7 @@ models: layers_on_gpu: 3 layers_on_cpu: 0 total_layers: 102 - job_id: jmg99wlwg + 
job_id: jqp48zdqg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -230,13 +242,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-08T22:19:24Z' + timestamp: '2024-06-22T22:21:15Z' - torchscript_onnx_qnn: - inference_time: 5241.0 - throughput: 190.80328181644725 + inference_time: 4512.0 + throughput: 221.63120567375887 estimated_peak_memory_range: - min: 798720 - max: 798720 + min: 815104 + max: 815104 primary_compute_unit: NPU precision: int8 layer_info: @@ -244,22 +256,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 100 - job_id: jz57vxyv5 - job_status: Passed - torchscript_onnx_ort: - inference_time: 22921.0 - throughput: 43.628113956633655 - estimated_peak_memory_range: - min: 59097088 - max: 59097088 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 120 - layers_on_gpu: 0 - layers_on_cpu: 51 - total_layers: 171 - job_id: jegnr36k5 + job_id: jegnxj3v5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -268,4 +265,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:19:31Z' + timestamp: '2024-06-22T22:21:19Z' diff --git a/qai_hub_models/models/deeplabv3_resnet50/export.py b/qai_hub_models/models/deeplabv3_resnet50/export.py index 1f3fda9b..f9d79a7f 100644 --- a/qai_hub_models/models/deeplabv3_resnet50/export.py +++ b/qai_hub_models/models/deeplabv3_resnet50/export.py @@ -38,7 +38,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,14 +117,13 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0,output_1" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -170,7 +169,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -210,7 +209,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0,output_1", inference_result, target_runtime ) @@ -228,8 +227,8 @@ def main(): parser = export_parser( model_cls=Model, supports_qnn=False, - supports_ort=False, - supports_precompiled_ort=False, + supports_onnx=False, + supports_precompiled_qnn_onnx=False, ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/deeplabv3_resnet50/model.py b/qai_hub_models/models/deeplabv3_resnet50/model.py index 
9dc8cdb7..d43dff14 100644 --- a/qai_hub_models/models/deeplabv3_resnet50/model.py +++ b/qai_hub_models/models/deeplabv3_resnet50/model.py @@ -22,7 +22,7 @@ class DeepLabV3_ResNet50(DeepLabV3Model): @classmethod def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> DeepLabV3_ResNet50: - model = tv_models.segmentation.deeplabv3_resnet50(weights=weights).eval() + model = tv_models.segmentation.deeplabv3_resnet50(weights=weights) return cls(model) def get_hub_compile_options( diff --git a/qai_hub_models/models/deeplabv3_resnet50/perf.yaml b/qai_hub_models/models/deeplabv3_resnet50/perf.yaml index a21c01a6..f21df1a7 100644 --- a/qai_hub_models/models/deeplabv3_resnet50/perf.yaml +++ b/qai_hub_models/models/deeplabv3_resnet50/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: DeepLabV3-ResNet50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 292980.0 - throughput: 3.413202266366305 + inference_time: 294244.0 + throughput: 3.39853998722149 estimated_peak_memory_range: - min: 2162688 - max: 149701296 + min: 12288 + max: 149248280 primary_compute_unit: GPU precision: fp16 layer_info: @@ -48,7 +50,7 @@ models: layers_on_gpu: 95 layers_on_cpu: 0 total_layers: 95 - job_id: jep23lkrg + job_id: jw56vknvp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -57,13 +59,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:19:58Z' + timestamp: '2024-06-22T22:21:55Z' - torchscript_onnx_tflite: - inference_time: 223885.0 - throughput: 4.466578823949796 + inference_time: 211646.0 + throughput: 4.72487077478431 estimated_peak_memory_range: - min: 65536 - max: 32739680 + min: 21852160 + max: 55397216 primary_compute_unit: GPU precision: fp16 layer_info: @@ -71,7 +73,7 @@ models: layers_on_gpu: 95 layers_on_cpu: 0 total_layers: 95 - job_id: jqpyv618p + job_id: j1p38yex5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -80,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:19:59Z' + timestamp: '2024-06-22T22:21:56Z' - torchscript_onnx_tflite: - inference_time: 291243.0 - throughput: 3.4335589181542563 + inference_time: 290539.0 + throughput: 3.4418787150778383 estimated_peak_memory_range: - min: 5476352 - max: 182706000 + min: 2146304 + max: 149777592 primary_compute_unit: GPU precision: fp16 layer_info: @@ -94,7 +96,7 @@ models: layers_on_gpu: 95 layers_on_cpu: 0 total_layers: 95 - job_id: j2p0elz95 + job_id: jwgomj345 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -103,4 +105,27 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:20:00Z' + timestamp: '2024-06-22T22:21:57Z' + - torchscript_onnx_tflite: + inference_time: 291805.0 + throughput: 3.4269460770034783 + estimated_peak_memory_range: + min: 2203648 + max: 149323832 + primary_compute_unit: GPU + precision: fp16 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 95 + layers_on_cpu: 0 + total_layers: 95 + job_id: j1pv4jv7p + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: 
'2024-06-22T22:21:58Z' diff --git a/qai_hub_models/models/densenet121/export.py b/qai_hub_models/models/densenet121/export.py index 9f0c3592..43f99d5b 100644 --- a/qai_hub_models/models/densenet121/export.py +++ b/qai_hub_models/models/densenet121/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/densenet121/perf.yaml b/qai_hub_models/models/densenet121/perf.yaml index 4cfd87c1..8544b0cd 100644 --- a/qai_hub_models/models/densenet121/perf.yaml +++ b/qai_hub_models/models/densenet121/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: DenseNet-121 performance_metrics: - torchscript_onnx_tflite: - inference_time: 1946.0 - throughput: 513.874614594039 + inference_time: 1943.0 + throughput: 514.668039114771 estimated_peak_memory_range: - min: 20480 - max: 2555328 + min: 45056 + max: 2879152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 312 - job_id: jygzvr2op + job_id: jqp48zvqg job_status: Passed torchscript_onnx_qnn: - inference_time: 1998.0 - throughput: 500.5005005005005 + inference_time: 1986.0 + throughput: 503.5246727089627 estimated_peak_memory_range: - min: 647168 - max: 7884416 + min: 643072 + max: 41424160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: jnp1qe28g + job_id: jopr9zyvp job_status: Passed - torchscript_onnx_ort: - inference_time: 1954.0 - throughput: 511.77072671443193 + torchscript_onnx: + inference_time: 1989.0 + throughput: 502.76520864756156 estimated_peak_memory_range: - min: 16384 - max: 41751336 + min: 12288 + max: 56458408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: j0pxey935 + job_id: jogkdjyyp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: 
Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:20:34Z' + timestamp: '2024-06-22T22:22:38Z' - torchscript_onnx_tflite: - inference_time: 1318.0 - throughput: 758.7253414264036 + inference_time: 1314.0 + throughput: 761.03500761035 estimated_peak_memory_range: min: 12288 - max: 96529440 + max: 100921632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 312 - job_id: jz5wmqw3g + job_id: j0pxmwyjg job_status: Passed torchscript_onnx_qnn: - inference_time: 1329.0 - throughput: 752.4454477050414 + inference_time: 1321.0 + throughput: 757.002271006813 estimated_peak_memory_range: min: 618496 - max: 158201904 + max: 147109104 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: jvgd7onrg + job_id: jep2j2mx5 job_status: Passed - torchscript_onnx_ort: - inference_time: 1326.0 - throughput: 754.1478129713424 + torchscript_onnx: + inference_time: 1320.0 + throughput: 757.5757575757576 estimated_peak_memory_range: - min: 618496 - max: 52734944 + min: 0 + max: 38797472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: jo5mv3ed5 + job_id: jn5qwj275 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:20:35Z' + timestamp: '2024-06-22T22:22:39Z' - torchscript_onnx_tflite: - inference_time: 1932.0 - throughput: 517.5983436853002 + inference_time: 1921.0 + throughput: 520.5622071837585 estimated_peak_memory_range: - min: 24576 - max: 2118480 + min: 12288 + max: 2417360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 312 - job_id: jmg99w0wg + job_id: jo5m4j3y5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1991.0 - throughput: 502.26017076845807 + inference_time: 1981.0 + throughput: 504.79555779909134 estimated_peak_memory_range: - min: 16384 - max: 39662792 + min: 24576 + max: 40999864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: jqp4jvn8p + job_id: j2p0knr25 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:20:33Z' + timestamp: '2024-06-22T22:22:36Z' + - torchscript_onnx_tflite: + inference_time: 1936.0 + throughput: 516.5289256198347 + estimated_peak_memory_range: + min: 12288 + max: 1911888 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 312 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 312 + job_id: jegnxjev5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1980.0 + throughput: 505.050505050505 + estimated_peak_memory_range: + min: 20480 + max: 30511600 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 372 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 372 + job_id: j1p88l7zp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:22:37Z' - torchscript_onnx_qnn: - inference_time: 2224.0 - throughput: 449.64028776978415 + 
inference_time: 1994.0 + throughput: 501.5045135406219 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: jz57vx2v5 + job_id: jqpyn9drg job_status: Passed - torchscript_onnx_ort: - inference_time: 2023.0 - throughput: 494.3153732081068 + torchscript_onnx: + inference_time: 2032.0 + throughput: 492.12598425196853 estimated_peak_memory_range: - min: 647168 - max: 647168 + min: 692224 + max: 692224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: jegnr30k5 + job_id: j1gl7jke5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:20:36Z' + timestamp: '2024-06-22T22:22:41Z' diff --git a/qai_hub_models/models/detr_resnet101/export.py b/qai_hub_models/models/detr_resnet101/export.py index 8456a642..07919010 100644 --- a/qai_hub_models/models/detr_resnet101/export.py +++ b/qai_hub_models/models/detr_resnet101/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -186,7 +185,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -216,7 +215,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/detr_resnet101/perf.yaml b/qai_hub_models/models/detr_resnet101/perf.yaml index ab111618..405618e2 100644 --- a/qai_hub_models/models/detr_resnet101/perf.yaml +++ b/qai_hub_models/models/detr_resnet101/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,26 +38,26 @@ models: - name: DETR-ResNet101 performance_metrics: - torchscript_onnx_tflite: - inference_time: 24522.0 - throughput: 40.779708017290595 + 
inference_time: 23559.0 + throughput: 42.44662337111083 estimated_peak_memory_range: - min: 405504 - max: 3620824 + min: 397312 + max: 7286104 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 839 + layers_on_npu: 856 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 839 - job_id: jep23lxrg + total_layers: 856 + job_id: j1p38ymx5 job_status: Passed - torchscript_onnx_ort: - inference_time: 22510.0 - throughput: 44.4247001332741 + torchscript_onnx: + inference_time: 22403.0 + throughput: 44.63687898942106 estimated_peak_memory_range: - min: 53248 - max: 301197496 + min: 540672 + max: 282693624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +65,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: jw56qn26g + job_id: jvgd0jvkp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,28 +74,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:21:16Z' + timestamp: '2024-06-22T22:23:24Z' - torchscript_onnx_tflite: - inference_time: 17593.0 - throughput: 56.840788950150625 + inference_time: 16655.0 + throughput: 60.04202942059442 estimated_peak_memory_range: - min: 385024 - max: 284374432 + min: 401408 + max: 300844176 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 839 + layers_on_npu: 856 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 839 - job_id: jqpyv6z8p + total_layers: 856 + job_id: jwgomjv45 job_status: Passed - torchscript_onnx_ort: - inference_time: 15841.0 - throughput: 63.127327820213374 + torchscript_onnx: + inference_time: 15968.0 + throughput: 62.62525050100201 estimated_peak_memory_range: - min: 38055936 - max: 153822592 + min: 4907008 + max: 91770592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +103,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: j1p3qen35 + job_id: jz5wxj9jp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,21 +112,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:21:17Z' + timestamp: '2024-06-22T22:23:25Z' - torchscript_onnx_tflite: - inference_time: 24627.0 - throughput: 40.60583911966541 + inference_time: 23444.0 + throughput: 42.65483705852244 estimated_peak_memory_range: - min: 413696 - max: 3309184 + min: 430080 + max: 4002928 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 839 + layers_on_npu: 856 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 839 - job_id: j2p0el495 + total_layers: 856 + job_id: j1pv4jw7p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +135,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:21:11Z' - - torchscript_onnx_ort: - inference_time: 22958.0 - throughput: 43.557801202195314 + timestamp: '2024-06-22T22:23:15Z' + - torchscript_onnx_tflite: + inference_time: 23418.0 + throughput: 42.70219489281749 + estimated_peak_memory_range: + min: 401408 + max: 3285856 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 856 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 856 + job_id: j7gj1jl7g + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:23:16Z' + - torchscript_onnx: + inference_time: 22866.0 + throughput: 43.733053441791306 estimated_peak_memory_range: - min: 100909056 - max: 100909056 
+ min: 106487808 + max: 106487808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +172,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: jwgoe3zqp + job_id: jmg9864vp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +181,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:21:18Z' + timestamp: '2024-06-22T22:23:26Z' diff --git a/qai_hub_models/models/detr_resnet101_dc5/export.py b/qai_hub_models/models/detr_resnet101_dc5/export.py index 260f917d..36780ea0 100644 --- a/qai_hub_models/models/detr_resnet101_dc5/export.py +++ b/qai_hub_models/models/detr_resnet101_dc5/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -186,7 +185,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -216,7 +215,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/detr_resnet101_dc5/perf.yaml b/qai_hub_models/models/detr_resnet101_dc5/perf.yaml index 6760c8c6..d9839baa 100644 --- a/qai_hub_models/models/detr_resnet101_dc5/perf.yaml +++ b/qai_hub_models/models/detr_resnet101_dc5/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,26 +38,26 @@ models: - name: DETR-ResNet101-DC5 performance_metrics: - torchscript_onnx_tflite: - inference_time: 139662.0 - throughput: 7.160143775687016 + inference_time: 139854.0 + throughput: 7.150313898780157 estimated_peak_memory_range: - min: 1216512 - max: 4184536 + min: 135168 + max: 3555384 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 840 + layers_on_npu: 857 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 840 - job_id: j7gjkezv5 + total_layers: 857 + job_id: jvgd0jvlp job_status: Passed - torchscript_onnx_ort: - inference_time: 125062.0 - 
throughput: 7.996033967152292 + torchscript_onnx: + inference_time: 134388.0 + throughput: 7.4411405780278 estimated_peak_memory_range: - min: 2994176 - max: 315584184 + min: 3080192 + max: 305207456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +65,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: jz57vx0v5 + job_id: j2p0kn1e5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,28 +74,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:21:59Z' + timestamp: '2024-06-22T22:24:10Z' - torchscript_onnx_tflite: - inference_time: 106500.0 - throughput: 9.389671361502348 + inference_time: 107736.0 + throughput: 9.281948466622113 estimated_peak_memory_range: - min: 991232 - max: 494886848 + min: 339968 + max: 509199680 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 840 + layers_on_npu: 857 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 840 - job_id: jlpe4keo5 + total_layers: 857 + job_id: jz576qjrg job_status: Passed - torchscript_onnx_ort: - inference_time: 96040.0 - throughput: 10.412328196584756 + torchscript_onnx: + inference_time: 95702.0 + throughput: 10.44910242210194 estimated_peak_memory_range: - min: 4145152 - max: 167656240 + min: 4952064 + max: 150418448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +103,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: jqp4jvk8p + job_id: j1p88l38p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,21 +112,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:22:00Z' + timestamp: '2024-06-22T22:24:11Z' - torchscript_onnx_tflite: - inference_time: 139388.0 - throughput: 7.174218727580566 + inference_time: 138735.0 + throughput: 7.2079864489854755 estimated_peak_memory_range: - min: 1548288 - max: 4377008 + min: 1204224 + max: 3962280 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 840 + layers_on_npu: 857 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 840 - job_id: jygzvroop + total_layers: 857 + job_id: jqp48zxlg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +135,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:21:54Z' - - torchscript_onnx_ort: - inference_time: 124053.0 - throughput: 8.061070671406576 + timestamp: '2024-06-22T22:24:01Z' + - torchscript_onnx_tflite: + inference_time: 146672.0 + throughput: 6.81793389331297 + estimated_peak_memory_range: + min: 16384 + max: 3270192 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 857 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 857 + job_id: j0pxmw79g + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:24:03Z' + - torchscript_onnx: + inference_time: 124132.0 + throughput: 8.05594045048819 estimated_peak_memory_range: - min: 73572352 - max: 73572352 + min: 77479936 + max: 77479936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +172,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: j0pxeyn35 + job_id: jogkdjlop job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +181,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:22:01Z' + 
timestamp: '2024-06-22T22:24:12Z' diff --git a/qai_hub_models/models/detr_resnet50/export.py b/qai_hub_models/models/detr_resnet50/export.py index aae80346..78f4d4e0 100644 --- a/qai_hub_models/models/detr_resnet50/export.py +++ b/qai_hub_models/models/detr_resnet50/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -186,7 +185,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -216,7 +215,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/detr_resnet50/perf.yaml b/qai_hub_models/models/detr_resnet50/perf.yaml index fff85076..05978ec6 100644 --- a/qai_hub_models/models/detr_resnet50/perf.yaml +++ b/qai_hub_models/models/detr_resnet50/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,26 +38,26 @@ models: - name: DETR-ResNet50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 21615.0 - throughput: 46.26416840157298 + inference_time: 19708.0 + throughput: 50.74081591231987 estimated_peak_memory_range: - min: 2134016 - max: 5200288 + min: 413696 + max: 3185024 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 771 + layers_on_npu: 788 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 771 - job_id: jegnr3lk5 + total_layers: 788 + job_id: j1gl7j0l5 job_status: Passed - torchscript_onnx_ort: - inference_time: 16643.0 - throughput: 60.08532115604158 + torchscript_onnx: + inference_time: 16775.0 + throughput: 59.61251862891207 estimated_peak_memory_range: - min: 1540096 - max: 211446576 + min: 1146880 + max: 209226928 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +65,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 737 - job_id: j1gle36jp + job_id: jmg9863vp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,28 +74,28 @@ models: 
os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:22:37Z' + timestamp: '2024-06-22T22:24:51Z' - torchscript_onnx_tflite: - inference_time: 15132.0 - throughput: 66.08511763150939 + inference_time: 13806.0 + throughput: 72.43227582210633 estimated_peak_memory_range: - min: 36864 - max: 231347824 + min: 385024 + max: 245987728 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 771 + layers_on_npu: 788 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 771 - job_id: jep23l0rg + total_layers: 788 + job_id: jw56vk37p job_status: Passed - torchscript_onnx_ort: - inference_time: 11694.0 - throughput: 85.51393877201984 + torchscript_onnx: + inference_time: 11436.0 + throughput: 87.44316194473592 estimated_peak_memory_range: - min: 2809856 - max: 97007056 + min: 3637248 + max: 83196560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +103,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 737 - job_id: jw56qne6g + job_id: jnp13rdl5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,21 +112,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:22:38Z' + timestamp: '2024-06-22T22:24:52Z' - torchscript_onnx_tflite: - inference_time: 21665.0 - throughput: 46.157396722824835 + inference_time: 19684.0 + throughput: 50.80268238162975 estimated_peak_memory_range: - min: 438272 - max: 4576272 + min: 385024 + max: 3607136 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 771 + layers_on_npu: 788 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 771 - job_id: jqpyv6r8p + total_layers: 788 + job_id: j1p38y4z5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +135,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:22:32Z' - - torchscript_onnx_ort: - inference_time: 16944.0 - throughput: 59.01794145420208 + timestamp: '2024-06-22T22:24:42Z' + - torchscript_onnx_tflite: + inference_time: 19665.0 + throughput: 50.851767098906684 + estimated_peak_memory_range: + min: 16384 + max: 2576216 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 788 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 788 + job_id: jwgomj1d5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:24:44Z' + - torchscript_onnx: + inference_time: 17051.0 + throughput: 58.64758665180928 estimated_peak_memory_range: - min: 116158464 - max: 116158464 + min: 122880 + max: 122880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +172,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 737 - job_id: j1p3qev35 + job_id: jvgd0jrlp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +181,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:22:39Z' + timestamp: '2024-06-22T22:24:53Z' diff --git a/qai_hub_models/models/detr_resnet50_dc5/export.py b/qai_hub_models/models/detr_resnet50_dc5/export.py index af83c17f..292942fb 100644 --- a/qai_hub_models/models/detr_resnet50_dc5/export.py +++ b/qai_hub_models/models/detr_resnet50_dc5/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, 
skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -186,7 +185,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -216,7 +215,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/detr_resnet50_dc5/perf.yaml b/qai_hub_models/models/detr_resnet50_dc5/perf.yaml index b9e42be2..6292defe 100644 --- a/qai_hub_models/models/detr_resnet50_dc5/perf.yaml +++ b/qai_hub_models/models/detr_resnet50_dc5/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,26 +38,26 @@ models: - name: DETR-ResNet50-DC5 performance_metrics: - torchscript_onnx_tflite: - inference_time: 133335.0 - throughput: 7.49990625117186 + inference_time: 142484.0 + throughput: 7.018331882878077 estimated_peak_memory_range: - min: 135168 - max: 3805824 + min: 16384 + max: 111187464 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 772 + layers_on_npu: 789 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 772 - job_id: jz5wmq33g + total_layers: 789 + job_id: jqp48zjlg job_status: Passed - torchscript_onnx_ort: - inference_time: 117630.0 - throughput: 8.501232678738416 + torchscript_onnx: + inference_time: 126992.0 + throughput: 7.874511780269623 estimated_peak_memory_range: - min: 2134016 - max: 232241232 + min: 2805760 + max: 227776696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +65,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 737 - job_id: jo5mv36d5 + job_id: jogkdjrop job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,28 +74,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:23:14Z' + timestamp: '2024-06-22T22:25:34Z' - torchscript_onnx_tflite: - inference_time: 102075.0 - throughput: 9.796718099436688 + inference_time: 102232.0 + throughput: 9.78167305735973 estimated_peak_memory_range: - min: 163840 - max: 444293712 + min: 1269760 + max: 456777392 primary_compute_unit: NPU precision: fp16 layer_info: - 
layers_on_npu: 772 + layers_on_npu: 789 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 772 - job_id: jmg99wywg + total_layers: 789 + job_id: j0pxmwe9g job_status: Passed - torchscript_onnx_ort: - inference_time: 90172.0 - throughput: 11.089917047420485 + torchscript_onnx: + inference_time: 90670.0 + throughput: 11.029006286533583 estimated_peak_memory_range: - min: 6778880 - max: 152435808 + min: 6930432 + max: 133973776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +103,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 737 - job_id: jegnr3mk5 + job_id: jn5qwj9m5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,21 +112,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:23:15Z' + timestamp: '2024-06-22T22:25:35Z' - torchscript_onnx_tflite: - inference_time: 132335.0 - throughput: 7.556579891940908 + inference_time: 131821.0 + throughput: 7.586044712147533 estimated_peak_memory_range: - min: 1204224 - max: 4586176 + min: 1179648 + max: 108134792 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 772 + layers_on_npu: 789 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 772 - job_id: jnp1qew8g + total_layers: 789 + job_id: jo5m4jvq5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +135,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:23:09Z' - - torchscript_onnx_ort: - inference_time: 116939.0 - throughput: 8.551467004164564 + timestamp: '2024-06-22T22:25:25Z' + - torchscript_onnx_tflite: + inference_time: 131445.0 + throughput: 7.607744684088402 + estimated_peak_memory_range: + min: 1212416 + max: 4305656 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 789 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 789 + job_id: jegnxjrm5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:25:26Z' + - torchscript_onnx: + inference_time: 116855.0 + throughput: 8.557614137178554 estimated_peak_memory_range: - min: 22482944 - max: 22482944 + min: 136323072 + max: 136323072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +172,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 737 - job_id: jopr1e20g + job_id: j1gl7jel5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +181,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:23:16Z' + timestamp: '2024-06-22T22:25:36Z' diff --git a/qai_hub_models/models/efficientnet_b0/export.py b/qai_hub_models/models/efficientnet_b0/export.py index d4d7827c..872b7a3c 100644 --- a/qai_hub_models/models/efficientnet_b0/export.py +++ b/qai_hub_models/models/efficientnet_b0/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != 
TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/efficientnet_b0/perf.yaml b/qai_hub_models/models/efficientnet_b0/perf.yaml index 6383102d..6c34af8a 100644 --- a/qai_hub_models/models/efficientnet_b0/perf.yaml +++ b/qai_hub_models/models/efficientnet_b0/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: EfficientNet-B0 performance_metrics: - torchscript_onnx_tflite: - inference_time: 1626.0 - throughput: 615.0061500615006 + inference_time: 1607.0 + throughput: 622.2775357809583 estimated_peak_memory_range: - min: 16384 - max: 1985056 + min: 12288 + max: 2260744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jqpyv6j8p + job_id: j1p38yqz5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1678.0 - throughput: 595.9475566150179 + inference_time: 1668.0 + throughput: 599.5203836930456 estimated_peak_memory_range: - min: 16384 - max: 315561544 + min: 622592 + max: 78658688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: jogkr3qw5 + job_id: jlpe2j40p job_status: Passed - torchscript_onnx_ort: - inference_time: 1623.0 - throughput: 616.1429451632779 + torchscript_onnx: + inference_time: 1644.0 + throughput: 608.272506082725 estimated_peak_memory_range: - min: 16384 - max: 80982248 + min: 12288 + max: 58718544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j1p3qe135 + job_id: jvgd0j7lp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:23:45Z' + timestamp: '2024-06-22T22:26:06Z' - torchscript_onnx_tflite: - inference_time: 1142.0 - throughput: 875.6567425569177 + inference_time: 1143.0 + throughput: 874.8906386701663 estimated_peak_memory_range: min: 16384 - max: 72610976 + max: 76018192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j2p0el295 + job_id: jwgomjed5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1186.0 - throughput: 843.1703204047218 + inference_time: 1179.0 + throughput: 848.1764206955047 estimated_peak_memory_range: min: 618496 - max: 72353488 + max: 60351408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: jn5q93rnp + job_id: jygzw1v6g job_status: Passed - torchscript_onnx_ort: - inference_time: 1173.0 - throughput: 852.5149190110827 + torchscript_onnx: + inference_time: 1177.0 + throughput: 849.6176720475786 estimated_peak_memory_range: min: 618496 - max: 36882944 + max: 28149584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jwgoe3nqp + job_id: jz576q6rg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:23:46Z' + timestamp: '2024-06-22T22:26:07Z' - torchscript_onnx_tflite: - inference_time: 1631.0 - throughput: 613.1207847946046 + inference_time: 1618.0 + throughput: 618.0469715698393 estimated_peak_memory_range: - min: 16384 - max: 2841808 + min: 12288 + max: 1906624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j1p8wzmkp + job_id: j1pv4jzmp job_status: Passed torchscript_onnx_qnn: - inference_time: 1683.0 - throughput: 594.1770647653001 + inference_time: 1666.0 + throughput: 600.2400960384153 estimated_peak_memory_range: - min: 622592 - max: 88821056 + min: 618496 + max: 88931072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: jw56qnz6g + job_id: jmg9869vp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:23:44Z' + timestamp: '2024-06-22T22:26:04Z' + - torchscript_onnx_tflite: + inference_time: 1624.0 + throughput: 615.7635467980296 + estimated_peak_memory_range: + min: 28672 + max: 2761664 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 245 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 245 + job_id: j7gj1jk8g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1685.0 + throughput: 593.4718100890208 + estimated_peak_memory_range: + min: 622592 + max: 308210976 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 243 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 243 + job_id: jnp13rql5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:26:05Z' - torchscript_onnx_qnn: - inference_time: 1838.0 - throughput: 544.069640914037 + inference_time: 1760.0 + throughput: 568.1818181818181 estimated_peak_memory_range: - min: 1310720 - max: 1310720 + min: 602112 + max: 602112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: j1gle32jp + job_id: jz5wxjmjp job_status: Passed - torchscript_onnx_ort: - inference_time: 1641.0 - throughput: 609.3845216331505 + torchscript_onnx: + inference_time: 1690.0 + throughput: 591.7159763313609 estimated_peak_memory_range: - min: 32149504 - max: 32149504 + min: 31383552 + max: 31383552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j1pvzvvkg + job_id: jqp48z8lg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 
@@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:23:47Z' + timestamp: '2024-06-22T22:26:08Z' diff --git a/qai_hub_models/models/esrgan/export.py b/qai_hub_models/models/esrgan/export.py index 47c6b95d..2043234d 100644 --- a/qai_hub_models/models/esrgan/export.py +++ b/qai_hub_models/models/esrgan/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -186,7 +185,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -208,7 +207,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/esrgan/perf.yaml b/qai_hub_models/models/esrgan/perf.yaml index e35a93e5..055e2306 100644 --- a/qai_hub_models/models/esrgan/perf.yaml +++ b/qai_hub_models/models/esrgan/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: ESRGAN performance_metrics: - torchscript_onnx_tflite: - inference_time: 66520.0 - throughput: 15.033072760072159 + inference_time: 68462.0 + throughput: 14.606643101282463 estimated_peak_memory_range: - min: 4288512 - max: 7346848 + min: 4292608 + max: 12423592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1024 - job_id: jlpe4kko5 + job_id: jo5m4j4q5 job_status: Passed torchscript_onnx_qnn: - inference_time: 67593.0 - throughput: 14.794431376030062 + inference_time: 65477.0 + throughput: 15.272538448615544 estimated_peak_memory_range: - min: 73728 - max: 104762776 + min: 102400 + max: 106544352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: jmg99wwwg + job_id: jqpyn9n4g job_status: Passed - torchscript_onnx_ort: - inference_time: 
68322.0 - throughput: 14.636573870788325 + torchscript_onnx: + inference_time: 69101.0 + throughput: 14.47157059955717 estimated_peak_memory_range: - min: 6356992 - max: 154422496 + min: 6369280 + max: 152418736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jmg99ww8g + job_id: j1gl7j7l5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:24:28Z' + timestamp: '2024-06-22T22:26:52Z' - torchscript_onnx_tflite: - inference_time: 56935.0 - throughput: 17.56388864494599 + inference_time: 52385.0 + throughput: 19.08943399828195 estimated_peak_memory_range: - min: 86016 - max: 583340176 + min: 3260416 + max: 610205168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1024 - job_id: jygzvrrop + job_id: jegnxjxm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 50707.0 - throughput: 19.72114303745045 + inference_time: 49752.0 + throughput: 20.099694484643834 estimated_peak_memory_range: - min: 73728 - max: 260404000 + min: 90112 + max: 225116176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: jnp1qee8g + job_id: j2p0knke5 job_status: Passed - torchscript_onnx_ort: - inference_time: 51557.0 - throughput: 19.396008301491552 + torchscript_onnx: + inference_time: 52240.0 + throughput: 19.142419601837673 estimated_peak_memory_range: - min: 5955584 - max: 196150816 + min: 6713344 + max: 170523312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jnp1qee7g + job_id: jw56vkv7p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:24:29Z' + timestamp: '2024-06-22T22:26:53Z' - torchscript_onnx_tflite: - inference_time: 65283.0 - throughput: 15.31792350229003 + inference_time: 68152.0 + throughput: 14.673083695269398 estimated_peak_memory_range: - min: 1536000 - max: 4290816 + min: 24576 + max: 53347056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1024 - job_id: jz5wmqq3g + job_id: jopr9z9ep job_status: Passed torchscript_onnx_qnn: - inference_time: 65436.0 - throughput: 15.282107708295127 + inference_time: 68483.0 + throughput: 14.602164040710834 estimated_peak_memory_range: - min: 2744320 - max: 60284768 + min: 143360 + max: 109872728 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: jz5wmqqmg + job_id: jogkdjdop job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:24:27Z' + timestamp: '2024-06-22T22:26:50Z' + - torchscript_onnx_tflite: + inference_time: 75507.0 + throughput: 13.243805210112969 + estimated_peak_memory_range: + min: 3284992 + max: 6621224 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 1024 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 1024 + job_id: jep2j2jm5 + job_status: Passed + torchscript_onnx_qnn: + 
inference_time: 66483.0 + throughput: 15.041439164899298 + estimated_peak_memory_range: + min: 159744 + max: 102750576 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 1026 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 1026 + job_id: jn5qwjwm5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:26:51Z' - torchscript_onnx_qnn: - inference_time: 73135.0 - throughput: 13.67334381623026 + inference_time: 65392.0 + throughput: 15.292390506483974 estimated_peak_memory_range: - min: 221184 - max: 221184 + min: 204800 + max: 204800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: jvgd7oorg + job_id: j1p88l88p job_status: Passed - torchscript_onnx_ort: - inference_time: 65785.0 - throughput: 15.20103367028958 + torchscript_onnx: + inference_time: 65763.0 + throughput: 15.206118942262366 estimated_peak_memory_range: - min: 208896 - max: 208896 + min: 274432 + max: 274432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jvgd7oozg + job_id: j1p38y8z5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:24:30Z' + timestamp: '2024-06-22T22:26:54Z' diff --git a/qai_hub_models/models/facebook_denoiser/app.py b/qai_hub_models/models/facebook_denoiser/app.py index 537fc024..26948493 100644 --- a/qai_hub_models/models/facebook_denoiser/app.py +++ b/qai_hub_models/models/facebook_denoiser/app.py @@ -60,27 +60,26 @@ def denoise_audio( Returns: Predicted audio. See `raw_output` parameter above for type of return value. 
""" - with torch.no_grad(): - all_inputs_are_paths = True - - noisy_audios = [] - for audio in input_audio: - if isinstance(audio, str) or isinstance(audio, Path): - audio, sample_rate = torchaudio.load(audio) - assert sample_rate == self.sample_rate - else: - all_inputs_are_paths = False - if isinstance(audio, np.ndarray): - audio = torch.from_numpy(audio) - noisy_audios.append(audio) - - estimates = [] - for noisy in noisy_audios: - out = self.denoiser(noisy) - out = out / max(out.abs().max().item(), 1) # Normalize - if all_inputs_are_paths: - # We don't run files in batches, take the first batch output - out = out[:, 0] - estimates.append(out) - - return estimates + all_inputs_are_paths = True + + noisy_audios = [] + for audio in input_audio: + if isinstance(audio, str) or isinstance(audio, Path): + audio, sample_rate = torchaudio.load(audio) + assert sample_rate == self.sample_rate + else: + all_inputs_are_paths = False + if isinstance(audio, np.ndarray): + audio = torch.from_numpy(audio) + noisy_audios.append(audio) + + estimates = [] + for noisy in noisy_audios: + out = self.denoiser(noisy) + out = out / max(out.abs().max().item(), 1) # Normalize + if all_inputs_are_paths: + # We don't run files in batches, take the first batch output + out = out[:, 0] + estimates.append(out) + + return estimates diff --git a/qai_hub_models/models/facebook_denoiser/export.py b/qai_hub_models/models/facebook_denoiser/export.py index 11ecd12d..18d0f13c 100644 --- a/qai_hub_models/models/facebook_denoiser/export.py +++ b/qai_hub_models/models/facebook_denoiser/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -115,7 +115,6 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. 
Compile the model to an on-device asset @@ -172,7 +171,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -202,7 +201,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/facebook_denoiser/perf.yaml b/qai_hub_models/models/facebook_denoiser/perf.yaml index 912fe7c1..ad4e6e61 100644 --- a/qai_hub_models/models/facebook_denoiser/perf.yaml +++ b/qai_hub_models/models/facebook_denoiser/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: Facebook-Denoiser performance_metrics: - torchscript_onnx_tflite: - inference_time: 762754.0 - throughput: 1.3110386835073955 + inference_time: 764677.0 + throughput: 1.307741700090365 estimated_peak_memory_range: - min: 271872000 - max: 745165216 + min: 277712896 + max: 349060880 primary_compute_unit: CPU precision: fp32 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 205 total_layers: 205 - job_id: jqp4jvv1p + job_id: j1pv4j4mp job_status: Passed - torchscript_onnx_ort: - inference_time: 14425872.0 - throughput: 0.06931989969133236 + torchscript_onnx: + inference_time: 14500590.0 + throughput: 0.06896271117244195 estimated_peak_memory_range: - min: 73728 - max: 97772968 + min: 720896 + max: 75991544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +65,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 178 - job_id: jegnr33q5 + job_id: jz5wxjxjp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +74,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:24:56Z' + timestamp: '2024-06-22T22:27:22Z' - torchscript_onnx_tflite: - inference_time: 700116.0 - throughput: 1.4283347331013718 + inference_time: 778927.0 + throughput: 1.2838173538726991 estimated_peak_memory_range: - min: 418246656 - max: 442262688 + min: 484712448 + max: 504708688 primary_compute_unit: CPU precision: fp32 layer_info: @@ -86,14 +88,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 205 total_layers: 205 - job_id: j0pxeyyl5 + job_id: j7gj1j18g job_status: Passed - torchscript_onnx_ort: - inference_time: 10632015.0 - throughput: 0.09405554826625057 + torchscript_onnx: + inference_time: 10659565.0 + throughput: 0.09381245857593626 estimated_peak_memory_range: - min: 16744448 - max: 226752096 + min: 19398656 + max: 230929712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +103,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 178 - job_id: jopr1ee7g + job_id: jmg9868vp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +112,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: 
'2024-06-08T22:24:57Z' + timestamp: '2024-06-22T22:27:23Z' - torchscript_onnx_tflite: - inference_time: 733772.0 - throughput: 1.3628211488037156 + inference_time: 699741.0 + throughput: 1.4291001956438167 estimated_peak_memory_range: - min: 89939968 - max: 463947896 + min: 365686784 + max: 437035544 primary_compute_unit: CPU precision: fp32 layer_info: @@ -124,7 +126,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 205 total_layers: 205 - job_id: jo5mv3395 + job_id: jlpe2j20p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +135,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:24:54Z' - - torchscript_onnx_ort: - inference_time: 15555145.0 - throughput: 0.06428741101416927 + timestamp: '2024-06-22T22:27:18Z' + - torchscript_onnx_tflite: + inference_time: 750714.0 + throughput: 1.3320652072560255 + estimated_peak_memory_range: + min: 461258752 + max: 464483536 + primary_compute_unit: CPU + precision: fp32 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 205 + total_layers: 205 + job_id: jygzw1w6g + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:27:19Z' + - torchscript_onnx: + inference_time: 15624962.0 + throughput: 0.06400015564837853 estimated_peak_memory_range: - min: 450560 - max: 450560 + min: 446464 + max: 446464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +172,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 178 - job_id: jep23llqg + job_id: jnp13r3l5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +181,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:24:58Z' + timestamp: '2024-06-22T22:27:24Z' diff --git a/qai_hub_models/models/fastsam_s/export.py b/qai_hub_models/models/fastsam_s/export.py index aad089b0..a27b1cb6 100644 --- a/qai_hub_models/models/fastsam_s/export.py +++ b/qai_hub_models/models/fastsam_s/export.py @@ -38,7 +38,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,7 +117,6 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace( model.to("cpu"), make_torch_inputs(input_spec), check_trace=False ) @@ -126,7 +125,7 @@ def export_model( channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_1,output_2,output_3,output_5" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -172,7 +171,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -190,7 +189,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -212,7 +211,7 @@ def export_model( # Convert outputs from channel last to 
channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_1,output_2,output_3,output_5", inference_result, target_runtime ) @@ -228,7 +227,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/fastsam_s/perf.yaml b/qai_hub_models/models/fastsam_s/perf.yaml index 708e66d8..7b83f3f9 100644 --- a/qai_hub_models/models/fastsam_s/perf.yaml +++ b/qai_hub_models/models/fastsam_s/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -35,27 +37,12 @@ aggregated: models: - name: FastSam-S performance_metrics: - - torchscript_onnx_tflite: - inference_time: 8700.0 - throughput: 114.94252873563218 + - torchscript_onnx: + inference_time: 10777.0 + throughput: 92.79020135473694 estimated_peak_memory_range: - min: 8429568 - max: 39456112 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 288 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 288 - job_id: j2p0elln5 - job_status: Passed - torchscript_onnx_ort: - inference_time: 10893.0 - throughput: 91.80207472688883 - estimated_peak_memory_range: - min: 26902528 - max: 83130600 + min: 19591168 + max: 74568352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +50,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jwgoe3vkp + job_id: j1p88l68p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,28 +59,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:25:27Z' - - torchscript_onnx_tflite: - inference_time: 6426.0 - throughput: 155.6178026766262 - estimated_peak_memory_range: - min: 6594560 - max: 79404896 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 288 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 288 - job_id: j1p8wzzop - job_status: Passed - torchscript_onnx_ort: - inference_time: 7507.0 - throughput: 133.20900492873318 + timestamp: '2024-06-22T22:27:57Z' + - torchscript_onnx: + inference_time: 7538.0 + throughput: 132.66118333775538 estimated_peak_memory_range: - min: 27897856 - max: 69661040 + min: 22614016 + max: 55712784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: j1pvzvwrg + job_id: jogkdjoop job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,36 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:25:28Z' - - torchscript_onnx_tflite: - inference_time: 8693.0 - throughput: 115.03508570113885 - estimated_peak_memory_range: - min: 3923968 - max: 21721296 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 288 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 288 - job_id: jogkr33n5 - job_status: Passed - reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot - 
os_name: Android - manufacturer: Qualcomm - chipset: Qcs8550 - timestamp: '2024-06-08T22:25:21Z' - - torchscript_onnx_ort: - inference_time: 10798.0 - throughput: 92.60974254491572 + timestamp: '2024-06-22T22:27:58Z' + - torchscript_onnx: + inference_time: 10922.0 + throughput: 91.55832265152902 estimated_peak_memory_range: - min: 72966144 - max: 72966144 + min: 68141056 + max: 68141056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +96,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: j7gjkele5 + job_id: jn5qwjzm5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +105,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:25:29Z' + timestamp: '2024-06-22T22:27:59Z' diff --git a/qai_hub_models/models/fastsam_x/export.py b/qai_hub_models/models/fastsam_x/export.py index fb2a5872..80ca97d9 100644 --- a/qai_hub_models/models/fastsam_x/export.py +++ b/qai_hub_models/models/fastsam_x/export.py @@ -38,7 +38,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,7 +117,6 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace( model.to("cpu"), make_torch_inputs(input_spec), check_trace=False ) @@ -126,7 +125,7 @@ def export_model( channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_1,output_2,output_3,output_5" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -172,7 +171,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -190,7 +189,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -212,7 +211,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_1,output_2,output_3,output_5", inference_result, target_runtime ) @@ -228,7 +227,10 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + model_cls=Model, + supports_tflite=False, + supports_qnn=False, + supports_precompiled_qnn_onnx=False, ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/fastsam_x/perf.yaml b/qai_hub_models/models/fastsam_x/perf.yaml index 51138c0a..4627470a 100644 --- a/qai_hub_models/models/fastsam_x/perf.yaml +++ b/qai_hub_models/models/fastsam_x/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - 
Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -35,27 +37,12 @@ aggregated: models: - name: FastSam-X performance_metrics: - - torchscript_onnx_tflite: - inference_time: 53656.0 - throughput: 18.637244669748025 + - torchscript_onnx: + inference_time: 54458.0 + throughput: 18.362774982555365 estimated_peak_memory_range: - min: 9220096 - max: 14211840 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 420 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 420 - job_id: jygzvr7xp - job_status: Passed - torchscript_onnx_ort: - inference_time: 51625.0 - throughput: 19.37046004842615 - estimated_peak_memory_range: - min: 25325568 - max: 343683192 + min: 15622144 + max: 336071040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +50,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 421 - job_id: j0pxey1l5 + job_id: jnp13r7l5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,28 +59,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:26:04Z' - - torchscript_onnx_tflite: - inference_time: 36229.0 - throughput: 27.602197134891938 - estimated_peak_memory_range: - min: 8450048 - max: 144127216 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 420 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 420 - job_id: jz5wmq9mg - job_status: Passed - torchscript_onnx_ort: - inference_time: 37119.0 - throughput: 26.94038093698645 + timestamp: '2024-06-22T22:28:37Z' + - torchscript_onnx: + inference_time: 37262.0 + throughput: 26.83699210992432 estimated_peak_memory_range: - min: 29941760 - max: 95002704 + min: 30011392 + max: 85984848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 421 - job_id: jo5mv3z95 + job_id: jvgd0j8lp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,36 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:26:05Z' - - torchscript_onnx_tflite: - inference_time: 49800.0 - throughput: 20.080321285140563 - estimated_peak_memory_range: - min: 9379840 - max: 47006488 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 420 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 420 - job_id: jmg99w48g - job_status: Passed - reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot - os_name: Android - manufacturer: Qualcomm - chipset: Qcs8550 - timestamp: '2024-06-08T22:25:58Z' - - torchscript_onnx_ort: - inference_time: 49559.0 - throughput: 20.177969692689523 + timestamp: '2024-06-22T22:28:38Z' + - torchscript_onnx: + inference_time: 49517.0 + throughput: 20.1950845164287 estimated_peak_memory_range: - min: 30785536 - max: 30785536 + min: 36007936 + max: 36007936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +96,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 421 - job_id: jegnr3eq5 + job_id: jz5wxj16p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +105,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:26:06Z' + timestamp: '2024-06-22T22:28:39Z' diff --git a/qai_hub_models/models/fcn_resnet50/app.py b/qai_hub_models/models/fcn_resnet50/app.py index 08d0329a..2f8fae77 100644 --- a/qai_hub_models/models/fcn_resnet50/app.py +++ b/qai_hub_models/models/fcn_resnet50/app.py @@ -65,9 +65,8 
@@ def predict(self, image: Image, raw_output: bool = False) -> Image | np.ndarray: """ input_tensor = preprocess_image(image) - with torch.no_grad(): - output = self.model(input_tensor) - output = output[0] + output = self.model(input_tensor) + output = output[0] predictions = output.argmax(0).byte().cpu().numpy() if raw_output: diff --git a/qai_hub_models/models/fcn_resnet50/export.py b/qai_hub_models/models/fcn_resnet50/export.py index cc121898..c5b241c3 100644 --- a/qai_hub_models/models/fcn_resnet50/export.py +++ b/qai_hub_models/models/fcn_resnet50/export.py @@ -38,7 +38,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,13 +117,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -169,7 +168,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -187,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -209,7 +208,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/fcn_resnet50/model.py b/qai_hub_models/models/fcn_resnet50/model.py index e336cecf..bf95d3a4 100644 --- a/qai_hub_models/models/fcn_resnet50/model.py +++ b/qai_hub_models/models/fcn_resnet50/model.py @@ -31,7 +31,7 @@ def __init__( @classmethod def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> FCN_ResNet50: - model = tv_models.segmentation.fcn_resnet50(weights=weights).eval() + model = tv_models.segmentation.fcn_resnet50(weights=weights) model.aux_classifier = None return cls(model) diff --git a/qai_hub_models/models/fcn_resnet50/perf.yaml b/qai_hub_models/models/fcn_resnet50/perf.yaml index 8d3c9539..cf9278eb 100644 --- a/qai_hub_models/models/fcn_resnet50/perf.yaml +++ b/qai_hub_models/models/fcn_resnet50/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: FCN-ResNet50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 41432.0 - throughput: 24.135933577910794 + inference_time: 42095.0 + throughput: 
23.75579047392802 estimated_peak_memory_range: - min: 22097920 - max: 25129176 + min: 22130688 + max: 24721192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jep23lmqg + job_id: jnp13rv25 job_status: Passed torchscript_onnx_qnn: - inference_time: 42249.0 - throughput: 23.669199270988663 + inference_time: 42393.0 + throughput: 23.58880003774208 estimated_peak_memory_range: - min: 3497984 - max: 21232048 + min: 3244032 + max: 20959008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: j1p8wz7op + job_id: j0pxmwd1g job_status: Passed - torchscript_onnx_ort: - inference_time: 43347.0 - throughput: 23.069647265093316 + torchscript_onnx: + inference_time: 43645.0 + throughput: 22.912131973880168 estimated_peak_memory_range: - min: 44056576 - max: 204120472 + min: 44417024 + max: 203720776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jw56qn3yg + job_id: jqpyn977g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:26:38Z' + timestamp: '2024-06-22T22:29:16Z' - torchscript_onnx_tflite: - inference_time: 31357.0 - throughput: 31.890805880664605 + inference_time: 30857.0 + throughput: 32.407557442395564 estimated_peak_memory_range: - min: 49152 - max: 137281408 + min: 21643264 + max: 161762880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jqpyv6dlp + job_id: jvgd0jzep job_status: Passed torchscript_onnx_qnn: - inference_time: 31599.0 - throughput: 31.64657109402196 + inference_time: 31702.0 + throughput: 31.54375118289067 estimated_peak_memory_range: - min: 3162112 - max: 80794592 + min: 2564096 + max: 75592160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jogkr3yn5 + job_id: jo5m4jdw5 job_status: Passed - torchscript_onnx_ort: - inference_time: 32324.0 - throughput: 30.936765251825268 + torchscript_onnx: + inference_time: 31938.0 + throughput: 31.31066441229883 estimated_peak_memory_range: - min: 43311104 - max: 107423312 + min: 41369600 + max: 109154816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: j1p3qe4n5 + job_id: j2p0knv65 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:26:39Z' + timestamp: '2024-06-22T22:29:17Z' - torchscript_onnx_tflite: - inference_time: 41734.0 - throughput: 23.9612785738247 + inference_time: 42583.0 + throughput: 23.483549773383746 estimated_peak_memory_range: min: 22106112 - max: 24857096 + max: 24637056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: j2p0elrn5 + job_id: jz576q7lg job_status: Passed torchscript_onnx_qnn: - inference_time: 42169.0 - throughput: 23.714102776921436 + inference_time: 42400.0 + throughput: 23.58490566037736 estimated_peak_memory_range: min: 3166208 - max: 19865232 + max: 20583048 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: j1gle30mp + job_id: jopr9zn9p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:26:37Z' + timestamp: '2024-06-22T22:29:14Z' + - torchscript_onnx_tflite: + inference_time: 42218.0 + throughput: 23.686579184234212 + estimated_peak_memory_range: + min: 22097920 + max: 24890008 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 86 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 86 + job_id: jqp48z9vg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 42279.0 + throughput: 23.65240426689373 + estimated_peak_memory_range: + min: 3227648 + max: 20518616 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 127 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 127 + job_id: jep2j2v45 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:29:15Z' - torchscript_onnx_qnn: - inference_time: 70340.0 - throughput: 14.216661927779358 + inference_time: 39514.0 + throughput: 25.307485954345296 estimated_peak_memory_range: min: 3153920 max: 3153920 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jn5q932op + job_id: jegnxj7r5 job_status: Passed - torchscript_onnx_ort: - inference_time: 42281.0 - throughput: 23.651285447364064 + torchscript_onnx: + inference_time: 42191.0 + throughput: 23.701737337346827 estimated_peak_memory_range: - min: 9379840 - max: 9379840 + min: 28254208 + max: 28254208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jwgoe31kp + job_id: j1p88l4xp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:26:40Z' + timestamp: '2024-06-22T22:29:18Z' diff --git a/qai_hub_models/models/fcn_resnet50_quantized/export.py b/qai_hub_models/models/fcn_resnet50_quantized/export.py index 646bdaab..e85ca498 100644 --- a/qai_hub_models/models/fcn_resnet50_quantized/export.py +++ b/qai_hub_models/models/fcn_resnet50_quantized/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -127,7 +127,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -177,7 +177,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -195,7 +195,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif 
target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -217,7 +217,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/fcn_resnet50_quantized/model.py b/qai_hub_models/models/fcn_resnet50_quantized/model.py index affc65ef..faaf2c1b 100644 --- a/qai_hub_models/models/fcn_resnet50_quantized/model.py +++ b/qai_hub_models/models/fcn_resnet50_quantized/model.py @@ -74,7 +74,6 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() final_model = cls(sim) return final_model diff --git a/qai_hub_models/models/fcn_resnet50_quantized/perf.yaml b/qai_hub_models/models/fcn_resnet50_quantized/perf.yaml index 83867788..8727d190 100644 --- a/qai_hub_models/models/fcn_resnet50_quantized/perf.yaml +++ b/qai_hub_models/models/fcn_resnet50_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: FCN-ResNet50-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 14137.0 - throughput: 70.73636556553724 + inference_time: 14077.0 + throughput: 71.03786318107551 estimated_peak_memory_range: - min: 7475200 - max: 59586696 + min: 3821568 + max: 55833368 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 87 - job_id: j7gjke0e5 + job_id: jn5qwjm45 job_status: Passed torchscript_onnx_qnn: - inference_time: 15266.0 - throughput: 65.5050438883794 + inference_time: 15172.0 + throughput: 65.9108884787767 estimated_peak_memory_range: - min: 839680 - max: 9922576 + min: 16384 + max: 23692160 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jnp1qed7g - job_status: Passed - torchscript_onnx_ort: - inference_time: 12789.0 - throughput: 78.19219641879741 - estimated_peak_memory_range: - min: 9297920 - max: 58295544 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 80 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 80 - job_id: j0pxey7l5 + job_id: j7gj1jwxg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:28:08Z' + timestamp: '2024-06-22T22:30:41Z' - torchscript_onnx_tflite: - inference_time: 10012.0 - throughput: 99.88014382740711 + inference_time: 10017.0 + throughput: 99.83028850953379 estimated_peak_memory_range: - min: 73728 - max: 83075216 + min: 40960 + max: 86289520 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 87 - job_id: jlpe4krv5 + job_id: j1gl7j185 job_status: Passed torchscript_onnx_qnn: - inference_time: 11234.0 - throughput: 89.01548869503294 + inference_time: 
11196.0 + throughput: 89.31761343336906 estimated_peak_memory_range: - min: 802816 - max: 55488784 + min: 46923776 + max: 100046512 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jvgd7orzg - job_status: Passed - torchscript_onnx_ort: - inference_time: 9614.0 - throughput: 104.01497815685458 - estimated_peak_memory_range: - min: 11309056 - max: 56165696 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 80 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 80 - job_id: jegnr39q5 + job_id: jlpe2jl1p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +120,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:28:09Z' + timestamp: '2024-06-22T22:30:43Z' - torchscript_onnx_tflite: - inference_time: 14165.0 - throughput: 70.5965407695023 + inference_time: 13974.0 + throughput: 71.56147130385001 estimated_peak_memory_range: - min: 5574656 - max: 14323152 + min: 5554176 + max: 7391176 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 87 - job_id: jygzvrxxp + job_id: jw56vkd0p job_status: Passed torchscript_onnx_qnn: - inference_time: 15225.0 - throughput: 65.68144499178982 + inference_time: 15246.0 + throughput: 65.59097468188378 estimated_peak_memory_range: - min: 811008 - max: 30220216 + min: 24576 + max: 14758840 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jqp4jvx1p + job_id: jz5wxj46p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +158,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:28:07Z' + timestamp: '2024-06-22T22:30:45Z' - torchscript_onnx_tflite: - inference_time: 89203.0 - throughput: 11.210385300942793 + inference_time: 14037.0 + throughput: 71.24029351000927 estimated_peak_memory_range: - min: 6000640 - max: 92646944 + min: 5521408 + max: 300306456 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,7 +172,45 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 87 - job_id: jz5wmqdmg + job_id: j1p38ywl5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 15206.0 + throughput: 65.76351440220965 + estimated_peak_memory_range: + min: 16384 + max: 227015544 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 79 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 79 + job_id: jmg986dlp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:30:47Z' + - torchscript_onnx_tflite: + inference_time: 88951.0 + throughput: 11.242144551494643 + estimated_peak_memory_range: + min: 5931008 + max: 95025424 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 87 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 87 + job_id: jwgomj4x5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -207,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-08T22:28:02Z' + timestamp: '2024-06-22T22:30:39Z' - torchscript_onnx_tflite: - inference_time: 728106.0 - throughput: 1.373426396705974 + inference_time: 733152.0 + throughput: 1.3639736371175417 estimated_peak_memory_range: - 
min: 33034240 - max: 70768096 + min: 22896640 + max: 174088744 primary_compute_unit: NPU precision: int8 layer_info: @@ -221,7 +233,7 @@ models: layers_on_gpu: 9 layers_on_cpu: 12 total_layers: 87 - job_id: jmg99w38g + job_id: j1pv4j9jp job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -230,10 +242,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-08T22:28:03Z' + timestamp: '2024-06-22T22:30:40Z' - torchscript_onnx_qnn: - inference_time: 16789.0 - throughput: 59.562808982071594 + inference_time: 12928.0 + throughput: 77.35148514851485 estimated_peak_memory_range: min: 794624 max: 794624 @@ -244,22 +256,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jz57vxj95 - job_status: Passed - torchscript_onnx_ort: - inference_time: 12535.0 - throughput: 79.77662544874352 - estimated_peak_memory_range: - min: 835584 - max: 835584 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 80 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 80 - job_id: jopr1e47g + job_id: jygzw14kg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -268,4 +265,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:28:10Z' + timestamp: '2024-06-22T22:30:44Z' diff --git a/qai_hub_models/models/ffnet_122ns_lowres/export.py b/qai_hub_models/models/ffnet_122ns_lowres/export.py index 98404700..0aa30f17 100644 --- a/qai_hub_models/models/ffnet_122ns_lowres/export.py +++ b/qai_hub_models/models/ffnet_122ns_lowres/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -186,7 +185,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -208,7 +207,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/ffnet_122ns_lowres/perf.yaml b/qai_hub_models/models/ffnet_122ns_lowres/perf.yaml index 996c082b..d4318a47 100644 --- a/qai_hub_models/models/ffnet_122ns_lowres/perf.yaml +++ b/qai_hub_models/models/ffnet_122ns_lowres/perf.yaml @@ -9,6 +9,7 @@ 
aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: FFNet-122NS-LowRes performance_metrics: - torchscript_onnx_tflite: - inference_time: 9538.0 - throughput: 104.84378276368211 + inference_time: 6446.0 + throughput: 155.13496742165685 estimated_peak_memory_range: - min: 0 - max: 1882960 + min: 675840 + max: 3400696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 216 - job_id: j1p8wz3op + job_id: jegnxjkr5 job_status: Passed torchscript_onnx_qnn: - inference_time: 10684.0 - throughput: 93.59790340696368 + inference_time: 6982.0 + throughput: 143.22543683758235 estimated_peak_memory_range: - min: 7036928 - max: 23266984 + min: 6311936 + max: 32757704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: j1gle3emp + job_id: j2p0kn665 job_status: Passed - torchscript_onnx_ort: - inference_time: 7933.0 - throughput: 126.05571662674902 + torchscript_onnx: + inference_time: 8035.0 + throughput: 124.45550715619166 estimated_peak_memory_range: - min: 1155072 - max: 141586240 + min: 2633728 + max: 141075736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 350 - job_id: j1pvzvzrg + job_id: jw56vkw0p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:28:42Z' + timestamp: '2024-06-22T22:31:28Z' - torchscript_onnx_tflite: - inference_time: 6833.0 - throughput: 146.34860237084735 + inference_time: 4513.0 + throughput: 221.58209616662973 estimated_peak_memory_range: - min: 659456 - max: 61929920 + min: 663552 + max: 68570384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 216 - job_id: jogkr3ln5 + job_id: jopr9zw9p job_status: Passed torchscript_onnx_qnn: - inference_time: 7606.0 - throughput: 131.47515119642387 + inference_time: 4927.0 + throughput: 202.96326364927947 estimated_peak_memory_range: min: 6307840 - max: 93102864 + max: 73938192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: jw56qnqyg + job_id: j1p88l1xp job_status: Passed - torchscript_onnx_ort: - inference_time: 5594.0 - throughput: 178.7629603146228 + torchscript_onnx: + inference_time: 5615.0 + throughput: 178.09439002671417 estimated_peak_memory_range: - min: 6307840 - max: 59711872 + min: 9445376 + max: 50671440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 350 - job_id: j7gjkeke5 + job_id: j1p38y6l5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:28:43Z' + timestamp: '2024-06-22T22:31:29Z' - torchscript_onnx_tflite: - inference_time: 9545.0 - throughput: 104.76689366160294 + inference_time: 6458.0 + throughput: 154.8467017652524 estimated_peak_memory_range: 
- min: 0 - max: 2096664 + min: 561152 + max: 3140840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 216 - job_id: jn5q937op + job_id: jep2j2e45 job_status: Passed torchscript_onnx_qnn: - inference_time: 10716.0 - throughput: 93.3184023889511 + inference_time: 6974.0 + throughput: 143.38973329509608 estimated_peak_memory_range: min: 6311936 - max: 40648480 + max: 30973152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: jwgoe3ekp + job_id: jn5qwjv45 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:28:41Z' + timestamp: '2024-06-22T22:31:25Z' + - torchscript_onnx_tflite: + inference_time: 6444.0 + throughput: 155.18311607697083 + estimated_peak_memory_range: + min: 57344 + max: 2216792 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 216 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 216 + job_id: jqpyn9m7g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 7120.0 + throughput: 140.4494382022472 + estimated_peak_memory_range: + min: 6307840 + max: 30929088 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 348 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 348 + job_id: j1gl7jl85 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:31:26Z' - torchscript_onnx_qnn: - inference_time: 17375.0 - throughput: 57.55395683453237 + inference_time: 6306.0 + throughput: 158.5791309863622 estimated_peak_memory_range: min: 6303744 max: 6303744 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: j1p3qeqn5 + job_id: jogkdj82p job_status: Passed - torchscript_onnx_ort: - inference_time: 7523.0 - throughput: 132.92569453675395 + torchscript_onnx: + inference_time: 7546.0 + throughput: 132.520540683806 estimated_peak_memory_range: - min: 6332416 - max: 6332416 + min: 6307840 + max: 6307840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 350 - job_id: jlpe4k4v5 + job_id: jwgomj8x5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:28:44Z' + timestamp: '2024-06-22T22:31:30Z' diff --git a/qai_hub_models/models/ffnet_40s/export.py b/qai_hub_models/models/ffnet_40s/export.py index 879b2dbd..0d6746bd 100644 --- a/qai_hub_models/models/ffnet_40s/export.py +++ b/qai_hub_models/models/ffnet_40s/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != 
TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -186,7 +185,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -208,7 +207,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/ffnet_40s/perf.yaml b/qai_hub_models/models/ffnet_40s/perf.yaml index 2da118a9..b6184d8d 100644 --- a/qai_hub_models/models/ffnet_40s/perf.yaml +++ b/qai_hub_models/models/ffnet_40s/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: FFNet-40S performance_metrics: - torchscript_onnx_tflite: - inference_time: 23193.0 - throughput: 43.11645755184754 + inference_time: 16960.0 + throughput: 58.9622641509434 estimated_peak_memory_range: - min: 2531328 - max: 4441664 + min: 2121728 + max: 4539152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 92 - job_id: jz5wmqmmg + job_id: j7gj1jqxg job_status: Passed torchscript_onnx_qnn: - inference_time: 17411.0 - throughput: 57.43495491356039 + inference_time: 17413.0 + throughput: 57.42835812324125 estimated_peak_memory_range: - min: 25214976 - max: 45407080 + min: 25198592 + max: 45873296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jvgd7o7zg + job_id: jmg986mlp job_status: Passed - torchscript_onnx_ort: - inference_time: 27393.0 - throughput: 36.50567663271639 + torchscript_onnx: + inference_time: 27211.0 + throughput: 36.74984381316379 estimated_peak_memory_range: - min: 34656256 - max: 113886552 + min: 30203904 + max: 110856920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: jo5mv3v95 + job_id: j0pxmw41g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:29:14Z' + timestamp: '2024-06-22T22:32:03Z' - torchscript_onnx_tflite: - inference_time: 16820.0 - throughput: 59.45303210463734 + inference_time: 12646.0 + throughput: 79.07638779060572 estimated_peak_memory_range: - min: 757760 - max: 102036720 + min: 1835008 + max: 94309104 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 92 - job_id: 
jmg99w98g + job_id: jlpe2jy1p job_status: Passed torchscript_onnx_qnn: - inference_time: 12560.0 - throughput: 79.61783439490446 + inference_time: 12519.0 + throughput: 79.87858455148175 estimated_peak_memory_range: - min: 132333568 - max: 190814608 + min: 25178112 + max: 79443216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jz57vxv95 + job_id: jnp13rj25 job_status: Passed - torchscript_onnx_ort: - inference_time: 19832.0 - throughput: 50.42355788624445 + torchscript_onnx: + inference_time: 19821.0 + throughput: 50.45154129458655 estimated_peak_memory_range: - min: 29405184 - max: 74127520 + min: 29417472 + max: 71721472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: jegnr3rq5 + job_id: jo5m4jmw5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:29:15Z' + timestamp: '2024-06-22T22:32:04Z' - torchscript_onnx_tflite: - inference_time: 23566.0 - throughput: 42.43401510650938 + inference_time: 17132.0 + throughput: 58.37030119075414 estimated_peak_memory_range: - min: 2564096 - max: 4836528 + min: 2531328 + max: 4933896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 92 - job_id: jnp1qeq7g + job_id: jygzw1nkg job_status: Passed torchscript_onnx_qnn: - inference_time: 17310.0 - throughput: 57.77007510109763 + inference_time: 17490.0 + throughput: 57.17552887364208 estimated_peak_memory_range: - min: 25202688 - max: 45281048 + min: 25210880 + max: 45868592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: j0pxeyel5 + job_id: jz576q4lg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:29:13Z' + timestamp: '2024-06-22T22:32:01Z' + - torchscript_onnx_tflite: + inference_time: 17571.0 + throughput: 56.911957202208185 + estimated_peak_memory_range: + min: 2555904 + max: 5263536 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 92 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 92 + job_id: jz5wxj76p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 17330.0 + throughput: 57.70340450086555 + estimated_peak_memory_range: + min: 24944640 + max: 45612104 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 140 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 140 + job_id: jqp48z1vg + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:32:02Z' - torchscript_onnx_qnn: - inference_time: 23356.0 - throughput: 42.81555060798082 + inference_time: 17741.0 + throughput: 56.3666084211713 estimated_peak_memory_range: - min: 25219072 - max: 25219072 + min: 25223168 + max: 25223168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,11 +232,11 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jqp4jvj1p + job_id: jvgd0j3ep job_status: Passed - torchscript_onnx_ort: - inference_time: 26356.0 - throughput: 37.942024586431934 + torchscript_onnx: 
+ inference_time: 26353.0 + throughput: 37.94634386976815 estimated_peak_memory_range: min: 25219072 max: 25219072 @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: jopr1e17g + job_id: jegnxjnr5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:29:16Z' + timestamp: '2024-06-22T22:32:05Z' diff --git a/qai_hub_models/models/ffnet_40s_quantized/export.py b/qai_hub_models/models/ffnet_40s_quantized/export.py index ad846a21..4cd3dfb5 100644 --- a/qai_hub_models/models/ffnet_40s_quantized/export.py +++ b/qai_hub_models/models/ffnet_40s_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -216,7 +216,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) @@ -229,7 +229,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/ffnet_40s_quantized/perf.yaml b/qai_hub_models/models/ffnet_40s_quantized/perf.yaml index c8a1dcdd..80fc0489 100644 --- a/qai_hub_models/models/ffnet_40s_quantized/perf.yaml +++ b/qai_hub_models/models/ffnet_40s_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: FFNet-40S-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 6442.0 - throughput: 155.2312946289972 + inference_time: 6472.0 + throughput: 154.51174289245984 estimated_peak_memory_range: - min: 36864 - max: 1593576 + min: 667648 + max: 2150792 primary_compute_unit: NPU 
precision: int8 layer_info: @@ -54,22 +58,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 97 - job_id: jqpyv6vlp - job_status: Passed - torchscript_onnx_ort: - inference_time: 9268.0 - throughput: 107.89814415192059 - estimated_peak_memory_range: - min: 7577600 - max: 25025832 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 92 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 92 - job_id: j1pvzv4rg + job_id: jep2j2w45 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -78,13 +67,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:30:02Z' + timestamp: '2024-06-22T22:32:42Z' - torchscript_onnx_tflite: - inference_time: 4682.0 - throughput: 213.58393848782572 + inference_time: 4697.0 + throughput: 212.90185224611454 estimated_peak_memory_range: - min: 12288 - max: 67067712 + min: 40960 + max: 70704480 primary_compute_unit: NPU precision: int8 layer_info: @@ -92,22 +81,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 97 - job_id: j2p0elen5 - job_status: Passed - torchscript_onnx_ort: - inference_time: 7185.0 - throughput: 139.17884481558804 - estimated_peak_memory_range: - min: 6955008 - max: 47776688 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 92 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 92 - job_id: j7gjke1e5 + job_id: jqpyn9x7g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -116,13 +90,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:30:03Z' + timestamp: '2024-06-22T22:32:43Z' - torchscript_onnx_tflite: - inference_time: 6401.0 - throughput: 156.22558975160132 + inference_time: 6430.0 + throughput: 155.52099533437013 estimated_peak_memory_range: - min: 651264 - max: 2179136 + min: 638976 + max: 8974520 primary_compute_unit: NPU precision: int8 layer_info: @@ -130,7 +104,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 97 - job_id: j1p8wzwop + job_id: j2p0knj65 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -139,13 +113,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:29:54Z' + timestamp: '2024-06-22T22:32:44Z' - torchscript_onnx_tflite: - inference_time: 35462.0 - throughput: 28.199199142744344 + inference_time: 6440.0 + throughput: 155.27950310559007 estimated_peak_memory_range: - min: 163840 - max: 38805968 + min: 647168 + max: 9801960 primary_compute_unit: NPU precision: int8 layer_info: @@ -153,22 +127,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 97 - job_id: jogkr3rn5 + job_id: j1p88lxxp job_status: Passed reference_device_info: - name: RB3 Gen 2 (Proxy) - os: '12' - form_factor: Iot + name: SA8775 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: Qcs6490 - timestamp: '2024-06-08T22:29:55Z' + chipset: Sa8775p + timestamp: '2024-06-22T22:32:45Z' - torchscript_onnx_tflite: - inference_time: 189203.0 - throughput: 5.285328456736944 + inference_time: 35271.0 + throughput: 28.351903830342206 estimated_peak_memory_range: - min: 835584 - max: 9440536 + min: 176128 + max: 42089840 primary_compute_unit: NPU precision: int8 layer_info: @@ -176,36 +150,36 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 97 - job_id: jn5q939op + job_id: jogkdj42p job_status: Passed reference_device_info: - name: RB5 (Proxy) + name: RB3 Gen 2 (Proxy) os: '12' form_factor: Iot os_name: Android manufacturer: 
Qualcomm - chipset: Qcs8250 - timestamp: '2024-06-08T22:29:56Z' - - torchscript_onnx_ort: - inference_time: 8436.0 - throughput: 118.53959222380276 + chipset: Qcs6490 + timestamp: '2024-06-22T22:32:46Z' + - torchscript_onnx_tflite: + inference_time: 189478.0 + throughput: 5.277657564466587 estimated_peak_memory_range: - min: 23719936 - max: 23719936 + min: 806912 + max: 2886248 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 92 + layers_on_npu: 97 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 92 - job_id: jlpe4k2v5 + total_layers: 97 + job_id: jn5qwjy45 job_status: Passed reference_device_info: - name: Snapdragon X Elite CRD - os: '11' - form_factor: Compute - os_name: Windows + name: RB5 (Proxy) + os: '12' + form_factor: Iot + os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:30:04Z' + chipset: Qcs8250 + timestamp: '2024-06-22T22:32:48Z' diff --git a/qai_hub_models/models/ffnet_54s/export.py b/qai_hub_models/models/ffnet_54s/export.py index a573a9f5..e27bc0d5 100644 --- a/qai_hub_models/models/ffnet_54s/export.py +++ b/qai_hub_models/models/ffnet_54s/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -186,7 +185,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -208,7 +207,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/ffnet_54s/perf.yaml b/qai_hub_models/models/ffnet_54s/perf.yaml index e912f5b5..7489b233 100644 --- a/qai_hub_models/models/ffnet_54s/perf.yaml +++ b/qai_hub_models/models/ffnet_54s/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: FFNet-54S performance_metrics: - torchscript_onnx_tflite: - inference_time: 25403.0 - throughput: 39.365429280006296 + 
inference_time: 20186.0 + throughput: 49.53928465272961 estimated_peak_memory_range: - min: 4255744 - max: 6909008 + min: 2146304 + max: 4656256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 113 - job_id: jz5wmqxmg + job_id: jvgd0jdep job_status: Passed torchscript_onnx_qnn: - inference_time: 20253.0 - throughput: 49.37540117513455 + inference_time: 20279.0 + throughput: 49.31209625721189 estimated_peak_memory_range: - min: 25219072 - max: 49749016 + min: 24895488 + max: 44659984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 175 - job_id: jvgd7o0zg + job_id: jo5m4j0w5 job_status: Passed - torchscript_onnx_ort: - inference_time: 30396.0 - throughput: 32.89906566653507 + torchscript_onnx: + inference_time: 31290.0 + throughput: 31.959092361776925 estimated_peak_memory_range: - min: 25182208 - max: 90860800 + min: 30216192 + max: 97049136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 177 - job_id: jvgd7o06g + job_id: j2p0knm65 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:30:34Z' + timestamp: '2024-06-22T22:33:34Z' - torchscript_onnx_tflite: - inference_time: 18529.0 - throughput: 53.96945328943818 + inference_time: 14966.0 + throughput: 66.81812107443538 estimated_peak_memory_range: - min: 2461696 - max: 110619440 + min: 438272 + max: 104545232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 113 - job_id: jmg99w88g + job_id: jz576qelg job_status: Passed torchscript_onnx_qnn: - inference_time: 14443.0 - throughput: 69.23769300006924 + inference_time: 14534.0 + throughput: 68.8041832943443 estimated_peak_memory_range: - min: 20983808 - max: 91014848 + min: 21000192 + max: 80471360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 175 - job_id: jz5wmqx4g + job_id: jegnxjzr5 job_status: Passed - torchscript_onnx_ort: - inference_time: 23366.0 - throughput: 42.79722673970727 + torchscript_onnx: + inference_time: 22636.0 + throughput: 44.1774165046828 estimated_peak_memory_range: - min: 29618176 - max: 74645360 + min: 29200384 + max: 73676080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 177 - job_id: jz57vx6n5 + job_id: j1p88lexp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:30:35Z' + timestamp: '2024-06-22T22:33:35Z' - torchscript_onnx_tflite: - inference_time: 25775.0 - throughput: 38.797284190106694 + inference_time: 20656.0 + throughput: 48.412083656080554 estimated_peak_memory_range: - min: 2547712 - max: 5263000 + min: 229376 + max: 2364176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 113 - job_id: jnp1qe37g + job_id: jqp48zyvg job_status: Passed torchscript_onnx_qnn: - inference_time: 20126.0 - throughput: 49.686972075921695 + inference_time: 20059.0 + throughput: 49.85293384515679 estimated_peak_memory_range: - min: 25214976 - 
max: 40883168 + min: 25210880 + max: 40220656 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 175 - job_id: jnp1qe3ng + job_id: jep2j2r45 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:30:33Z' + timestamp: '2024-06-22T22:33:31Z' + - torchscript_onnx_tflite: + inference_time: 20271.0 + throughput: 49.331557397267034 + estimated_peak_memory_range: + min: 2560000 + max: 4512832 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 113 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 113 + job_id: j0pxmwl1g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 19981.0 + throughput: 50.04754516790951 + estimated_peak_memory_range: + min: 25206784 + max: 44193592 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 175 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 175 + job_id: jqpyn9o7g + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:33:33Z' - torchscript_onnx_qnn: - inference_time: 25735.0 - throughput: 38.857586943850784 + inference_time: 20202.0 + throughput: 49.5000495000495 estimated_peak_memory_range: min: 25219072 max: 25219072 @@ -192,11 +232,11 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 175 - job_id: jmg99w8mg + job_id: jopr9zl9p job_status: Passed - torchscript_onnx_ort: - inference_time: 29431.0 - throughput: 33.97777853283952 + torchscript_onnx: + inference_time: 29334.0 + throughput: 34.0901343151292 estimated_peak_memory_range: min: 25223168 max: 25223168 @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 177 - job_id: jqp4jv82p + job_id: jogkdj22p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:30:36Z' + timestamp: '2024-06-22T22:33:36Z' diff --git a/qai_hub_models/models/ffnet_54s_quantized/export.py b/qai_hub_models/models/ffnet_54s_quantized/export.py index f16f2f1a..9dfa7408 100644 --- a/qai_hub_models/models/ffnet_54s_quantized/export.py +++ b/qai_hub_models/models/ffnet_54s_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, 
TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -216,7 +216,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) @@ -229,7 +229,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/ffnet_54s_quantized/perf.yaml b/qai_hub_models/models/ffnet_54s_quantized/perf.yaml index 31883584..218f33b4 100644 --- a/qai_hub_models/models/ffnet_54s_quantized/perf.yaml +++ b/qai_hub_models/models/ffnet_54s_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: FFNet-54S-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 7119.0 - throughput: 140.4691670178396 + inference_time: 7131.0 + throughput: 140.2327864254663 estimated_peak_memory_range: - min: 688128 - max: 2335176 + min: 163840 + max: 1788488 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,22 +58,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jo5mv3475 - job_status: Passed - torchscript_onnx_ort: - inference_time: 9678.0 - throughput: 103.32713370531101 - estimated_peak_memory_range: - min: 7581696 - max: 40900680 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 113 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 113 - job_id: j1gle3o2p + job_id: j1gl7jy85 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -78,13 +67,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:31:28Z' + timestamp: '2024-06-22T22:34:18Z' - torchscript_onnx_tflite: - inference_time: 5120.0 - throughput: 195.3125 + inference_time: 5246.0 + throughput: 190.62142584826535 estimated_peak_memory_range: - min: 45056 - max: 74881936 + min: 696320 + max: 83030896 primary_compute_unit: NPU precision: int8 layer_info: @@ -92,22 +81,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jegnr3xj5 - job_status: Passed - torchscript_onnx_ort: - inference_time: 7395.0 - throughput: 135.2265043948614 - estimated_peak_memory_range: - min: 5738496 - max: 42316048 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 113 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 113 - job_id: jw56qnrng + job_id: jw56vk80p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -116,13 +90,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:31:29Z' + timestamp: '2024-06-22T22:34:19Z' - torchscript_onnx_tflite: - inference_time: 7096.0 - throughput: 140.92446448703495 + inference_time: 7205.0 + throughput: 138.79250520471894 
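The export scripts touched above all share the same channel-layout handling, which is why the same TargetRuntime.ORT to TargetRuntime.ONNX substitution recurs in each file: the ONNX runtime keeps the model's native channel-first (NCHW) I/O, while TFLite and QNN targets are compiled with channel-last (NHWC) I/O and have their tensors transposed around inference. A condensed sketch assembled from the FFNet hunks above (imports and the surrounding compile/inference job plumbing are as in those files and elided here; the tensor names image and output_0 vary per model):

# Compile-time flags: only non-ONNX runtimes get channel-last I/O forced.
channel_last_flags = (
    " --force_channel_last_input image" + " --force_channel_last_output output_0"
    if target_runtime != TargetRuntime.ONNX
    else ""
)

# Inference-time inputs: transpose to channel-last for non-ONNX runtimes...
hub_inputs = (
    sample_inputs
    if target_runtime == TargetRuntime.ONNX
    else transpose_channel_first_to_last("image", sample_inputs, target_runtime)
)

# ...and transpose the job outputs back to channel-first afterwards.
inference_result = (
    inference_result
    if target_runtime == TargetRuntime.ONNX
    else transpose_channel_last_to_first("output_0", inference_result, target_runtime)
)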
estimated_peak_memory_range: - min: 61440 - max: 14772576 + min: 663552 + max: 31484880 primary_compute_unit: NPU precision: int8 layer_info: @@ -130,7 +104,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jopr1e9kg + job_id: j1p38yzl5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -139,13 +113,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:31:20Z' + timestamp: '2024-06-22T22:34:20Z' - torchscript_onnx_tflite: - inference_time: 39816.0 - throughput: 25.11553144464537 + inference_time: 7101.0 + throughput: 140.8252358822701 estimated_peak_memory_range: - min: 122880 - max: 41244048 + min: 643072 + max: 2120000 primary_compute_unit: NPU precision: int8 layer_info: @@ -153,22 +127,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jep23lj6g + job_id: jwgomjlx5 job_status: Passed reference_device_info: - name: RB3 Gen 2 (Proxy) - os: '12' - form_factor: Iot + name: SA8775 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: Qcs6490 - timestamp: '2024-06-08T22:31:21Z' + chipset: Sa8775p + timestamp: '2024-06-22T22:34:21Z' - torchscript_onnx_tflite: - inference_time: 203928.0 - throughput: 4.903691498960417 + inference_time: 38864.0 + throughput: 25.73075339645945 estimated_peak_memory_range: - min: 225280 - max: 7415104 + min: 126976 + max: 44890592 primary_compute_unit: NPU precision: int8 layer_info: @@ -176,36 +150,36 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jqpyv6n0p + job_id: j1pv4jljp job_status: Passed reference_device_info: - name: RB5 (Proxy) + name: RB3 Gen 2 (Proxy) os: '12' form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Qcs8250 - timestamp: '2024-06-08T22:31:22Z' - - torchscript_onnx_ort: - inference_time: 8994.0 - throughput: 111.185234600845 + chipset: Qcs6490 + timestamp: '2024-06-22T22:34:22Z' + - torchscript_onnx_tflite: + inference_time: 198804.0 + throughput: 5.030079877668458 estimated_peak_memory_range: - min: 6340608 - max: 6340608 + min: 884736 + max: 3017472 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 113 + layers_on_npu: 118 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 113 - job_id: j1p3qexm5 + total_layers: 118 + job_id: j7gj1jrxg job_status: Passed reference_device_info: - name: Snapdragon X Elite CRD - os: '11' - form_factor: Compute - os_name: Windows + name: RB5 (Proxy) + os: '12' + form_factor: Iot + os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:31:30Z' + chipset: Qcs8250 + timestamp: '2024-06-22T22:34:23Z' diff --git a/qai_hub_models/models/ffnet_78s/export.py b/qai_hub_models/models/ffnet_78s/export.py index f0bec0bf..fd346e9b 100644 --- a/qai_hub_models/models/ffnet_78s/export.py +++ b/qai_hub_models/models/ffnet_78s/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != 
TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -186,7 +185,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -208,7 +207,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/ffnet_78s/perf.yaml b/qai_hub_models/models/ffnet_78s/perf.yaml index 1d0087dd..ea209937 100644 --- a/qai_hub_models/models/ffnet_78s/perf.yaml +++ b/qai_hub_models/models/ffnet_78s/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: FFNet-78S performance_metrics: - torchscript_onnx_tflite: - inference_time: 29896.0 - throughput: 33.44929087503345 + inference_time: 23277.0 + throughput: 42.960862654122096 estimated_peak_memory_range: - min: 2584576 - max: 5177832 + min: 192512 + max: 2381224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: j1pvzvezg + job_id: jqp48ze8g job_status: Passed torchscript_onnx_qnn: - inference_time: 23500.0 - throughput: 42.5531914893617 + inference_time: 24928.0 + throughput: 40.11553273427471 estimated_peak_memory_range: - min: 25223168 - max: 55846352 + min: 25337856 + max: 47478816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jygzvr84p + job_id: jopr9zx0p job_status: Passed - torchscript_onnx_ort: - inference_time: 34791.0 - throughput: 28.743065735391337 + torchscript_onnx: + inference_time: 33817.0 + throughput: 29.570925865688853 estimated_peak_memory_range: - min: 31657984 - max: 174636584 + min: 897024 + max: 144124064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 237 - job_id: jvgd7o86g + job_id: jogkdj6wp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:32:03Z' + timestamp: '2024-06-22T22:35:14Z' - torchscript_onnx_tflite: - inference_time: 21247.0 - throughput: 47.065468066079916 + inference_time: 17275.0 + throughput: 57.88712011577424 estimated_peak_memory_range: - min: 684032 - max: 120904016 + min: 1945600 + max: 118752096 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - 
job_id: j7gjkeo15 + job_id: j0pxmw03g job_status: Passed torchscript_onnx_qnn: - inference_time: 17520.0 - throughput: 57.077625570776256 + inference_time: 17799.0 + throughput: 56.18293162537221 estimated_peak_memory_range: - min: 21012480 - max: 102988784 + min: 21008384 + max: 92892240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jz5wmq84g + job_id: jep2j2or5 job_status: Passed - torchscript_onnx_ort: - inference_time: 25762.0 - throughput: 38.816862044872295 + torchscript_onnx: + inference_time: 26360.0 + throughput: 37.93626707132018 estimated_peak_memory_range: - min: 31490048 - max: 82980160 + min: 29413376 + max: 78729008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 237 - job_id: jz57vxkn5 + job_id: jn5qwj4n5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:32:04Z' + timestamp: '2024-06-22T22:35:15Z' - torchscript_onnx_tflite: - inference_time: 29131.0 - throughput: 34.327692149256805 + inference_time: 24022.0 + throughput: 41.62850720173174 estimated_peak_memory_range: - min: 2592768 - max: 5433672 + min: 2560000 + max: 4718240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jlpe4k885 + job_id: jo5m4j9d5 job_status: Passed torchscript_onnx_qnn: - inference_time: 23774.0 - throughput: 42.06275763439051 + inference_time: 24147.0 + throughput: 41.41301196836046 estimated_peak_memory_range: - min: 27922432 - max: 51160616 + min: 25194496 + max: 46049992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jnp1qe7ng + job_id: j2p0kno95 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:32:02Z' + timestamp: '2024-06-22T22:35:11Z' + - torchscript_onnx_tflite: + inference_time: 23700.0 + throughput: 42.19409282700422 + estimated_peak_memory_range: + min: 126976 + max: 2246520 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 149 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 149 + job_id: jegnxj1k5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 23730.0 + throughput: 42.14075010535188 + estimated_peak_memory_range: + min: 25210880 + max: 40207040 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 235 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 235 + job_id: j1p88ljkp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:35:13Z' - torchscript_onnx_qnn: - inference_time: 32569.0 - throughput: 30.70404372255826 + inference_time: 24214.0 + throughput: 41.29842240026431 estimated_peak_memory_range: - min: 25214976 - max: 25214976 + min: 25219072 + max: 25219072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,11 +232,11 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jmg99wkmg + job_id: jqpyn988g job_status: Passed - torchscript_onnx_ort: - inference_time: 33100.0 - throughput: 30.211480362537763 + 
torchscript_onnx: + inference_time: 33104.0 + throughput: 30.207829869502174 estimated_peak_memory_range: min: 25219072 max: 25219072 @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 237 - job_id: jqp4jvm2p + job_id: j1gl7jwj5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:32:05Z' + timestamp: '2024-06-22T22:35:16Z' diff --git a/qai_hub_models/models/ffnet_78s_lowres/export.py b/qai_hub_models/models/ffnet_78s_lowres/export.py index 1ee8b996..c2b5dc43 100644 --- a/qai_hub_models/models/ffnet_78s_lowres/export.py +++ b/qai_hub_models/models/ffnet_78s_lowres/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -186,7 +185,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -208,7 +207,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/ffnet_78s_lowres/perf.yaml b/qai_hub_models/models/ffnet_78s_lowres/perf.yaml index 9f48808e..598a9a41 100644 --- a/qai_hub_models/models/ffnet_78s_lowres/perf.yaml +++ b/qai_hub_models/models/ffnet_78s_lowres/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: FFNet-78S-LowRes performance_metrics: - torchscript_onnx_tflite: - inference_time: 10698.0 - throughput: 93.47541596560104 + inference_time: 7397.0 + throughput: 135.189941868325 estimated_peak_memory_range: - min: 12288 - max: 8183320 + min: 684032 + max: 2700304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jo5mv3o75 + job_id: j1p38yo35 job_status: Passed torchscript_onnx_qnn: - inference_time: 11228.0 
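Each export script also derives the downloaded artifact's file extension from the target runtime; the hunks above only rename the ONNX-related members in that chain. Condensed from the scripts above; the first branch, which yields "so", sits outside this diff and is presumed to be the QNN runtime:

if target_runtime == TargetRuntime.QNN:  # assumption: this branch is not visible in the diff
    target_runtime_extension = "so"
elif target_runtime == TargetRuntime.TFLITE:
    target_runtime_extension = "tflite"
elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}:
    target_runtime_extension = "onnx"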
- throughput: 89.06305664410402 + inference_time: 7650.0 + throughput: 130.718954248366 estimated_peak_memory_range: - min: 2109440 - max: 55500544 + min: 6258688 + max: 35155256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 - job_id: jep23l46g + job_id: jlpe2jxop job_status: Passed - torchscript_onnx_ort: - inference_time: 8904.0 - throughput: 112.30907457322552 + torchscript_onnx: + inference_time: 8935.0 + throughput: 111.9194180190263 estimated_peak_memory_range: - min: 1257472 - max: 128438216 + min: 1785856 + max: 118205632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 238 - job_id: jogkr39v5 + job_id: jvgd0j4rp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:32:36Z' + timestamp: '2024-06-22T22:35:49Z' - torchscript_onnx_tflite: - inference_time: 7663.0 - throughput: 130.49719431032233 + inference_time: 5221.0 + throughput: 191.5341888527102 estimated_peak_memory_range: - min: 159744 - max: 55453776 + min: 638976 + max: 61606000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jegnr3oj5 + job_id: jwgomjdq5 job_status: Passed torchscript_onnx_qnn: - inference_time: 7958.0 - throughput: 125.65971349585323 + inference_time: 5413.0 + throughput: 184.74043968224643 estimated_peak_memory_range: - min: 6307840 - max: 77174624 + min: 6311936 + max: 66614816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 - job_id: jqpyv6q0p + job_id: jygzw1yog job_status: Passed - torchscript_onnx_ort: - inference_time: 6766.0 - throughput: 147.79781259237365 + torchscript_onnx: + inference_time: 6352.0 + throughput: 157.43073047858942 estimated_peak_memory_range: min: 6307840 - max: 49412144 + max: 46772624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 238 - job_id: jn5q93mep + job_id: jz576qnvg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:32:37Z' + timestamp: '2024-06-22T22:35:51Z' - torchscript_onnx_tflite: - inference_time: 10676.0 - throughput: 93.66804046459347 + inference_time: 7384.0 + throughput: 135.42795232936078 estimated_peak_memory_range: - min: 569344 - max: 2852616 + min: 544768 + max: 2903944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jopr1eokg + job_id: j1pv4jmkp job_status: Passed torchscript_onnx_qnn: - inference_time: 11306.0 - throughput: 88.44861135680169 + inference_time: 7668.0 + throughput: 130.41210224308816 estimated_peak_memory_range: - min: 16384 - max: 52829760 + min: 6307840 + max: 26699136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 - job_id: j1p8wz4qp + job_id: jmg9862wp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:32:35Z' + timestamp: 
'2024-06-22T22:35:47Z' + - torchscript_onnx_tflite: + inference_time: 7357.0 + throughput: 135.92496941688188 + estimated_peak_memory_range: + min: 16384 + max: 1854216 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 149 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 149 + job_id: j7gj1jyvg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 7765.0 + throughput: 128.78300064391502 + estimated_peak_memory_range: + min: 6385664 + max: 24453616 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 236 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 236 + job_id: jnp13r185 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:35:48Z' - torchscript_onnx_qnn: - inference_time: 20526.0 - throughput: 48.718698236383126 + inference_time: 7505.0 + throughput: 133.24450366422386 estimated_peak_memory_range: min: 6303744 max: 6303744 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 - job_id: j2p0elv05 + job_id: jz5wxjz3p job_status: Passed - torchscript_onnx_ort: - inference_time: 8769.0 - throughput: 114.03808872163303 + torchscript_onnx: + inference_time: 8714.0 + throughput: 114.75786091347257 estimated_peak_memory_range: - min: 30912512 - max: 30912512 + min: 46301184 + max: 46301184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 238 - job_id: j1gle312p + job_id: jqp48z48g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:32:38Z' + timestamp: '2024-06-22T22:35:52Z' diff --git a/qai_hub_models/models/ffnet_78s_quantized/export.py b/qai_hub_models/models/ffnet_78s_quantized/export.py index c09312bb..90c234b5 100644 --- a/qai_hub_models/models/ffnet_78s_quantized/export.py +++ b/qai_hub_models/models/ffnet_78s_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -216,7 +216,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == 
TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) @@ -229,7 +229,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/ffnet_78s_quantized/perf.yaml b/qai_hub_models/models/ffnet_78s_quantized/perf.yaml index 9f134ee8..98ce0e40 100644 --- a/qai_hub_models/models/ffnet_78s_quantized/perf.yaml +++ b/qai_hub_models/models/ffnet_78s_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: FFNet-78S-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 8325.0 - throughput: 120.12012012012012 + inference_time: 8346.0 + throughput: 119.81787682722262 estimated_peak_memory_range: - min: 663552 - max: 8732048 + min: 24576 + max: 39754352 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,22 +58,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 154 - job_id: j1p3qewm5 - job_status: Passed - torchscript_onnx_ort: - inference_time: 9764.0 - throughput: 102.41704219582138 - estimated_peak_memory_range: - min: 7573504 - max: 52534152 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 149 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 149 - job_id: jvgd7oz6g + job_id: jo5m4jkd5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -78,13 +67,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:33:40Z' + timestamp: '2024-06-22T22:36:43Z' - torchscript_onnx_tflite: - inference_time: 6002.0 - throughput: 166.61112962345885 + inference_time: 6005.0 + throughput: 166.5278934221482 estimated_peak_memory_range: - min: 57344 - max: 86915504 + min: 655360 + max: 97393488 primary_compute_unit: NPU precision: int8 layer_info: @@ -92,22 +81,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 154 - job_id: jwgoe341p - job_status: Passed - torchscript_onnx_ort: - inference_time: 7233.0 - throughput: 138.25521913452232 - estimated_peak_memory_range: - min: 8347648 - max: 53601040 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 149 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 149 - job_id: jz57vx7n5 + job_id: jegnxjqk5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -116,13 +90,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:33:41Z' + timestamp: '2024-06-22T22:36:45Z' - torchscript_onnx_tflite: - inference_time: 8359.0 - throughput: 119.63153487259241 + inference_time: 8457.0 + throughput: 118.24524062906468 estimated_peak_memory_range: - min: 679936 - max: 2337912 + min: 708608 + max: 2335160 primary_compute_unit: NPU precision: int8 layer_info: @@ -130,7 +104,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 154 - job_id: j1pvzv9zg + job_id: jopr9zd0p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -139,13 +113,13 @@ 
models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:33:33Z' + timestamp: '2024-06-22T22:36:46Z' - torchscript_onnx_tflite: - inference_time: 44458.0 - throughput: 22.49313959242431 + inference_time: 8334.0 + throughput: 119.99040076793857 estimated_peak_memory_range: - min: 729088 - max: 44729792 + min: 688128 + max: 2311856 primary_compute_unit: NPU precision: int8 layer_info: @@ -153,22 +127,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 154 - job_id: j7gjkew15 + job_id: jep2j2dr5 job_status: Passed reference_device_info: - name: RB3 Gen 2 (Proxy) - os: '12' - form_factor: Iot + name: SA8775 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: Qcs6490 - timestamp: '2024-06-08T22:33:33Z' + chipset: Sa8775p + timestamp: '2024-06-22T22:36:47Z' - torchscript_onnx_tflite: - inference_time: 219858.0 - throughput: 4.548390324664101 + inference_time: 44723.0 + throughput: 22.35985958008184 estimated_peak_memory_range: - min: 393216 - max: 2901200 + min: 729088 + max: 49186032 primary_compute_unit: NPU precision: int8 layer_info: @@ -176,36 +150,36 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 154 - job_id: jlpe4kl85 + job_id: jqpyn928g job_status: Passed reference_device_info: - name: RB5 (Proxy) + name: RB3 Gen 2 (Proxy) os: '12' form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Qcs8250 - timestamp: '2024-06-08T22:33:34Z' - - torchscript_onnx_ort: - inference_time: 9426.0 - throughput: 106.08953957139826 + chipset: Qcs6490 + timestamp: '2024-06-22T22:36:48Z' + - torchscript_onnx_tflite: + inference_time: 220960.0 + throughput: 4.525706010137582 estimated_peak_memory_range: - min: 5931008 - max: 5931008 + min: 663552 + max: 10246232 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 149 + layers_on_npu: 154 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 149 - job_id: jqp4jv92p + total_layers: 154 + job_id: j2p0kn995 job_status: Passed reference_device_info: - name: Snapdragon X Elite CRD - os: '11' - form_factor: Compute - os_name: Windows + name: RB5 (Proxy) + os: '12' + form_factor: Iot + os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:33:42Z' + chipset: Qcs8250 + timestamp: '2024-06-22T22:36:49Z' diff --git a/qai_hub_models/models/googlenet/export.py b/qai_hub_models/models/googlenet/export.py index e611b7d9..d1493151 100644 --- a/qai_hub_models/models/googlenet/export.py +++ b/qai_hub_models/models/googlenet/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif 
target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/googlenet/perf.yaml b/qai_hub_models/models/googlenet/perf.yaml index 46726a11..00534209 100644 --- a/qai_hub_models/models/googlenet/perf.yaml +++ b/qai_hub_models/models/googlenet/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: GoogLeNet performance_metrics: - torchscript_onnx_tflite: - inference_time: 1052.0 - throughput: 950.5703422053232 + inference_time: 1046.0 + throughput: 956.0229445506692 estimated_peak_memory_range: - min: 73728 - max: 1671408 + min: 24576 + max: 1843352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: j7gjkeq15 + job_id: j1gl7jqj5 job_status: Passed torchscript_onnx_qnn: inference_time: 1088.0 throughput: 919.1176470588235 estimated_peak_memory_range: min: 16384 - max: 26332424 + max: 90728480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jz5wmq44g + job_id: j1pv4jykp job_status: Passed - torchscript_onnx_ort: - inference_time: 1306.0 - throughput: 765.6967840735069 + torchscript_onnx: + inference_time: 1253.0 + throughput: 798.0845969672786 estimated_peak_memory_range: - min: 81920 - max: 33177416 + min: 12288 + max: 35524544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jz57vx9n5 + job_id: jmg9867wp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:34:47Z' + timestamp: '2024-06-22T22:38:16Z' - torchscript_onnx_tflite: - inference_time: 686.0 - throughput: 1457.725947521866 + inference_time: 681.0 + throughput: 1468.4287812041116 estimated_peak_memory_range: min: 16384 - max: 47804608 + max: 50308432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jlpe4ky85 + job_id: jw56vk06p job_status: Passed torchscript_onnx_qnn: - inference_time: 700.0 - throughput: 1428.5714285714287 + inference_time: 699.0 + throughput: 1430.615164520744 estimated_peak_memory_range: min: 0 - max: 53870528 + max: 49524992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jmg99wdmg + job_id: j7gj1j6vg job_status: Passed - torchscript_onnx_ort: - inference_time: 828.0 - throughput: 1207.729468599034 + torchscript_onnx: + inference_time: 840.0 + throughput: 1190.4761904761904 estimated_peak_memory_range: - min: 618496 - max: 31247424 + min: 0 + max: 27602224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jqp4jv32p + job_id: 
jnp13rk85 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:34:48Z' + timestamp: '2024-06-22T22:38:17Z' - torchscript_onnx_tflite: - inference_time: 1048.0 - throughput: 954.1984732824427 + inference_time: 1038.0 + throughput: 963.3911368015414 estimated_peak_memory_range: - min: 40960 - max: 17749600 + min: 24576 + max: 190855136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jygzvrn4p + job_id: j1p38yr35 job_status: Passed torchscript_onnx_qnn: - inference_time: 1098.0 - throughput: 910.7468123861566 + inference_time: 1088.0 + throughput: 919.1176470588235 estimated_peak_memory_range: - min: 491520 - max: 26782184 + min: 634880 + max: 5200568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jvgd7o26g + job_id: jygzw1qog job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:34:46Z' + timestamp: '2024-06-22T22:38:14Z' + - torchscript_onnx_tflite: + inference_time: 1048.0 + throughput: 954.1984732824427 + estimated_peak_memory_range: + min: 12288 + max: 2028152 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 84 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 84 + job_id: jwgomj9q5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1099.0 + throughput: 909.9181073703367 + estimated_peak_memory_range: + min: 36864 + max: 37178992 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 143 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 143 + job_id: jz5wxj03p + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:38:15Z' - torchscript_onnx_qnn: - inference_time: 1266.0 - throughput: 789.8894154818325 + inference_time: 1231.0 + throughput: 812.3476848090983 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jnp1qe6ng + job_id: jlpe2j0op job_status: Passed - torchscript_onnx_ort: - inference_time: 1388.0 - throughput: 720.4610951008646 + torchscript_onnx: + inference_time: 1329.0 + throughput: 752.4454477050414 estimated_peak_memory_range: - min: 671744 - max: 671744 + min: 26718208 + max: 26718208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: j0pxeyx85 + job_id: jvgd0jyrp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:34:49Z' + timestamp: '2024-06-22T22:38:19Z' diff --git a/qai_hub_models/models/googlenet_quantized/export.py b/qai_hub_models/models/googlenet_quantized/export.py index c9504b86..ca9d6b79 100644 --- a/qai_hub_models/models/googlenet_quantized/export.py +++ b/qai_hub_models/models/googlenet_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, 
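Every export_model signature in this diff switches its default device from "Samsung Galaxy S23" to the "Samsung Galaxy S23 (Family)" alias, presumably so jobs can be scheduled on any device in that family. A minimal illustration of calling one of these entry points directly, using only keyword arguments visible in the hunks above (a configured Qualcomm AI Hub client is assumed; the remaining parameters keep their defaults):

from qai_hub_models.models.googlenet.export import export_model

# Compile-only sketch: skip the on-device profiling and inference stages.
export_model(
    device="Samsung Galaxy S23 (Family)",  # the new default, written out explicitly here
    skip_profiling=True,
    skip_inferencing=True,
)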
skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/googlenet_quantized/model.py b/qai_hub_models/models/googlenet_quantized/model.py index e41ef8c4..ca5a57f1 100644 --- a/qai_hub_models/models/googlenet_quantized/model.py +++ b/qai_hub_models/models/googlenet_quantized/model.py @@ -82,5 +82,4 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() return cls(sim) diff --git a/qai_hub_models/models/googlenet_quantized/perf.yaml b/qai_hub_models/models/googlenet_quantized/perf.yaml index d51b481d..577bb677 100644 --- a/qai_hub_models/models/googlenet_quantized/perf.yaml +++ b/qai_hub_models/models/googlenet_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: GoogLeNetQuantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 298.0 - throughput: 3355.7046979865772 + inference_time: 311.0 + throughput: 3215.434083601286 estimated_peak_memory_range: - min: 20480 - max: 1284320 + min: 12288 + max: 9967728 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jegnr3kj5 + job_id: jqp48z68g job_status: Passed torchscript_onnx_qnn: - inference_time: 342.0 - throughput: 2923.9766081871344 + inference_time: 352.0 + throughput: 2840.909090909091 estimated_peak_memory_range: min: 16384 - max: 10406440 + max: 79166928 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: j1p8wzxqp - job_status: Passed - torchscript_onnx_ort: - inference_time: 523.0 - throughput: 1912.0458891013384 - estimated_peak_memory_range: - min: 12288 - max: 12422920 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 91 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 91 - job_id: jw56qn7ng + job_id: jqpyn9k8g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:35:33Z' + timestamp: '2024-06-22T22:38:59Z' - torchscript_onnx_tflite: - inference_time: 237.0 - throughput: 4219.4092827004215 + inference_time: 229.0 + throughput: 
4366.812227074236 estimated_peak_memory_range: min: 12288 - max: 34025648 + max: 36833376 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jopr1ewkg + job_id: j0pxmw83g job_status: Passed torchscript_onnx_qnn: inference_time: 244.0 throughput: 4098.360655737705 estimated_peak_memory_range: - min: 0 - max: 42694240 + min: 163840 + max: 40895152 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jogkr34v5 - job_status: Passed - torchscript_onnx_ort: - inference_time: 393.0 - throughput: 2544.529262086514 - estimated_peak_memory_range: - min: 12288 - max: 30491248 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 91 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 91 - job_id: j1p3qe9m5 + job_id: j2p0kn895 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +120,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:35:34Z' + timestamp: '2024-06-22T22:39:01Z' - torchscript_onnx_tflite: inference_time: 298.0 throughput: 3355.7046979865772 estimated_peak_memory_range: min: 20480 - max: 1812976 + max: 1383264 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jep23le6g + job_id: jo5m4j1d5 job_status: Passed torchscript_onnx_qnn: - inference_time: 335.0 - throughput: 2985.0746268656717 + inference_time: 342.0 + throughput: 2923.9766081871344 estimated_peak_memory_range: - min: 167936 - max: 10553224 + min: 172032 + max: 4165120 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: j1gle3x2p + job_id: jogkdjwwp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +158,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:35:32Z' + timestamp: '2024-06-22T22:39:03Z' - torchscript_onnx_tflite: - inference_time: 964.0 - throughput: 1037.344398340249 + inference_time: 295.0 + throughput: 3389.830508474576 estimated_peak_memory_range: min: 12288 - max: 18322160 + max: 1439240 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,7 +172,45 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jqpyv6m0p + job_id: jegnxjdk5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 336.0 + throughput: 2976.190476190476 + estimated_peak_memory_range: + min: 16384 + max: 127409384 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 86 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 86 + job_id: jn5qwjxn5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:39:05Z' + - torchscript_onnx_tflite: + inference_time: 959.0 + throughput: 1042.752867570386 + estimated_peak_memory_range: + min: 36864 + max: 20375936 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 84 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 84 + job_id: jopr9zm0p job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -207,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-08T22:35:26Z' + timestamp: 
'2024-06-22T22:38:57Z' - torchscript_onnx_tflite: - inference_time: 5711.0 - throughput: 175.1006828926633 + inference_time: 5663.0 + throughput: 176.58484901995408 estimated_peak_memory_range: - min: 16384 - max: 2182760 + min: 57344 + max: 1977656 primary_compute_unit: NPU precision: int8 layer_info: @@ -221,7 +233,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: j2p0elj05 + job_id: jep2j2qr5 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -230,13 +242,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-08T22:35:27Z' + timestamp: '2024-06-22T22:38:58Z' - torchscript_onnx_qnn: - inference_time: 438.0 - throughput: 2283.10502283105 + inference_time: 439.0 + throughput: 2277.904328018223 estimated_peak_memory_range: - min: 536576 - max: 536576 + min: 528384 + max: 528384 primary_compute_unit: NPU precision: int8 layer_info: @@ -244,22 +256,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jn5q93yep - job_status: Passed - torchscript_onnx_ort: - inference_time: 526.0 - throughput: 1901.1406844106464 - estimated_peak_memory_range: - min: 11812864 - max: 11812864 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 91 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 91 - job_id: jwgoe3r1p + job_id: j1p88ldkp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -268,4 +265,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:35:35Z' + timestamp: '2024-06-22T22:39:02Z' diff --git a/qai_hub_models/models/hrnet_pose/export.py b/qai_hub_models/models/hrnet_pose/export.py index 6853ea8f..80e33193 100644 --- a/qai_hub_models/models/hrnet_pose/export.py +++ b/qai_hub_models/models/hrnet_pose/export.py @@ -38,7 +38,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,13 +117,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -169,7 +168,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -187,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -209,7 +208,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git 
a/qai_hub_models/models/hrnet_pose/model.py b/qai_hub_models/models/hrnet_pose/model.py index c4d3c102..184ebd11 100644 --- a/qai_hub_models/models/hrnet_pose/model.py +++ b/qai_hub_models/models/hrnet_pose/model.py @@ -71,7 +71,7 @@ def from_pretrained(cls) -> HRNetPose: cfg.freeze() net = PoseHighResolutionNet(cfg) net.load_state_dict(weights) - return cls(net).eval() + return cls(net) def forward(self, image): """ diff --git a/qai_hub_models/models/hrnet_pose/perf.yaml b/qai_hub_models/models/hrnet_pose/perf.yaml index 26ccc19f..ffae2b22 100644 --- a/qai_hub_models/models/hrnet_pose/perf.yaml +++ b/qai_hub_models/models/hrnet_pose/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: HRNetPose performance_metrics: - torchscript_onnx_tflite: - inference_time: 2822.0 - throughput: 354.3586109142452 + inference_time: 2824.0 + throughput: 354.10764872521247 estimated_peak_memory_range: - min: 28672 - max: 2472016 + min: 24576 + max: 2661720 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 516 - job_id: j7gjke715 + job_id: jlpe2jqop job_status: Passed torchscript_onnx_qnn: - inference_time: 2908.0 - throughput: 343.878954607978 + inference_time: 2922.0 + throughput: 342.23134839151265 estimated_peak_memory_range: - min: 16384 - max: 21168936 + min: 36864 + max: 17867304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: jz5wmq74g + job_id: jnp13r985 job_status: Passed - torchscript_onnx_ort: - inference_time: 3074.0 - throughput: 325.30904359141186 + torchscript_onnx: + inference_time: 3097.0 + throughput: 322.8931223764934 estimated_peak_memory_range: min: 12288 - max: 131380776 + max: 131694472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 749 - job_id: jz5wmq7zg + job_id: jvgd0jkzp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:36:14Z' + timestamp: '2024-06-22T22:39:53Z' - torchscript_onnx_tflite: - inference_time: 2066.0 - throughput: 484.027105517909 + inference_time: 2048.0 + throughput: 488.28125 estimated_peak_memory_range: min: 16384 - max: 109820208 + max: 120963120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 516 - job_id: jlpe4kz85 + job_id: jygzw16og job_status: Passed torchscript_onnx_qnn: - inference_time: 2134.0 - throughput: 468.6035613870665 + inference_time: 2125.0 + throughput: 470.5882352941176 estimated_peak_memory_range: - min: 606208 - max: 190071840 + min: 651264 + max: 163638912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: jmg99wmmg + job_id: jvgd0jkrp job_status: Passed - torchscript_onnx_ort: - inference_time: 2205.0 - throughput: 453.51473922902494 + torchscript_onnx: + inference_time: 2246.0 + throughput: 445.2359750667854 estimated_peak_memory_range: min: 12288 - max: 
92302688 + max: 75139088 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 749 - job_id: jmg99wmqg + job_id: jz576qm9g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:36:15Z' + timestamp: '2024-06-22T22:39:54Z' - torchscript_onnx_tflite: - inference_time: 2832.0 - throughput: 353.1073446327684 + inference_time: 2827.0 + throughput: 353.73187124159887 estimated_peak_memory_range: - min: 28672 - max: 3094624 + min: 49152 + max: 2840272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 516 - job_id: jygzvrm4p + job_id: jz5wxjk3p job_status: Passed torchscript_onnx_qnn: - inference_time: 2903.0 - throughput: 344.47123665173956 + inference_time: 2900.0 + throughput: 344.82758620689657 estimated_peak_memory_range: - min: 12288 - max: 20792584 + min: 606208 + max: 15481792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: jvgd7o36g + job_id: jmg986r8p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:36:13Z' + timestamp: '2024-06-22T22:39:50Z' + - torchscript_onnx_tflite: + inference_time: 2836.0 + throughput: 352.60930888575456 + estimated_peak_memory_range: + min: 32768 + max: 2799448 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 516 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 516 + job_id: jmg986rwp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 2927.0 + throughput: 341.646737273659 + estimated_peak_memory_range: + min: 12288 + max: 21375320 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 747 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 747 + job_id: jnp13r975 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:39:52Z' - torchscript_onnx_qnn: - inference_time: 3132.0 - throughput: 319.28480204342276 + inference_time: 2886.0 + throughput: 346.5003465003465 estimated_peak_memory_range: - min: 897024 - max: 897024 + min: 589824 + max: 589824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: jnp1qejng + job_id: jz5wxjkmp job_status: Passed - torchscript_onnx_ort: - inference_time: 2963.0 - throughput: 337.4957813027337 + torchscript_onnx: + inference_time: 2975.0 + throughput: 336.1344537815126 estimated_peak_memory_range: - min: 49115136 - max: 49115136 + min: 44404736 + max: 44404736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 749 - job_id: jnp1qejkg + job_id: jqp48z71g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:36:16Z' + timestamp: '2024-06-22T22:39:55Z' diff --git a/qai_hub_models/models/hrnet_pose_quantized/README.md b/qai_hub_models/models/hrnet_pose_quantized/README.md new file mode 100644 index 
00000000..11dac69c --- /dev/null +++ b/qai_hub_models/models/hrnet_pose_quantized/README.md @@ -0,0 +1,61 @@ +[![Qualcomm® AI Hub Models](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/quic-logo.jpg)](../../README.md) + + +# [HRNetPoseQuantized: Perform accurate human pose estimation](https://aihub.qualcomm.com/models/hrnet_pose_quantized) + +HRNet performs pose estimation in high-resolution representations. + +This is based on the implementation of HRNetPoseQuantized found +[here](https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/hrnet_posenet). This repository contains scripts for optimized on-device +export suitable to run on Qualcomm® devices. More details on model performance +across various devices can be found [here](https://aihub.qualcomm.com/models/hrnet_pose_quantized). + +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on +a hosted Qualcomm® device. + + + + +## Example & Usage + +Install the package via pip: +```bash +pip install "qai_hub_models[hrnet_pose_quantized]" +``` + + +Once installed, run the following simple CLI demo: + +```bash +python -m qai_hub_models.models.hrnet_pose_quantized.demo +``` +More details on the CLI tool can be found with the `--help` option. See +[demo.py](demo.py) for sample usage of the model including pre/post processing +scripts. Please refer to our [general instructions on using +models](../../../#getting-started) for more usage instructions. + +## Export for on-device deployment + +This repository contains export scripts that produce a model optimized for +on-device deployment. This can be run as follows: + +```bash +python -m qai_hub_models.models.hrnet_pose_quantized.export +``` +Additional options are documented with the `--help` option. Note that the above +script requires access to Deployment instructions for Qualcomm® AI Hub. + +## License +- The license for the original implementation of HRNetPoseQuantized can be found + [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) + +## References +* [Deep High-Resolution Representation Learning for Human Pose Estimation](https://arxiv.org/abs/1902.09212) +* [Source Model Implementation](https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/hrnet_posenet) + +## Community +* Join [our AI Hub Slack community](https://qualcomm-ai-hub.slack.com/join/shared_invite/zt-2d5zsmas3-Sj0Q9TzslueCjS31eXG2UA#/shared-invite/email) to collaborate, post questions and learn more about on-device AI. +* For questions or feedback please [reach out to us](mailto:ai-hub-support@qti.qualcomm.com). + + diff --git a/qai_hub_models/models/hrnet_pose_quantized/__init__.py b/qai_hub_models/models/hrnet_pose_quantized/__init__.py new file mode 100644 index 00000000..26dbe409 --- /dev/null +++ b/qai_hub_models/models/hrnet_pose_quantized/__init__.py @@ -0,0 +1,8 @@ +# ----------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause +# ----------------------------------------------------------------------- +from qai_hub_models.models.hrnet_pose.app import HRNetPoseApp # noqa: F401 + +from .model import MODEL_ID # noqa: F401 +from .model import HRNetPoseQuantizable as Model # noqa: F401 diff --git a/qai_hub_models/models/hrnet_pose_quantized/conftest.py b/qai_hub_models/models/hrnet_pose_quantized/conftest.py new file mode 100644 index 00000000..c4b5e588 --- /dev/null +++ b/qai_hub_models/models/hrnet_pose_quantized/conftest.py @@ -0,0 +1,39 @@ +# ----------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# ----------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +import inspect + +import pytest + +from qai_hub_models.models.hrnet_pose_quantized import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +# Instantiate the model only once for all tests. +# Mock from_pretrained to always return the initialized model. +# This speeds up tests and limits memory leaks. +@pytest.fixture(scope="module", autouse=True) +def cached_from_pretrained(): + with pytest.MonkeyPatch.context() as mp: + pretrained_cache = {} + from_pretrained = Model.from_pretrained + sig = inspect.signature(from_pretrained) + + @skip_clone_repo_check + def _cached_from_pretrained(*args, **kwargs): + cache_key = str(args) + str(kwargs) + model = pretrained_cache.get(cache_key, None) + if model: + return model + else: + model = from_pretrained(*args, **kwargs) + pretrained_cache[cache_key] = model + return model + + _cached_from_pretrained.__signature__ = sig + + mp.setattr(Model, "from_pretrained", _cached_from_pretrained) + yield mp diff --git a/qai_hub_models/models/hrnet_pose_quantized/demo.py b/qai_hub_models/models/hrnet_pose_quantized/demo.py new file mode 100644 index 00000000..a5eca7ae --- /dev/null +++ b/qai_hub_models/models/hrnet_pose_quantized/demo.py @@ -0,0 +1,57 @@ +# ----------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# ----------------------------------------------------------------------- +from qai_hub_models.models.hrnet_pose.app import HRNetPoseApp +from qai_hub_models.models.hrnet_pose_quantized.model import ( + MODEL_ASSET_VERSION, + MODEL_ID, + HRNetPoseQuantizable, +) +from qai_hub_models.utils.args import ( + demo_model_from_cli_args, + get_model_cli_parser, + get_on_device_demo_parser, + validate_on_device_demo_args, +) +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image +from qai_hub_models.utils.display import display_or_save_image + +IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, "hrnet_pose_demo.png" +) + + +# The demo will display an image with the predicted keypoints.
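+#
+# A minimal sketch of the equivalent programmatic flow (illustrative only; it
+# mirrors what main() below does without the CLI parsing, using the helpers
+# imported above; the image path is a hypothetical local file):
+#
+#     model = HRNetPoseQuantizable.from_pretrained()
+#     app = HRNetPoseApp(model)
+#     image = load_image("pose_input.png")  # hypothetical input image
+#     keypoints = app.predict_pose_keypoints(image)[0]
+#     display_or_save_image(keypoints, ".", "keypoints_out.png", "keypoints")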
+def main(is_test: bool = False): + # Demo parameters + parser = get_model_cli_parser(HRNetPoseQuantizable) + parser = get_on_device_demo_parser(parser, add_output_dir=True) + parser.add_argument( + "--image", + type=str, + default=IMAGE_ADDRESS, + help="image file path or URL", + ) + + args = parser.parse_args([] if is_test else None) + validate_on_device_demo_args(args, MODEL_ID) + + # Load image & model + model = demo_model_from_cli_args(HRNetPoseQuantizable, MODEL_ID, args) + image = load_image(args.image) + print("Model Loaded") + + app = HRNetPoseApp(model) + keypoints = app.predict_pose_keypoints(image)[0] + if not is_test: + display_or_save_image( + keypoints, + args.output_dir, + "hrnetpose_quantized_demo_output.png", + "keypoints", + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/hrnet_pose_quantized/export.py b/qai_hub_models/models/hrnet_pose_quantized/export.py new file mode 100644 index 00000000..f1bb104e --- /dev/null +++ b/qai_hub_models/models/hrnet_pose_quantized/export.py @@ -0,0 +1,241 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import os +import warnings +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple, cast + +import qai_hub as hub + +from qai_hub_models.models.hrnet_pose_quantized import Model +from qai_hub_models.utils.args import ( + export_parser, + get_input_spec_kwargs, + get_model_kwargs, +) +from qai_hub_models.utils.base_model import TargetRuntime +from qai_hub_models.utils.compare import torch_inference +from qai_hub_models.utils.printing import ( + print_inference_metrics, + print_on_target_demo_cmd, + print_profile_metrics_from_job, +) +from qai_hub_models.utils.qai_hub_helpers import ( + can_access_qualcomm_ai_hub, + export_without_hub_access, + transpose_channel_first_to_last, + transpose_channel_last_to_first, +) +from qai_hub_models.utils.qnn_helpers import get_qnn_inputs + + +def export_model( + device: str = "Samsung Galaxy S23 (Family)", + chipset: Optional[str] = None, + skip_profiling: bool = False, + skip_inferencing: bool = False, + skip_downloading: bool = False, + skip_summary: bool = False, + output_dir: Optional[str] = None, + target_runtime: TargetRuntime = TargetRuntime.TFLITE, + compile_options: str = "", + profile_options: str = "", + **additional_model_kwargs, +) -> Tuple[hub.CompileJob, Optional[hub.ProfileJob], Optional[hub.InferenceJob]] | List[ + str +]: + """ + This function accomplishes 6 main tasks: + + 1. Instantiates a PyTorch model and converts it to a traced TorchScript format. + 2. Compiles the model to an asset that can be run on device. + 3. Profiles the model performance on real devices. + 4. Inferences the model on sample inputs. + 5. Downloads the model asset to the local directory. + 6. Summarizes the results from profiling and inference. + + Each of the last four steps can be optionally skipped using the input options. + + Parameters: + device: Device for which to export the model. + Full list of available devices can be found by running `hub.get_devices()`. + Defaults to DEFAULT_DEVICE if not specified. + chipset: If set, will choose a random device with this chipset. + Overrides the `device` argument. 
+ skip_profiling: If set, skips profiling of compiled model on real devices. + skip_inferencing: If set, skips computing on-device outputs from sample data. + skip_downloading: If set, skips downloading of compiled model. + skip_summary: If set, skips waiting for and summarizing results + from profiling and inference. + output_dir: Directory to store generated assets (e.g. compiled model). + Defaults to `/build/`. + target_runtime: Which on-device runtime to target. Default is TFLite. + compile_options: Additional options to pass when submitting the compile job. + profile_options: Additional options to pass when submitting the profile job. + **additional_model_kwargs: Additional optional kwargs used to customize + `model_cls.from_pretrained` and `model.get_input_spec` + + Returns: + A 3-tuple of: + * A CompileJob object containing metadata about the compile job submitted to hub. + * A ProfileJob containing metadata about the profile job (None if profiling skipped). + * An InferenceJob containing metadata about the inference job (None if inferencing skipped). + """ + model_name = "hrnet_pose_quantized" + output_path = Path(output_dir or Path.cwd() / "build" / model_name) + if chipset: + hub_device = hub.Device(attributes=f"chipset:{chipset}") + else: + hub_device = hub.Device(name=device) + if not can_access_qualcomm_ai_hub(): + return export_without_hub_access( + "hrnet_pose_quantized", + "HRNetPoseQuantized", + device, + skip_profiling, + skip_inferencing, + skip_downloading, + skip_summary, + output_path, + target_runtime, + compile_options, + profile_options, + ) + + # 1. Initialize PyTorch model + model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) + input_spec = model.get_input_spec( + **get_input_spec_kwargs(model, additional_model_kwargs) + ) + + # Trace the model + source_model = model.convert_to_hub_source_model( + target_runtime, output_path, input_spec + ) + if target_runtime == TargetRuntime.TFLITE: + quant_calibration_data = None + else: + quant_calibration_data = model.get_calibration_data(target_runtime, input_spec) + + # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) + channel_last_flags = ( + " --force_channel_last_input image" + " --force_channel_last_output output_0" + if target_runtime != TargetRuntime.ONNX + else "" + ) + + # 2. Compile the model to an on-device asset + model_compile_options = model.get_hub_compile_options( + target_runtime, compile_options + channel_last_flags, hub_device + ) + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( + model=source_model, + input_specs=input_spec, + device=hub_device, + name=model_name, + calibration_data=quant_calibration_data, + options=model_compile_options, + ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) + + # 3. Profile the model asset on real devices + profile_job: Optional[hub.client.ProfileJob] = None + if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print(f"Profiling model {model_name} on a hosted device.") + submitted_profile_job = hub.submit_profile_job( + model=compile_job.get_target_model(), + device=hub_device, + name=model_name, + options=profile_options_all, + ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) + + # 4. 
Run inference on-device with sample inputs + inference_job: Optional[hub.client.InferenceJob] = None + if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print( + f"Running inference for {model_name} on a hosted device with example inputs." + ) + sample_inputs = model.sample_inputs(input_spec) + hub_inputs = sample_inputs + if target_runtime == TargetRuntime.QNN: + hub_inputs = get_qnn_inputs(compile_job, sample_inputs) + # Convert inputs from channel first to channel last + hub_inputs = ( + sample_inputs + if target_runtime == TargetRuntime.ONNX + else transpose_channel_first_to_last("image", sample_inputs, target_runtime) + ) + submitted_inference_job = hub.submit_inference_job( + model=compile_job.get_target_model(), + inputs=hub_inputs, + device=hub_device, + name=model_name, + options=profile_options_all, + ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) + + # 5. Download the model asset to a local file + if not skip_downloading: + if target_runtime == TargetRuntime.QNN: + target_runtime_extension = "so" + elif target_runtime == TargetRuntime.TFLITE: + target_runtime_extension = "tflite" + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: + target_runtime_extension = "onnx" + + os.makedirs(output_path, exist_ok=True) + target_model: hub.Model = compile_job.get_target_model() # type: ignore + target_model.download( + str(output_path / f"{model_name}.{target_runtime_extension}") + ) + + # 6. Summarize the results from profiling and inference + if not skip_summary and not skip_profiling: + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore + print_profile_metrics_from_job(profile_job, profile_data) + + if not skip_summary and not skip_inferencing: + torch_out = torch_inference(model, sample_inputs) + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore + # Convert outputs from channel last to channel first + inference_result = ( + inference_result + if target_runtime == TargetRuntime.ONNX + else transpose_channel_last_to_first( + "output_0", inference_result, target_runtime + ) + ) + print_inference_metrics(inference_job, inference_result, torch_out) + + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + + return (compile_job, profile_job, inference_job) + + +def main(): + warnings.filterwarnings("ignore") + parser = export_parser(model_cls=Model, supports_onnx=False) + args = parser.parse_args() + export_model(**vars(args)) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/hrnet_pose_quantized/info.yaml b/qai_hub_models/models/hrnet_pose_quantized/info.yaml new file mode 100644 index 00000000..9c051f2b --- /dev/null +++ b/qai_hub_models/models/hrnet_pose_quantized/info.yaml @@ -0,0 +1,36 @@ +name: HRNetPoseQuantized +# id must match with the model dir name in qai_hub_models +id: hrnet_pose_quantized +status: public +headline: Perform accurate human pose estimation. +domain: Computer Vision +use_case: Pose Estimation +description: HRNet performs pose estimation in high-resolution representations. 
+tags: + - quantized +research_paper: https://arxiv.org/abs/1902.09212 +research_paper_title: Deep High-Resolution Representation Learning for Human Pose + Estimation +license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +source_repo: + https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/hrnet_posenet +technical_details: + Model checkpoint: hrnet_posenet_FP32_state_dict + Input resolution: 256x192 + Number of parameters: 28.5M + Model size: 109 MB +applicable_scenarios: + - Injury prevention training + - Sports performance analysis + - Posture recognition +form_factors: + - Phone + - Tablet + - IoT +related_models: [litehrnet, hrnet_pose] +has_static_banner: yes +has_animated_banner: yes +license_type: other +deploy_license_type: AI Model Hub License +dataset: [] diff --git a/qai_hub_models/models/hrnet_pose_quantized/model.py b/qai_hub_models/models/hrnet_pose_quantized/model.py new file mode 100644 index 00000000..ff7bf2e2 --- /dev/null +++ b/qai_hub_models/models/hrnet_pose_quantized/model.py @@ -0,0 +1,72 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from __future__ import annotations + +# isort: off +# This verifies aimet is installed, and this must be included first. +from qai_hub_models.utils.quantization_aimet import ( + AIMETQuantizableMixin, + tie_observers, + constrain_quantized_inputs_to_image_range, +) + +# isort: on + +import torch +from aimet_torch.cross_layer_equalization import equalize_model +from aimet_torch.model_preparer import prepare_model +from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim + +from qai_hub_models.models.hrnet_pose.model import HRNetPose +from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset + +MODEL_ID = __name__.split(".")[-2] +MODEL_ASSET_VERSION = 3 +DEFAULT_ENCODINGS = "hrnet_pose_quantized_encodings.json" + + +class HRNetPoseQuantizable(AIMETQuantizableMixin, HRNetPose): + """HRNetPose with post training quantization support + + Supports only 8 bit weights and activations, and only loads pre-quantized checkpoints. 
+ Support for quantizing using your own weights & data will come at a later date.""" + + def __init__( + self, + hrnet_model: QuantizationSimModel, + ) -> None: + HRNetPose.__init__(self, hrnet_model.model) + AIMETQuantizableMixin.__init__(self, hrnet_model) + + @classmethod + def from_pretrained( + cls, aimet_encodings: str | None = "DEFAULT" + ) -> HRNetPoseQuantizable: + model = HRNetPose.from_pretrained() + input_shape = HRNetPose.get_input_spec()["image"][0] + model = prepare_model(model) + equalize_model(model, input_shape) + + sim = QuantizationSimModel( + model, + quant_scheme="tf_enhanced", + default_param_bw=8, + default_output_bw=8, + config_file=get_default_aimet_config(), + dummy_input=torch.rand(input_shape), + ) + tie_observers(sim) + constrain_quantized_inputs_to_image_range(sim) + + if aimet_encodings: + if aimet_encodings == "DEFAULT": + aimet_encodings = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, DEFAULT_ENCODINGS + ).fetch() + load_encodings_to_sim(sim, aimet_encodings) + + final_model = cls(sim) + return final_model diff --git a/qai_hub_models/models/hrnet_pose_quantized/perf.yaml b/qai_hub_models/models/hrnet_pose_quantized/perf.yaml new file mode 100644 index 00000000..c185397f --- /dev/null +++ b/qai_hub_models/models/hrnet_pose_quantized/perf.yaml @@ -0,0 +1,265 @@ +aggregated: + supported_oses: + - Android + supported_devices: + - Google Pixel 3 + - Google Pixel 3a + - Google Pixel 3a XL + - Google Pixel 4 + - Google Pixel 4a + - Google Pixel 5a 5G + - QCS8250 (Proxy) + - QCS8550 (Proxy) + - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) + - Samsung Galaxy S21 + - Samsung Galaxy S21 Ultra + - Samsung Galaxy S21+ + - Samsung Galaxy S22 5G + - Samsung Galaxy S22 Ultra 5G + - Samsung Galaxy S22+ 5G + - Samsung Galaxy S23 + - Samsung Galaxy S23 Ultra + - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ + - Samsung Galaxy Tab S8 + - Snapdragon X Elite CRD + - Xiaomi 12 + - Xiaomi 12 Pro + supported_chipsets: + - Qcs8250 + - Qcs8550 + - Sa8540p + - Sa8775p + - Snapdragon® 8 Gen 1 + - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 + - Snapdragon® 888 + - Snapdragon® X Elite +models: +- name: HRNetPoseQuantized + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 958.0 + throughput: 1043.8413361169103 + estimated_peak_memory_range: + min: 24576 + max: 1789808 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 516 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 516 + job_id: jo5m4j795 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1238.0 + throughput: 807.7544426494346 + estimated_peak_memory_range: + min: 12288 + max: 15030312 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 488 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 488 + job_id: j1p88lnop + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-06-22T22:42:46Z' + - torchscript_onnx_tflite: + inference_time: 707.0 + throughput: 1414.4271570014143 + estimated_peak_memory_range: + min: 12288 + max: 103834048 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 516 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 516 + job_id: jegnxj4q5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 923.0 + throughput: 1083.4236186348862 + estimated_peak_memory_range: + min: 
16384 + max: 154376160 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 488 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 488 + job_id: jogkdj1np + job_status: Passed + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-06-22T22:42:48Z' + - torchscript_onnx_tflite: + inference_time: 951.0 + throughput: 1051.5247108307046 + estimated_peak_memory_range: + min: 24576 + max: 3344920 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 516 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 516 + job_id: jopr9zr7p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1246.0 + throughput: 802.5682182985554 + estimated_peak_memory_range: + min: 16384 + max: 11938672 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 488 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 488 + job_id: j1gl7jjm5 + job_status: Passed + reference_device_info: + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: Qcs8550 + timestamp: '2024-06-22T22:42:50Z' + - torchscript_onnx_tflite: + inference_time: 946.0 + throughput: 1057.0824524312895 + estimated_peak_memory_range: + min: 12288 + max: 3002248 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 516 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 516 + job_id: jep2j21q5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1240.0 + throughput: 806.4516129032259 + estimated_peak_memory_range: + min: 12288 + max: 21634464 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 488 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 488 + job_id: jw56vkkyp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:42:51Z' + - torchscript_onnx_tflite: + inference_time: 3686.0 + throughput: 271.2967986977754 + estimated_peak_memory_range: + min: 24576 + max: 64839808 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 516 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 516 + job_id: jqpyn9llg + job_status: Passed + reference_device_info: + name: RB3 Gen 2 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: Qcs6490 + timestamp: '2024-06-22T22:42:44Z' + - torchscript_onnx_tflite: + inference_time: 17208.0 + throughput: 58.11250581125058 + estimated_peak_memory_range: + min: 12288 + max: 2244152 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 516 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 516 + job_id: j2p0knwn5 + job_status: Passed + reference_device_info: + name: RB5 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: Qcs8250 + timestamp: '2024-06-22T22:42:45Z' + - torchscript_onnx_qnn: + inference_time: 1292.0 + throughput: 773.9938080495356 + estimated_peak_memory_range: + min: 331776 + max: 331776 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 488 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 488 + job_id: jn5qwjno5 + job_status: Passed + reference_device_info: + name: Snapdragon X Elite CRD + os: '11' + form_factor: Compute + os_name: Windows + manufacturer: Qualcomm + chipset: Snapdragon® X Elite + timestamp: 
'2024-06-22T22:42:49Z' diff --git a/qai_hub_models/models/hrnet_pose_quantized/requirements.txt b/qai_hub_models/models/hrnet_pose_quantized/requirements.txt new file mode 100644 index 00000000..a112c877 --- /dev/null +++ b/qai_hub_models/models/hrnet_pose_quantized/requirements.txt @@ -0,0 +1,5 @@ +aimet-torch==1.31.2; sys_platform == "linux" +yacs==0.1.8 +mmpose==1.2.0 +mmcv==2.1.0 +mmdet==3.2.0 diff --git a/qai_hub_models/models/hrnet_pose_quantized/test.py b/qai_hub_models/models/hrnet_pose_quantized/test.py new file mode 100644 index 00000000..70bb789b --- /dev/null +++ b/qai_hub_models/models/hrnet_pose_quantized/test.py @@ -0,0 +1,42 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +import numpy as np +import torch + +from qai_hub_models.models.hrnet_pose.app import HRNetPoseApp +from qai_hub_models.models.hrnet_pose.demo import IMAGE_ADDRESS +from qai_hub_models.models.hrnet_pose.demo import main as demo_main +from qai_hub_models.models.hrnet_pose_quantized.model import ( + MODEL_ASSET_VERSION, + MODEL_ID, + HRNetPoseQuantizable, +) +from qai_hub_models.utils.asset_loaders import ( + CachedWebModelAsset, + load_image, + load_numpy, +) +from qai_hub_models.utils.testing import skip_clone_repo_check + +OUTPUT_KEYPOINTS = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, "hrnet_keypoints.npy" +) + + +@skip_clone_repo_check +def test_task(): + # AIMET Quantization Simulator introduces randomness. Eliminate that for this test. + torch.manual_seed(0) + image = load_image(IMAGE_ADDRESS) + model = HRNetPoseQuantizable.from_pretrained() + app = HRNetPoseApp(model=model) + output = app.predict(image, raw_output=True) + output_gt = load_numpy(OUTPUT_KEYPOINTS) + np.testing.assert_allclose(output, output_gt, atol=5) + + +@skip_clone_repo_check +def test_demo(): + demo_main(is_test=True) diff --git a/qai_hub_models/models/huggingface_wavlm_base_plus/export.py b/qai_hub_models/models/huggingface_wavlm_base_plus/export.py index ff097a89..b64ea30f 100644 --- a/qai_hub_models/models/huggingface_wavlm_base_plus/export.py +++ b/qai_hub_models/models/huggingface_wavlm_base_plus/export.py @@ -35,7 +35,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -114,7 +114,6 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. 
Compile the model to an on-device asset @@ -171,7 +170,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -200,8 +199,8 @@ def main(): parser = export_parser( model_cls=Model, supports_qnn=False, - supports_ort=False, - supports_precompiled_ort=False, + supports_onnx=False, + supports_precompiled_qnn_onnx=False, ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/huggingface_wavlm_base_plus/model.py b/qai_hub_models/models/huggingface_wavlm_base_plus/model.py index f476aa98..4d633178 100644 --- a/qai_hub_models/models/huggingface_wavlm_base_plus/model.py +++ b/qai_hub_models/models/huggingface_wavlm_base_plus/model.py @@ -191,13 +191,13 @@ def convert_to_wavlm_npu(model: WavLMModel): conv_layer = model.feature_extractor.conv_layers[0] assert isinstance(conv_layer, WavLMGroupNormConvLayer) # Replace with NPU friendly implementation - conv_layer_npu = WavLMGroupNormConvLayerNPU(conv_layer).eval() + conv_layer_npu = WavLMGroupNormConvLayerNPU(conv_layer) model.feature_extractor.conv_layers[0] = conv_layer_npu conv_layer1 = model.feature_extractor.conv_layers[1].conv assert isinstance(conv_layer1, torch.nn.Conv1d) # Replace with NPU friendly implementation - conv_layer1_npu = SliceConv1d(conv_layer1).eval() + conv_layer1_npu = SliceConv1d(conv_layer1) model.feature_extractor.conv_layers[1].conv = conv_layer1_npu return model diff --git a/qai_hub_models/models/huggingface_wavlm_base_plus/perf.yaml b/qai_hub_models/models/huggingface_wavlm_base_plus/perf.yaml index cf928d5f..a2d908eb 100644 --- a/qai_hub_models/models/huggingface_wavlm_base_plus/perf.yaml +++ b/qai_hub_models/models/huggingface_wavlm_base_plus/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: HuggingFace-WavLM-Base-Plus performance_metrics: - torchscript_onnx_tflite: - inference_time: 920916.0 - throughput: 1.085875367568812 + inference_time: 982866.0 + throughput: 1.0174326917402778 estimated_peak_memory_range: - min: 147881984 - max: 155477640 + min: 149377024 + max: 152557208 primary_compute_unit: CPU precision: fp32 layer_info: @@ -48,7 +50,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 811 total_layers: 811 - job_id: jw56qn8vg + job_id: jz5wxjjmp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -57,13 +59,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:39:46Z' + timestamp: '2024-06-22T22:43:27Z' - torchscript_onnx_tflite: - inference_time: 819047.0 - throughput: 1.220931155354943 + inference_time: 829981.0 + throughput: 1.2048468579401215 estimated_peak_memory_range: - min: 148029440 - max: 185119104 + min: 149364736 + max: 187276640 primary_compute_unit: CPU precision: fp32 layer_info: @@ -71,7 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 811 total_layers: 811 - job_id: j1p3qezx5 + job_id: jmg98668p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ 
-80,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:39:47Z' + timestamp: '2024-06-22T22:43:28Z' - torchscript_onnx_tflite: - inference_time: 932003.0 - throughput: 1.0729579196633487 + inference_time: 902406.0 + throughput: 1.1081486603590844 estimated_peak_memory_range: - min: 149381120 - max: 153841752 + min: 149413888 + max: 152860856 primary_compute_unit: CPU precision: fp32 layer_info: @@ -94,7 +96,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 811 total_layers: 811 - job_id: jep2moqq5 + job_id: jnp13rr75 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -103,4 +105,27 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-23T16:02:38Z' + timestamp: '2024-06-22T22:43:29Z' + - torchscript_onnx_tflite: + inference_time: 915091.0 + throughput: 1.0927874932656971 + estimated_peak_memory_range: + min: 148840448 + max: 162000816 + primary_compute_unit: CPU + precision: fp32 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 811 + total_layers: 811 + job_id: jvgd0jjzp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:43:30Z' diff --git a/qai_hub_models/models/inception_v3/export.py b/qai_hub_models/models/inception_v3/export.py index 868a0239..7bb1e75a 100644 --- a/qai_hub_models/models/inception_v3/export.py +++ b/qai_hub_models/models/inception_v3/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/inception_v3/perf.yaml b/qai_hub_models/models/inception_v3/perf.yaml index bf2b161e..f7a72623 100644 --- a/qai_hub_models/models/inception_v3/perf.yaml +++ b/qai_hub_models/models/inception_v3/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: Inception-v3 performance_metrics: - torchscript_onnx_tflite: - inference_time: 1355.0 - 
throughput: 738.0073800738007 + inference_time: 1358.0 + throughput: 736.3770250368188 estimated_peak_memory_range: - min: 24576 - max: 2203288 + min: 16384 + max: 1607904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jvgd7odkg + job_id: j2p0knnn5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1424.0 - throughput: 702.2471910112359 + inference_time: 1420.0 + throughput: 704.2253521126761 estimated_peak_memory_range: min: 16384 - max: 150398664 + max: 150142440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: j0pxeylj5 + job_id: j1gl7jnm5 job_status: Passed - torchscript_onnx_ort: - inference_time: 1714.0 - throughput: 583.4305717619603 + torchscript_onnx: + inference_time: 1745.0 + throughput: 573.0659025787966 estimated_peak_memory_range: - min: 24576 - max: 216921632 + min: 49152 + max: 238093168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jep23loxg + job_id: j7gj1jxeg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:40:24Z' + timestamp: '2024-06-22T22:44:11Z' - torchscript_onnx_tflite: - inference_time: 1026.0 - throughput: 974.6588693957115 + inference_time: 1029.0 + throughput: 971.8172983479105 estimated_peak_memory_range: - min: 12288 - max: 54111920 + min: 16384 + max: 56966656 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jz57vxeq5 + job_id: j1p88llop job_status: Passed torchscript_onnx_qnn: - inference_time: 1055.0 - throughput: 947.8672985781991 + inference_time: 1051.0 + throughput: 951.4747859181732 estimated_peak_memory_range: - min: 0 - max: 64200016 + min: 618496 + max: 55280240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jo5mv30y5 + job_id: jw56vk6yp job_status: Passed - torchscript_onnx_ort: - inference_time: 1328.0 - throughput: 753.0120481927711 + torchscript_onnx: + inference_time: 1309.0 + throughput: 763.9419404125287 estimated_peak_memory_range: - min: 0 - max: 33764336 + min: 618496 + max: 28195584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jqpyv68rp + job_id: jlpe2j9vp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:40:25Z' + timestamp: '2024-06-22T22:44:13Z' - torchscript_onnx_tflite: - inference_time: 1355.0 - throughput: 738.0073800738007 + inference_time: 1356.0 + throughput: 737.4631268436578 estimated_peak_memory_range: - min: 16384 - max: 2130328 + min: 28672 + max: 2258048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jqp4jvyqp + job_id: jogkdjjnp job_status: Passed torchscript_onnx_qnn: - inference_time: 1411.0 - throughput: 708.7172218284904 + inference_time: 1391.0 + throughput: 718.9072609633357 estimated_peak_memory_range: - min: 0 - max: 150030456 + min: 626688 + max: 6108008 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jopr1exvg + job_id: jwgomjyk5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:40:23Z' + timestamp: '2024-06-22T22:44:09Z' + - torchscript_onnx_tflite: + inference_time: 1376.0 + throughput: 726.7441860465116 + estimated_peak_memory_range: + min: 61440 + max: 1719272 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 129 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 129 + job_id: jn5qwjjo5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1409.0 + throughput: 709.7232079488999 + estimated_peak_memory_range: + min: 20480 + max: 149840536 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 219 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 219 + job_id: j1pv4j3rp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:44:10Z' - torchscript_onnx_qnn: - inference_time: 1503.0 - throughput: 665.335994677312 + inference_time: 1441.0 + throughput: 693.9625260235947 estimated_peak_memory_range: - min: 1097728 - max: 1097728 + min: 602112 + max: 602112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jegnr31v5 + job_id: j1p38ykn5 job_status: Passed - torchscript_onnx_ort: - inference_time: 1640.0 - throughput: 609.7560975609756 + torchscript_onnx: + inference_time: 1655.0 + throughput: 604.2296072507553 estimated_peak_memory_range: - min: 39940096 - max: 39940096 + min: 29585408 + max: 29585408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: j2p0elo25 + job_id: jygzw1exg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:40:26Z' + timestamp: '2024-06-22T22:44:14Z' diff --git a/qai_hub_models/models/inception_v3_quantized/export.py b/qai_hub_models/models/inception_v3_quantized/export.py index e6cd4f44..d4111ea5 100644 --- a/qai_hub_models/models/inception_v3_quantized/export.py +++ b/qai_hub_models/models/inception_v3_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif 
target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/inception_v3_quantized/model.py b/qai_hub_models/models/inception_v3_quantized/model.py index 2a74e221..c5eaac55 100644 --- a/qai_hub_models/models/inception_v3_quantized/model.py +++ b/qai_hub_models/models/inception_v3_quantized/model.py @@ -85,5 +85,4 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() return cls(sim) diff --git a/qai_hub_models/models/inception_v3_quantized/perf.yaml b/qai_hub_models/models/inception_v3_quantized/perf.yaml index a770e878..bca3cb76 100644 --- a/qai_hub_models/models/inception_v3_quantized/perf.yaml +++ b/qai_hub_models/models/inception_v3_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: Inception-v3-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 615.0 - throughput: 1626.0162601626016 + inference_time: 619.0 + throughput: 1615.5088852988692 estimated_peak_memory_range: - min: 12288 - max: 1478976 + min: 32768 + max: 1575264 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,29 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jogkr36y5 + job_id: jmg986v8p job_status: Passed torchscript_onnx_qnn: - inference_time: 646.0 - throughput: 1547.9876160990711 - estimated_peak_memory_range: - min: 12288 - max: 165286688 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 134 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 134 - job_id: jwgoe3d4p - job_status: Passed - torchscript_onnx_ort: - inference_time: 844.0 - throughput: 1184.8341232227488 + inference_time: 652.0 + throughput: 1533.7423312883436 estimated_peak_memory_range: - min: 12288 - max: 65222768 + min: 16384 + max: 251878408 primary_compute_unit: NPU precision: int8 layer_info: @@ -84,7 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 134 - job_id: jygzvrzzp + job_id: jo5m4jr95 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:41:32Z' + timestamp: '2024-06-22T22:45:18Z' - torchscript_onnx_tflite: - inference_time: 486.0 - throughput: 2057.61316872428 + inference_time: 503.0 + throughput: 1988.0715705765408 estimated_peak_memory_range: min: 12288 - max: 67571472 + max: 70334384 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jn5q9347p + job_id: jnp13r075 job_status: Passed torchscript_onnx_qnn: - inference_time: 496.0 - throughput: 2016.1290322580646 + inference_time: 493.0 + throughput: 2028.3975659229209 estimated_peak_memory_range: - min: 167936 - max: 54564464 + min: 163840 + max: 47775424 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,14 +111,37 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 134 - job_id: j1pvzv27g + job_id: jegnxj2q5 job_status: Passed - torchscript_onnx_ort: - 
inference_time: 659.0 - throughput: 1517.4506828528072 + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-06-22T22:45:19Z' + - torchscript_onnx_tflite: + inference_time: 631.0 + throughput: 1584.7860538827258 estimated_peak_memory_range: min: 12288 - max: 43078608 + max: 1574296 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 144 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 144 + job_id: jvgd0jwzp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 640.0 + throughput: 1562.5 + estimated_peak_memory_range: + min: 16384 + max: 45098152 primary_compute_unit: NPU precision: int8 layer_info: @@ -137,22 +149,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 134 - job_id: jz5wmqyzg + job_id: jep2j28q5 job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:41:33Z' + manufacturer: Qualcomm + chipset: Qcs8550 + timestamp: '2024-06-22T22:45:22Z' - torchscript_onnx_tflite: inference_time: 625.0 throughput: 1600.0 estimated_peak_memory_range: - min: 24576 - max: 1548872 + min: 12288 + max: 1706288 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +172,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: j1gle3wep + job_id: jz576qz9g job_status: Passed torchscript_onnx_qnn: - inference_time: 648.0 - throughput: 1543.20987654321 + inference_time: 639.0 + throughput: 1564.9452269170579 estimated_peak_memory_range: - min: 36864 - max: 39620504 + min: 16384 + max: 28691688 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,22 +187,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 134 - job_id: jlpe4k675 + job_id: jqpyn9elg job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8775 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: Qcs8550 - timestamp: '2024-06-08T22:41:31Z' + chipset: Sa8775p + timestamp: '2024-06-22T22:45:23Z' - torchscript_onnx_tflite: - inference_time: 2357.0 - throughput: 424.26813746287655 + inference_time: 2365.0 + throughput: 422.8329809725159 estimated_peak_memory_range: min: 12288 - max: 22249744 + max: 25181632 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,7 +210,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jw56qnovg + job_id: jqp48zq1g job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -207,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-08T22:41:26Z' + timestamp: '2024-06-22T22:45:15Z' - torchscript_onnx_tflite: - inference_time: 7805.0 - throughput: 128.12299807815504 + inference_time: 7614.0 + throughput: 131.33701076963487 estimated_peak_memory_range: - min: 16384 - max: 2215816 + min: 36864 + max: 2710192 primary_compute_unit: NPU precision: int8 layer_info: @@ -221,7 +233,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: j1p3qeox5 + job_id: j0pxmwvlg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -230,10 +242,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-08T22:41:27Z' + timestamp: '2024-06-22T22:45:17Z' - torchscript_onnx_qnn: - inference_time: 
706.0 - throughput: 1416.4305949008499 + inference_time: 714.0 + throughput: 1400.5602240896358 estimated_peak_memory_range: min: 450560 max: 450560 @@ -244,22 +256,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 134 - job_id: j7gjke375 - job_status: Passed - torchscript_onnx_ort: - inference_time: 782.0 - throughput: 1278.772378516624 - estimated_peak_memory_range: - min: 12218368 - max: 12218368 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 134 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 134 - job_id: jmg99woqg + job_id: jopr9zk7p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -268,4 +265,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:41:34Z' + timestamp: '2024-06-22T22:45:21Z' diff --git a/qai_hub_models/models/lama_dilated/export.py b/qai_hub_models/models/lama_dilated/export.py index b4b49854..43b20241 100644 --- a/qai_hub_models/models/lama_dilated/export.py +++ b/qai_hub_models/models/lama_dilated/export.py @@ -38,7 +38,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,14 +117,13 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image,mask" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -170,7 +169,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image,mask", sample_inputs, target_runtime ) @@ -190,7 +189,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -212,7 +211,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) @@ -227,7 +226,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_ort=False) + parser = export_parser(model_cls=Model, supports_onnx=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/lama_dilated/perf.yaml b/qai_hub_models/models/lama_dilated/perf.yaml index 87e48fb1..4682c186 100644 --- a/qai_hub_models/models/lama_dilated/perf.yaml +++ b/qai_hub_models/models/lama_dilated/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,26 
+38,26 @@ models: - name: LaMa-Dilated performance_metrics: - torchscript_onnx_tflite: - inference_time: 86343.0 - throughput: 11.581714788691613 + inference_time: 76792.0 + throughput: 13.022189811438691 estimated_peak_memory_range: min: 3289088 - max: 139370192 + max: 54930752 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 344 + layers_on_npu: 343 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 344 - job_id: jvgd7o6kg + total_layers: 343 + job_id: j1p38y3n5 job_status: Passed torchscript_onnx_qnn: - inference_time: 81307.0 - throughput: 12.299064041226462 + inference_time: 70643.0 + throughput: 14.155684214996532 estimated_peak_memory_range: - min: 3371008 - max: 42726616 + min: 3166208 + max: 43996992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +65,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 333 - job_id: j0pxey0j5 + job_id: jlpe2j1vp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,28 +74,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:42:12Z' + timestamp: '2024-06-22T22:46:12Z' - torchscript_onnx_tflite: - inference_time: 59391.0 - throughput: 16.837567981680728 + inference_time: 51609.0 + throughput: 19.37646534519173 estimated_peak_memory_range: - min: 53248 - max: 241657616 + min: 2371584 + max: 239485152 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 344 + layers_on_npu: 343 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 344 - job_id: jz57vxoq5 + total_layers: 343 + job_id: jwgomj0k5 job_status: Passed torchscript_onnx_qnn: - inference_time: 57168.0 - throughput: 17.492303386509935 + inference_time: 48645.0 + throughput: 20.557097337855893 estimated_peak_memory_range: - min: 2736128 - max: 165991776 + min: 4243456 + max: 131020112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +103,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 333 - job_id: jo5mv39y5 + job_id: jygzw19xg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,28 +112,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:42:13Z' + timestamp: '2024-06-22T22:46:14Z' - torchscript_onnx_tflite: - inference_time: 85709.0 - throughput: 11.667386155479589 + inference_time: 75718.0 + throughput: 13.20689928418606 estimated_peak_memory_range: - min: 3477504 - max: 138753616 + min: 3268608 + max: 139238816 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 344 + layers_on_npu: 343 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 344 - job_id: jqp4jveqp + total_layers: 343 + job_id: j1pv4jorp job_status: Passed torchscript_onnx_qnn: - inference_time: 81015.0 - throughput: 12.343393198790347 + inference_time: 70761.0 + throughput: 14.13207840477099 estimated_peak_memory_range: - min: 3174400 - max: 43648896 + min: 3223552 + max: 40930456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -139,7 +141,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 333 - job_id: jopr1edvg + job_id: jmg98618p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -148,10 +150,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:42:16Z' + timestamp: '2024-06-22T22:46:16Z' + - torchscript_onnx_tflite: + inference_time: 75987.0 + throughput: 13.160145814415623 + estimated_peak_memory_range: + min: 2224128 + max: 53851312 + primary_compute_unit: NPU + precision: fp16 + 
layer_info: + layers_on_npu: 343 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 343 + job_id: j7gj1jmeg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 71029.0 + throughput: 14.078756564220248 + estimated_peak_memory_range: + min: 3190784 + max: 42527648 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 333 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 333 + job_id: jnp13rl75 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:46:17Z' - torchscript_onnx_qnn: - inference_time: 91919.0 - throughput: 10.879143593816295 + inference_time: 70571.0 + throughput: 14.170126539229996 estimated_peak_memory_range: min: 4202496 max: 4202496 @@ -162,7 +202,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 333 - job_id: jegnr3qv5 + job_id: jz5wxjvmp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -171,4 +211,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:42:14Z' + timestamp: '2024-06-22T22:46:15Z' diff --git a/qai_hub_models/models/litehrnet/export.py b/qai_hub_models/models/litehrnet/export.py index 9dba5402..a508e28e 100644 --- a/qai_hub_models/models/litehrnet/export.py +++ b/qai_hub_models/models/litehrnet/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -115,7 +115,6 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. 
Compile the model to an on-device asset @@ -172,7 +171,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -204,8 +203,8 @@ def main(): parser = export_parser( model_cls=Model, supports_qnn=False, - supports_ort=False, - supports_precompiled_ort=False, + supports_onnx=False, + supports_precompiled_qnn_onnx=False, ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/litehrnet/perf.yaml b/qai_hub_models/models/litehrnet/perf.yaml index 64bcf8e7..8342a7ef 100644 --- a/qai_hub_models/models/litehrnet/perf.yaml +++ b/qai_hub_models/models/litehrnet/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: LiteHRNet performance_metrics: - torchscript_onnx_tflite: - inference_time: 11261.0 - throughput: 88.80206020779683 + inference_time: 11191.0 + throughput: 89.35751943526047 estimated_peak_memory_range: - min: 6529024 - max: 13390128 + min: 6553600 + max: 29020720 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,7 +50,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 10 total_layers: 1236 - job_id: jogkr30y5 + job_id: jo5m4j295 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -57,13 +59,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:42:54Z' + timestamp: '2024-06-22T22:46:56Z' - torchscript_onnx_tflite: - inference_time: 7629.0 - throughput: 131.07877834578582 + inference_time: 7507.0 + throughput: 133.20900492873318 estimated_peak_memory_range: - min: 6545408 - max: 86932832 + min: 20480 + max: 80905408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -71,7 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 10 total_layers: 1236 - job_id: jn5q9317p + job_id: jegnxjyq5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -80,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:42:55Z' + timestamp: '2024-06-22T22:46:58Z' - torchscript_onnx_tflite: - inference_time: 11181.0 - throughput: 89.43743851176103 + inference_time: 11155.0 + throughput: 89.64589870013447 estimated_peak_memory_range: - min: 6561792 - max: 18010528 + min: 6529024 + max: 11698232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -94,7 +96,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 10 total_layers: 1236 - job_id: j1gle38ep + job_id: jopr9zq7p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -103,4 +105,27 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:42:56Z' + timestamp: '2024-06-22T22:46:59Z' + - torchscript_onnx_tflite: + inference_time: 11199.0 + throughput: 89.2936869363336 + estimated_peak_memory_range: + min: 6529024 + max: 198676280 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 1226 + layers_on_gpu: 0 + layers_on_cpu: 10 + total_layers: 1236 + job_id: jep2j26q5 + job_status: 
Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:47:00Z' diff --git a/qai_hub_models/models/llama_v2_7b_chat_quantized/README.md b/qai_hub_models/models/llama_v2_7b_chat_quantized/README.md index 54e3e3f8..d7443fc1 100644 --- a/qai_hub_models/models/llama_v2_7b_chat_quantized/README.md +++ b/qai_hub_models/models/llama_v2_7b_chat_quantized/README.md @@ -3,7 +3,7 @@ # [Llama-v2-7B-Chat: State-of-the-art large language model useful on a variety of language understanding and generation tasks](https://aihub.qualcomm.com/models/llama_v2_7b_chat_quantized) -Llama 2 is a family of LLMs. The "Chat" at the end indicates that the model is optimized for chatbot-like dialogue. The model is quantized to 4-bit weights and 16-bit activations making it suitable for on-device deployment. For Prompt and output length specified below, the time to first token is Llama-PromptProcessor-Quantized's latency and average time per addition token is Llama-TokenGenerator-KVCache-Quantized's latency. +Llama 2 is a family of LLMs. The "Chat" at the end indicates that the model is optimized for chatbot-like dialogue. The model is quantized to w4a16 (4-bit weights and 16-bit activations), and part of the model is quantized to w8a16 (8-bit weights and 16-bit activations), making it suitable for on-device deployment. For the prompt and output lengths specified below, the time to first token is Llama-PromptProcessor-Quantized's latency and the average time per additional token is Llama-TokenGenerator-KVCache-Quantized's latency. This is based on the implementation of Llama-v2-7B-Chat found [here](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf). This repository contains scripts for optimized on-device @@ -34,7 +34,39 @@ Here, we divide the model into 4 parts in order to In order to export Llama 2, please ensure 1. Host machine has >40GB memory (RAM+swap-space) -2. If you don't have enough memory, export.py will dump instructions to increase swap space accordingly +2. If you don't have enough memory, export.py will dump instructions to increase swap space accordingly. + +## Sample output prompts generated on-device +1. --prompt "what is gravity?" --max-output-tokens 30 +~~~ +-------- Response Summary -------- +Prompt: what is gravity? +Response: Hello! I'm here to help you answer your question. Gravity is a fundamental force of nature that affects the behavior of objects with mass +~~~ + +2. --prompt "what is 2+3?" --max-output-tokens 30 +~~~ +-------- Response Summary -------- +Prompt: what is 2+3? +Response: Of course! I'm happy to help! The answer to 2+3 is 5. +~~~ + +3. --prompt "could you please write code for fibonacci series in python?" --max-output-tokens 100 +~~~ +-------- Response Summary -------- +Prompt: could you please write code for fibonacci series in python? +Response: Of course!
Here is an example of how you could implement the Fibonacci sequence in Python: +``` +def fibonacci(n): + if n <= 1: + return n + else: + return fibonacci(n-1) + fibonacci(n-2) +``` +You can test the function by calling it with different values of `n`, like this: +``` +print(fibonacci(5)) +~~~ diff --git a/qai_hub_models/models/llama_v2_7b_chat_quantized/__init__.py b/qai_hub_models/models/llama_v2_7b_chat_quantized/__init__.py index ca8e050f..652f98e4 100644 --- a/qai_hub_models/models/llama_v2_7b_chat_quantized/__init__.py +++ b/qai_hub_models/models/llama_v2_7b_chat_quantized/__init__.py @@ -2,6 +2,7 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- -from .app import ChatApp as App # noqa: F401 +from qai_hub_models.models._shared.llama.app import ChatApp as App # noqa: F401 + from .model import MODEL_ID # noqa: F401 from .model import Llama2_Quantized as Model # noqa: F401 diff --git a/qai_hub_models/models/llama_v2_7b_chat_quantized/demo.py b/qai_hub_models/models/llama_v2_7b_chat_quantized/demo.py index 33c39ca9..14cf4b71 100644 --- a/qai_hub_models/models/llama_v2_7b_chat_quantized/demo.py +++ b/qai_hub_models/models/llama_v2_7b_chat_quantized/demo.py @@ -6,112 +6,84 @@ from typing import List, Type -import qai_hub as hub -from transformers import LlamaTokenizer - +from qai_hub_models.models._shared.llama.demo import llama_chat_demo from qai_hub_models.models.llama_v2_7b_chat_quantized import MODEL_ID, Model -from qai_hub_models.models.llama_v2_7b_chat_quantized.app import ChatApp as App -from qai_hub_models.models.llama_v2_7b_chat_quantized.app import ( - HubLlama2ModelPipeline, - Llama2ModelPipeline, -) from qai_hub_models.models.llama_v2_7b_chat_quantized.model import ( - DEFAULT_INPUT_SEQ_LEN, DEFAULT_USER_PROMPT, + END_TOKENS, HF_REPO_NAME, HF_REPO_URL, -) -from qai_hub_models.utils.args import ( - get_model_cli_parser, - get_on_device_demo_parser, - validate_on_device_demo_args, + MODEL_SPLIT_MAP, + NUM_KEY_VAL_HEADS, + NUM_SPLITS, + Llama2_PromptProcessor_1_Quantized, + Llama2_PromptProcessor_2_Quantized, + Llama2_PromptProcessor_3_Quantized, + Llama2_PromptProcessor_4_Quantized, + Llama2_TokenGenerator_1_Quantized, + Llama2_TokenGenerator_2_Quantized, + Llama2_TokenGenerator_3_Quantized, + Llama2_TokenGenerator_4_Quantized, + get_input_prompt_with_tags, + get_tokenizer, + prepare_combined_attention_mask, ) from qai_hub_models.utils.base_model import BaseModel, TargetRuntime -from qai_hub_models.utils.huggingface import has_model_access -# Max output tokens to generate -# You can override this with cli argument. -# Keeping this short as on-device demo takes time to converge. -MAX_OUTPUT_TOKENS = 10 -DEFAULT_DEVICE = "Samsung Galaxy S24" +def _get_model_class(split_part: int, is_token_generator: bool = False): + if split_part < 1 or split_part > 4: + raise RuntimeError( + "Incorrect index provided to request Model split class." + f" Must be within (1-4), provided ({split_part})." 
+ ) + + if is_token_generator: + return [ + Llama2_TokenGenerator_1_Quantized, + Llama2_TokenGenerator_2_Quantized, + Llama2_TokenGenerator_3_Quantized, + Llama2_TokenGenerator_4_Quantized, + ][split_part - 1] + return [ + Llama2_PromptProcessor_1_Quantized, + Llama2_PromptProcessor_2_Quantized, + Llama2_PromptProcessor_3_Quantized, + Llama2_PromptProcessor_4_Quantized, + ][split_part - 1] -def llama_chat_demo( + +def llama_2_chat_demo( model_cls: Type[BaseModel] = Model, model_id: str = MODEL_ID, + num_splits: int = NUM_SPLITS, + num_key_val_heads: int = NUM_KEY_VAL_HEADS, + model_split_map: dict = MODEL_SPLIT_MAP, + end_tokens: set = END_TOKENS, + hf_repo_name: str = HF_REPO_NAME, + hf_repo_url: str = HF_REPO_URL, default_prompt: str = DEFAULT_USER_PROMPT, is_test: bool = False, available_target_runtimes: List[TargetRuntime] = [TargetRuntime.QNN], ): - # Demo parameters - parser = get_model_cli_parser(model_cls) - parser = get_on_device_demo_parser( - parser, - add_output_dir=True, + llama_chat_demo( + model_cls=model_cls, + model_id=model_id, + get_model_class=_get_model_class, + get_input_prompt_with_tags=get_input_prompt_with_tags, + prepare_combined_attention_mask=prepare_combined_attention_mask, + tokenizer=get_tokenizer(), + num_splits=num_splits, + num_key_val_heads=num_key_val_heads, + model_split_map=model_split_map, + end_tokens=end_tokens, + hf_repo_name=hf_repo_name, + hf_repo_url=hf_repo_url, + default_prompt=default_prompt, + is_test=is_test, available_target_runtimes=available_target_runtimes, - default_device=DEFAULT_DEVICE, - ) - parser.add_argument( - "--prompt", - type=str, - default=default_prompt, - help="input prompt.", - ) - parser.add_argument( - "--prompt-processor-input-seq-len", - type=int, - default=DEFAULT_INPUT_SEQ_LEN, - help="input sequence length for prompt-processor. This must be less than `max_position_embeddings` set for model.", - ) - parser.add_argument( - "--max-output-tokens", - type=int, - default=MAX_OUTPUT_TOKENS, - help="max output tokens to generate.", - ) - args = parser.parse_args([] if is_test else None) - validate_on_device_demo_args(args, model_id) - - if not args.on_device: - prompt_processor = Llama2ModelPipeline(Model.from_pretrained()) - token_generator = Llama2ModelPipeline( - Model.from_pretrained(), is_token_generator=True - ) - else: - hub_model_ids = args.hub_model_id.split(",") - # First four models are Prompt Processor - # Last four models are Token Generator - if len(hub_model_ids) != 8: - raise RuntimeError( - "Please provide comma separated hub-model-ids for Llama Prompt Processor and Token Generator," - " e.g. --hub-model-id ,,,,,,,.\n" - "Specify model-ids for four Prompt Processor models first, then Token Generator models.\n" - "If you run export.py it will print out command to run on-device demo with ordered model-ids." 
- ) - - hub_device = hub.Device(args.device) - prompt_processor = HubLlama2ModelPipeline( - hub_model_ids[:4], - hub_device=hub_device, - inference_options=args.inference_options, - ) - token_generator = HubLlama2ModelPipeline( - hub_model_ids[4:], - hub_device=hub_device, - inference_options=args.inference_options, - is_token_generator=True, - ) - - has_model_access(HF_REPO_NAME, HF_REPO_URL) - tokenizer = LlamaTokenizer.from_pretrained(HF_REPO_NAME) - - app = App(prompt_processor, token_generator, tokenizer=tokenizer) - app.generate_output_prompt( - args.prompt, - max_seq_len=args.prompt_processor_input_seq_len, - max_output_tokens=args.max_output_tokens, ) if __name__ == "__main__": - llama_chat_demo() + llama_2_chat_demo() diff --git a/qai_hub_models/models/llama_v2_7b_chat_quantized/export.py b/qai_hub_models/models/llama_v2_7b_chat_quantized/export.py index 30fe892d..f0e951d0 100644 --- a/qai_hub_models/models/llama_v2_7b_chat_quantized/export.py +++ b/qai_hub_models/models/llama_v2_7b_chat_quantized/export.py @@ -32,27 +32,27 @@ from qai_hub_models.utils.qnn_helpers import get_qnn_inputs ALL_COMPONENTS = [ - "Llama2_PromptProcessor_1_Quantized", - "Llama2_PromptProcessor_2_Quantized", - "Llama2_PromptProcessor_3_Quantized", - "Llama2_PromptProcessor_4_Quantized", - "Llama2_TokenGenerator_1_Quantized", - "Llama2_TokenGenerator_2_Quantized", - "Llama2_TokenGenerator_3_Quantized", - "Llama2_TokenGenerator_4_Quantized", + "PromptProcessor_1_Quantized", + "PromptProcessor_2_Quantized", + "PromptProcessor_3_Quantized", + "PromptProcessor_4_Quantized", + "TokenGenerator_1_Quantized", + "TokenGenerator_2_Quantized", + "TokenGenerator_3_Quantized", + "TokenGenerator_4_Quantized", ] DEFAULT_COMPONENTS = [ - "Llama2_PromptProcessor_1_Quantized", - "Llama2_PromptProcessor_2_Quantized", - "Llama2_PromptProcessor_3_Quantized", - "Llama2_PromptProcessor_4_Quantized", - "Llama2_TokenGenerator_1_Quantized", - "Llama2_TokenGenerator_2_Quantized", - "Llama2_TokenGenerator_3_Quantized", - "Llama2_TokenGenerator_4_Quantized", + "PromptProcessor_1_Quantized", + "PromptProcessor_2_Quantized", + "PromptProcessor_3_Quantized", + "PromptProcessor_4_Quantized", + "TokenGenerator_1_Quantized", + "TokenGenerator_2_Quantized", + "TokenGenerator_3_Quantized", + "TokenGenerator_4_Quantized", ] -DEFAULT_EXPORT_DEVICE = "Samsung Galaxy S24" +DEFAULT_EXPORT_DEVICE = "Samsung Galaxy S24 (Family)" def export_model( @@ -137,8 +137,9 @@ def export_model( compile_jobs: Dict[str, hub.client.CompileJob] = {} profile_options_per_component: Dict[str, str] = {} - for component_name in components: + for i, component_name in enumerate(components): # Load model part + component = model.load_model_part(component_name) input_spec = component.get_input_spec( @@ -281,7 +282,7 @@ def main(): model_cls=Model, components=ALL_COMPONENTS, supports_tflite=False, - supports_ort=False, + supports_precompiled_qnn_onnx=False, default_export_device=DEFAULT_EXPORT_DEVICE, ) args = parser.parse_args() diff --git a/qai_hub_models/models/llama_v2_7b_chat_quantized/info.yaml b/qai_hub_models/models/llama_v2_7b_chat_quantized/info.yaml index 1069bac6..f142ff26 100644 --- a/qai_hub_models/models/llama_v2_7b_chat_quantized/info.yaml +++ b/qai_hub_models/models/llama_v2_7b_chat_quantized/info.yaml @@ -6,7 +6,8 @@ headline: State-of-the-art large language model useful on a variety of language domain: Generative AI description: Llama 2 is a family of LLMs. The "Chat" at the end indicates that the model is optimized for chatbot-like dialogue. 
The model is quantized to - 4-bit weights and 16-bit activations making it suitable for on-device + w4a16(4-bit weights and 16-bit activations) and part of the model is quantized to + w8a16(8-bit weights and 16-bit activations) making it suitable for on-device deployment. For Prompt and output length specified below, the time to first token is Llama-PromptProcessor-Quantized's latency and average time per addition token is Llama-TokenGenerator-KVCache-Quantized's latency. @@ -21,17 +22,18 @@ license: https://github.com/facebookresearch/llama/blob/main/LICENSE source_repo: https://huggingface.co/meta-llama/Llama-2-7b-chat-hf technical_details: Number of parameters: 7B - Model size: 3.6GB + Precision: w4a16 + w8a16 (few layers) Model-1 (Prompt Processor): Llama-PromptProcessor-Quantized Max context length: 1024 + Prompt processor model size: 3.6 GB Prompt processor input: 1024 tokens Prompt processor output: 1024 output tokens + KVCache for token generator Model-2 (Token Generator): Llama-TokenGenerator-KVCache-Quantized + Token generator model size: 3.6 GB Token generator input: 1 input token + past KVCache Token generator output: 1 output token + KVCache for next iteration Decoding length: 1024 (1 output token + 1023 from KVCache) Use: Initiate conversation with prompt-processor and then token generator for subsequent iterations. - QNN-SDK: "2.19" applicable_scenarios: - Dialogue - Content Generation diff --git a/qai_hub_models/models/llama_v2_7b_chat_quantized/model.py b/qai_hub_models/models/llama_v2_7b_chat_quantized/model.py index 5dc3fda2..7e97997f 100644 --- a/qai_hub_models/models/llama_v2_7b_chat_quantized/model.py +++ b/qai_hub_models/models/llama_v2_7b_chat_quantized/model.py @@ -5,25 +5,28 @@ from __future__ import annotations import os -import pickle from typing import Optional, Tuple import torch -from qai_hub.client import DatasetEntries, Device +from qai_hub.client import DatasetEntries -from qai_hub_models.models.common import ( - SampleInputsType, - SourceModelFormat, - TargetRuntime, +from qai_hub_models.models._shared.llama.model import ( + DEFAULT_INPUT_SEQ_LEN, + Llama_QuantizedMixin, + RopeEmbedding, + get_hidden_layer_range_from_split, + get_past_key_names, + get_past_keyval_with_shift, + load_input_cached_data, + make_torch_compatible_past_key_values, + save_input_cached_data, ) from qai_hub_models.models.llama_v2_7b_chat_quantized.modeling_llama import ( LlamaForCausalLM, LlamaModel, - RopeEmbedding, ) -from qai_hub_models.utils.aimet.aimet_dummy_model import AimetEncodingLoaderMixin -from qai_hub_models.utils.asset_loaders import ASSET_CONFIG, CachedWebModelAsset -from qai_hub_models.utils.base_model import BaseModel, CollectionModel, TargetRuntime +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset +from qai_hub_models.utils.base_model import CollectionModel, TargetRuntime from qai_hub_models.utils.huggingface import ( ensure_has_required_transformer, has_model_access, @@ -44,20 +47,32 @@ MODEL_ID = __name__.split(".")[-2] -MODEL_ASSET_VERSION = 4 +MODEL_ASSET_VERSION = 6 # Configs AIMET_ENCODINGS_PREFIX = "config" -AIMET_CONFIG = "default_config_llama" # Model parameters MAX_HIDDEN_LAYERS = 32 MAX_POS_EMBEDDINGS = 1024 DEFAULT_INPUT_SEQ_LEN = 1024 +ATTENTION_HIDDEN_DIM = 4096 +POS_EMBED_DIM = 64 DATA_DIR = "data" USE_CACHED_DATA = True NUM_SPLITS = 4 LAYERS_PER_SPLIT = 8 +NUM_KEY_VAL_HEADS = 32 + +# Model split map to track DecodeLayer split for each part +# key (model split number) -> +# value Tuple of (start index of decoder Layer, end index of 
decode layer) +MODEL_SPLIT_MAP = { + 1: (0, 8), + 2: (8, 16), + 3: (16, 24), + 4: (24, 32), +} # Hugging face repo name and url HF_REPO_NAME = "meta-llama/Llama-2-7b-chat-hf" @@ -73,11 +88,10 @@ SYS_END = "<>" INST_START = "[INST]" INST_END = "[/INST]" -DEFAULT_PROMPT_CONTEXT = """You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. +END_TOKENS = {""} -If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information. -""" -DEFAULT_USER_PROMPT = "Hi! How are you?" +DEFAULT_PROMPT_CONTEXT = "You are a helpful AI assistant" +DEFAULT_USER_PROMPT = "Hi! What is 2+3?" def get_input_prompt_with_tags( @@ -131,136 +145,6 @@ def prepare_combined_attention_mask( return new_mask -def _input_cached_data_save( - data: dict, - split_part: int, - model_type: str = "pp", - input_seq_len: int = DEFAULT_INPUT_SEQ_LEN, -): - data_path = ( - f"{DATA_DIR}/{input_seq_len}/llama_v2_{split_part}_{model_type}_inputs.pkl" - ) - - inputs_pkl_path = ASSET_CONFIG.get_local_store_model_path( - MODEL_ID, - MODEL_ASSET_VERSION, - f"{data_path}", - ) - - # if already exists, no need to re-serialize. - if os.path.exists(inputs_pkl_path): - return - - os.makedirs(os.path.dirname(inputs_pkl_path), exist_ok=True) - with open(f"{inputs_pkl_path}", "wb") as f: - pickle.dump(data, f, pickle.HIGHEST_PROTOCOL) - - -def _input_cached_data_load( - split_part: int, model_type: str = "pp", input_seq_len: int = DEFAULT_INPUT_SEQ_LEN -): - data_path = ( - f"{DATA_DIR}/{input_seq_len}/llama_v2_{split_part}_{model_type}_inputs.pkl" - ) - try: - - # Load local data path if already generated - inputs_pkl_path = ASSET_CONFIG.get_local_store_model_path( - MODEL_ID, - MODEL_ASSET_VERSION, - f"{data_path}", - ) - - # If local data path not found, fetch from server if available - if not os.path.exists(inputs_pkl_path): - inputs_pkl_path = CachedWebModelAsset.from_asset_store( - MODEL_ID, - MODEL_ASSET_VERSION, - data_path, - ).fetch() - - with open(f"{inputs_pkl_path}", "rb") as f: - return pickle.load(f) - except Exception: - # Delete intermediate data file if error occurs - if os.path.exists(inputs_pkl_path): - os.remove(inputs_pkl_path) - print( - f"Unable to load cached data for {data_path}, creating data using PyTorch models." 
- ) - # Unable to load cached data, return None - return None - - -def _get_model_data( - split_part: int, - input_seq_len: int = DEFAULT_INPUT_SEQ_LEN, - is_token_generator=False, -): - """ - Helper method to get model data from given split number - """ - if is_token_generator: - if split_part == 1: - return Llama2_TokenGenerator_1_Quantized.get_model_data( - input_seq_len=input_seq_len - ) - if split_part == 2: - return Llama2_TokenGenerator_2_Quantized.get_model_data( - input_seq_len=input_seq_len - ) - if split_part == 3: - return Llama2_TokenGenerator_3_Quantized.get_model_data( - input_seq_len=input_seq_len - ) - if split_part == 4: - return Llama2_TokenGenerator_4_Quantized.get_model_data( - input_seq_len=input_seq_len - ) - else: - if split_part == 1: - return Llama2_PromptProcessor_1_Quantized.get_model_data( - input_seq_len=input_seq_len - ) - elif split_part == 2: - return Llama2_PromptProcessor_2_Quantized.get_model_data( - input_seq_len=input_seq_len - ) - elif split_part == 3: - return Llama2_PromptProcessor_3_Quantized.get_model_data( - input_seq_len=input_seq_len - ) - elif split_part == 4: - return Llama2_PromptProcessor_4_Quantized.get_model_data( - input_seq_len=input_seq_len - ) - raise RuntimeError(f"Unsupported split_part {split_part} provided.") - - -def _get_hidden_layer_range_from_split(split_part: int): - num_of_hidden_layers_per_part = LAYERS_PER_SPLIT - hidden_layers_start = num_of_hidden_layers_per_part * (split_part - 1) - hidden_layers_end = hidden_layers_start + num_of_hidden_layers_per_part - return hidden_layers_start, hidden_layers_end - - -def _get_past_key_names(start: int = 0, end: int = 8, suffix=""): - past_key_val_name = [] - for i in range(start, end): - cache_names = [f"past_key_{i}_h{j}{suffix}" for j in range(32)] + [ - f"past_value_{i}_h{j}{suffix}" for j in range(32) - ] - past_key_val_name.extend(cache_names) - return past_key_val_name - - -def _get_output_names_from_split(split_part: int = 1): - layer_start, layer_end = _get_hidden_layer_range_from_split(split_part=split_part) - output_list = [f"layers_{layer_end - 1}_add_out_0"] - output_list += _get_past_key_names(layer_start, layer_end, suffix="_out") - return output_list - - class Llama2Wrapper(torch.nn.Module): def __init__( self, @@ -294,8 +178,8 @@ def __init__( f"Llama2 split_part must be within 1-4 (Provided {split_part})." 
) - hidden_layers_start, hidden_layers_end = _get_hidden_layer_range_from_split( - split_part + hidden_layers_start, hidden_layers_end = get_hidden_layer_range_from_split( + split_part, MODEL_SPLIT_MAP ) config.hidden_layers_start = hidden_layers_start config.hidden_layers_end = hidden_layers_end @@ -375,7 +259,7 @@ def forward_token_generator( position_ids_sin, *past_key_values, ): - past_key_values_tuple = _make_torch_compatible_past_key_values( + past_key_values_tuple = make_torch_compatible_past_key_values( self.total_hidden_layers, 32, *past_key_values ) return self.model( @@ -437,85 +321,42 @@ def from_pretrained( return Llama2_Quantized(max_position_embeddings=max_position_embeddings) def load_model_part(self, split_part): - if split_part == "Llama2_PromptProcessor_1_Quantized": + if split_part == "PromptProcessor_1_Quantized": return Llama2_PromptProcessor_1_Quantized.from_pretrained( max_position_embeddings=self.max_position_embeddings ) - if split_part == "Llama2_PromptProcessor_2_Quantized": + if split_part == "PromptProcessor_2_Quantized": return Llama2_PromptProcessor_2_Quantized.from_pretrained( max_position_embeddings=self.max_position_embeddings ) - if split_part == "Llama2_PromptProcessor_3_Quantized": + if split_part == "PromptProcessor_3_Quantized": return Llama2_PromptProcessor_3_Quantized.from_pretrained( max_position_embeddings=self.max_position_embeddings ) - if split_part == "Llama2_PromptProcessor_4_Quantized": + if split_part == "PromptProcessor_4_Quantized": return Llama2_PromptProcessor_4_Quantized.from_pretrained( max_position_embeddings=self.max_position_embeddings ) - if split_part == "Llama2_TokenGenerator_1_Quantized": + if split_part == "TokenGenerator_1_Quantized": return Llama2_TokenGenerator_1_Quantized.from_pretrained( max_position_embeddings=self.max_position_embeddings, ) - if split_part == "Llama2_TokenGenerator_2_Quantized": + if split_part == "TokenGenerator_2_Quantized": return Llama2_TokenGenerator_2_Quantized.from_pretrained( max_position_embeddings=self.max_position_embeddings ) - if split_part == "Llama2_TokenGenerator_3_Quantized": + if split_part == "TokenGenerator_3_Quantized": return Llama2_TokenGenerator_3_Quantized.from_pretrained( max_position_embeddings=self.max_position_embeddings ) - if split_part == "Llama2_TokenGenerator_4_Quantized": + if split_part == "TokenGenerator_4_Quantized": return Llama2_TokenGenerator_4_Quantized.from_pretrained( max_position_embeddings=self.max_position_embeddings ) raise RuntimeError(f"Unsupported split_part {split_part}.") -class Llama2_QuantizedMixin(AimetEncodingLoaderMixin, BaseModel): - def __init__(self, model, encoding_path, is_token_generator=False): - AimetEncodingLoaderMixin.__init__(self, model, encoding_path) - BaseModel.__init__(self) - self.model = model - self.split_part = 1 - self.is_token_generator = is_token_generator - - def get_hub_compile_options( - self, - target_runtime: TargetRuntime, - other_compile_options: str = "", - device: Optional[Device] = None, - ) -> str: - if target_runtime != TargetRuntime.QNN: - raise RuntimeError( - f"Unsupported target_runtime provided: {target_runtime}." - " Only QNN runtime is supported for Llama for now." - ) - - return " --target_runtime qnn_context_binary --quantize_full_type w8a16 --quantize_io" - - @staticmethod - def get_output_names(): - # Clipped hidden layers are named same as first part for all parts - # Eventually, each split should have respective names. 
- return _get_output_names_from_split(split_part=1) - - def sample_inputs(self, input_spec: InputSpec | None = None) -> SampleInputsType: - data = self.get_calibration_data(input_spec=input_spec) - for key, val in data.items(): - data[key] = [val.detach().numpy()] - return data - - def preferred_hub_source_model_format( - self, target_runtime: TargetRuntime - ) -> SourceModelFormat: - """ - Source model format preferred for conversion on AI Hub. - """ - return SourceModelFormat.ONNX - - -class Llama2_PromptProcessor_1_Quantized(Llama2_QuantizedMixin): +class Llama2_PromptProcessor_1_Quantized(Llama_QuantizedMixin): def __init__(self, model, encoding_path): super().__init__(model, encoding_path) self.model = model @@ -550,13 +391,20 @@ def get_input_spec( return { "input_ids": ((1, input_seq_length), "int32"), "attention_mask": ((1, 1, input_seq_length, input_seq_length), "float32"), - "position_ids_cos": ((1, 1, input_seq_length, 64), "float32"), - "position_ids_sin": ((1, 1, input_seq_length, 64), "float32"), + "position_ids_cos": ((1, 1, input_seq_length, POS_EMBED_DIM), "float32"), + "position_ids_sin": ((1, 1, input_seq_length, POS_EMBED_DIM), "float32"), } @staticmethod def get_model_data(input_seq_len: int = DEFAULT_INPUT_SEQ_LEN): - data = _input_cached_data_load(split_part=1) + data = load_input_cached_data( + split_part=1, + data_dir=DATA_DIR, + model_name="llama_v2", + model_id=MODEL_ID, + model_asset_version=MODEL_ASSET_VERSION, + input_seq_len=input_seq_len, + ) if data is not None: return data @@ -585,7 +433,15 @@ def get_model_data(input_seq_len: int = DEFAULT_INPUT_SEQ_LEN): ).get_embedding(position_ids) inputs["position_ids_cos"] = position_ids_cos inputs["position_ids_sin"] = position_ids_sin - _input_cached_data_save(inputs, split_part=1, input_seq_len=input_seq_len) + save_input_cached_data( + inputs, + split_part=1, + data_dir=DATA_DIR, + model_name="llama_v2", + model_id=MODEL_ID, + model_asset_version=MODEL_ASSET_VERSION, + input_seq_len=input_seq_len, + ) return inputs def get_calibration_data( @@ -605,7 +461,7 @@ def get_calibration_data( ) -class Llama2_PromptProcessor_2_Quantized(Llama2_QuantizedMixin): +class Llama2_PromptProcessor_2_Quantized(Llama_QuantizedMixin): def __init__(self, model: torch.nn.Module, encoding_path: str): super().__init__(model, encoding_path) self.split_part = 2 @@ -637,15 +493,22 @@ def get_input_spec( # This can be used with the qai_hub python API to declare # the model input specification upon submitting a compile job. 
return { - "input_ids": ((1, input_seq_length, 4096), "float32"), + "input_ids": ((1, input_seq_length, ATTENTION_HIDDEN_DIM), "float32"), "attention_mask": ((1, 1, input_seq_length, input_seq_length), "float32"), - "position_ids_cos": ((1, 1, input_seq_length, 64), "float32"), - "position_ids_sin": ((1, 1, input_seq_length, 64), "float32"), + "position_ids_cos": ((1, 1, input_seq_length, POS_EMBED_DIM), "float32"), + "position_ids_sin": ((1, 1, input_seq_length, POS_EMBED_DIM), "float32"), } @staticmethod def get_model_data(input_seq_len: int = DEFAULT_INPUT_SEQ_LEN): - data = _input_cached_data_load(split_part=2) + data = load_input_cached_data( + split_part=2, + data_dir=DATA_DIR, + model_name="llama_v2", + model_id=MODEL_ID, + model_asset_version=MODEL_ASSET_VERSION, + input_seq_len=input_seq_len, + ) if data is not None: return data @@ -661,7 +524,15 @@ def get_model_data(input_seq_len: int = DEFAULT_INPUT_SEQ_LEN): new_inputs["attention_mask"] = inputs["attention_mask"] new_inputs["position_ids_cos"] = inputs["position_ids_cos"] new_inputs["position_ids_sin"] = inputs["position_ids_sin"] - _input_cached_data_save(new_inputs, split_part=2, input_seq_len=input_seq_len) + save_input_cached_data( + new_inputs, + split_part=2, + data_dir=DATA_DIR, + model_name="llama_v2", + model_id=MODEL_ID, + model_asset_version=MODEL_ASSET_VERSION, + input_seq_len=input_seq_len, + ) return new_inputs def get_calibration_data( @@ -681,7 +552,7 @@ def get_calibration_data( ) -class Llama2_PromptProcessor_3_Quantized(Llama2_QuantizedMixin): +class Llama2_PromptProcessor_3_Quantized(Llama_QuantizedMixin): def __init__(self, model: torch.nn.Module, encoding_path: str): super().__init__(model, encoding_path) self.split_part = 3 @@ -713,15 +584,22 @@ def get_input_spec( # This can be used with the qai_hub python API to declare # the model input specification upon submitting a compile job. 
return { - "input_ids": ((1, input_seq_length, 4096), "float32"), + "input_ids": ((1, input_seq_length, ATTENTION_HIDDEN_DIM), "float32"), "attention_mask": ((1, 1, input_seq_length, input_seq_length), "float32"), - "position_ids_cos": ((1, 1, input_seq_length, 64), "float32"), - "position_ids_sin": ((1, 1, input_seq_length, 64), "float32"), + "position_ids_cos": ((1, 1, input_seq_length, POS_EMBED_DIM), "float32"), + "position_ids_sin": ((1, 1, input_seq_length, POS_EMBED_DIM), "float32"), } @staticmethod def get_model_data(input_seq_len: int = DEFAULT_INPUT_SEQ_LEN): - data = _input_cached_data_load(split_part=3) + data = load_input_cached_data( + split_part=3, + data_dir=DATA_DIR, + model_name="llama_v2", + model_id=MODEL_ID, + model_asset_version=MODEL_ASSET_VERSION, + input_seq_len=input_seq_len, + ) if data is not None: return data @@ -737,7 +615,15 @@ def get_model_data(input_seq_len: int = DEFAULT_INPUT_SEQ_LEN): new_inputs["attention_mask"] = inputs["attention_mask"] new_inputs["position_ids_cos"] = inputs["position_ids_cos"] new_inputs["position_ids_sin"] = inputs["position_ids_sin"] - _input_cached_data_save(new_inputs, split_part=3, input_seq_len=input_seq_len) + save_input_cached_data( + new_inputs, + split_part=3, + data_dir=DATA_DIR, + model_name="llama_v2", + model_id=MODEL_ID, + model_asset_version=MODEL_ASSET_VERSION, + input_seq_len=input_seq_len, + ) return new_inputs def get_calibration_data( @@ -757,7 +643,7 @@ def get_calibration_data( ) -class Llama2_PromptProcessor_4_Quantized(Llama2_QuantizedMixin): +class Llama2_PromptProcessor_4_Quantized(Llama_QuantizedMixin): def __init__(self, model: torch.nn.Module, encoding_path: str): super().__init__(model, encoding_path) self.split_part = 4 @@ -789,15 +675,34 @@ def get_input_spec( # This can be used with the qai_hub python API to declare # the model input specification upon submitting a compile job. 
return { - "input_ids": ((1, input_seq_length, 4096), "float32"), + "input_ids": ((1, input_seq_length, ATTENTION_HIDDEN_DIM), "float32"), "attention_mask": ((1, 1, input_seq_length, input_seq_length), "float32"), - "position_ids_cos": ((1, 1, input_seq_length, 64), "float32"), - "position_ids_sin": ((1, 1, input_seq_length, 64), "float32"), + "position_ids_cos": ((1, 1, input_seq_length, POS_EMBED_DIM), "float32"), + "position_ids_sin": ((1, 1, input_seq_length, POS_EMBED_DIM), "float32"), } + @staticmethod + def get_output_names(): + layers_start, layers_end = get_hidden_layer_range_from_split( + split_part=4, model_split_map=MODEL_SPLIT_MAP + ) + return Llama_QuantizedMixin.get_output_names( + start=layers_start, + end=layers_end, + past_key_val_heads=NUM_KEY_VAL_HEADS, + output_name="logits", + ) + @staticmethod def get_model_data(input_seq_len: int = DEFAULT_INPUT_SEQ_LEN): - data = _input_cached_data_load(split_part=4) + data = load_input_cached_data( + split_part=4, + data_dir=DATA_DIR, + model_name="llama_v2", + model_id=MODEL_ID, + model_asset_version=MODEL_ASSET_VERSION, + input_seq_len=input_seq_len, + ) if data is not None: return data @@ -812,7 +717,15 @@ def get_model_data(input_seq_len: int = DEFAULT_INPUT_SEQ_LEN): new_inputs["attention_mask"] = inputs["attention_mask"] new_inputs["position_ids_cos"] = inputs["position_ids_cos"] new_inputs["position_ids_sin"] = inputs["position_ids_sin"] - _input_cached_data_save(new_inputs, split_part=4, input_seq_len=input_seq_len) + save_input_cached_data( + new_inputs, + split_part=4, + data_dir=DATA_DIR, + model_name="llama_v2", + model_id=MODEL_ID, + model_asset_version=MODEL_ASSET_VERSION, + input_seq_len=input_seq_len, + ) return new_inputs def get_calibration_data( @@ -837,49 +750,7 @@ def get_calibration_data( # -def get_past_keyval_with_shift(past_key_vals): - """ - Clip past key value to feed next iteration - """ - tg_inputs = {} - for i in range(0, len(past_key_vals), 64): - l_num = i // 64 - for j, key in enumerate(past_key_vals[i : i + 32]): - tg_inputs[f"past_key_{l_num}_h{j}"] = key[:, :, :, 1:].detach() - - for j, val in enumerate(past_key_vals[i + 32 : i + 64]): - tg_inputs[f"past_value_{l_num}_h{j}"] = val[:, :, 1:, :].detach() - - return tg_inputs - - -def _make_torch_compatible_past_key_values( - decode_layers, split_per_layer, *past_values_flattened -): - past_key_values = [] - total_past_entries = len(past_values_flattened) - - # past values consists of - # 1. k decode/hidden layers - # 2. each decode layer has 2 entries: key and value - # 3. each key-value entry is has 32 layer - if total_past_entries != decode_layers * split_per_layer * 2: - raise RuntimeError( - "Incorrect number of past key-values provided for model." - f"Expecting {decode_layers * split_per_layer * 2}, got {total_past_entries}." 
- ) - - for i in range(0, decode_layers * 2, 2): - keys = past_values_flattened[i * split_per_layer : (i + 1) * split_per_layer] - values = past_values_flattened[ - (i + 1) * split_per_layer : (i + 2) * split_per_layer - ] - - past_key_values.append((keys, values)) - return tuple(past_key_values) - - -class Llama2_TokenGenerator_1_Quantized(Llama2_QuantizedMixin): +class Llama2_TokenGenerator_1_Quantized(Llama_QuantizedMixin): def __init__(self, model: torch.nn.Module, encoding_path: str): super().__init__(model, encoding_path, is_token_generator=True) self.split_part = 1 @@ -923,12 +794,12 @@ def get_input_spec( input_spec = { "input_ids": ((1, 1), "int32"), "attention_mask": ((1, 1, 1, input_seq_length), "float32"), - "position_ids_cos": ((1, 1, 1, 64), "float32"), - "position_ids_sin": ((1, 1, 1, 64), "float32"), + "position_ids_cos": ((1, 1, 1, POS_EMBED_DIM), "float32"), + "position_ids_sin": ((1, 1, 1, POS_EMBED_DIM), "float32"), } # Collect past_key_values and drop output names - past_key_val_names = _get_past_key_names() + past_key_val_names = get_past_key_names() for past_key_val in past_key_val_names: if "key" in past_key_val: input_spec[past_key_val] = ( @@ -944,7 +815,15 @@ def get_input_spec( @staticmethod def get_model_data(input_seq_len: int = DEFAULT_INPUT_SEQ_LEN): - data = _input_cached_data_load(split_part=1, model_type="tg") + data = load_input_cached_data( + split_part=1, + data_dir=DATA_DIR, + model_name="llama_v2", + model_id=MODEL_ID, + model_asset_version=MODEL_ASSET_VERSION, + model_type="tg", + input_seq_len=input_seq_len, + ) if data is not None: return data @@ -1000,14 +879,18 @@ def get_model_data(input_seq_len: int = DEFAULT_INPUT_SEQ_LEN): "position_ids_sin": position_ids_sin, } - key_val = get_past_keyval_with_shift(output[1:]) + key_val = get_past_keyval_with_shift(output[1:], NUM_KEY_VAL_HEADS) for key, val in key_val.items(): data[key] = val - _input_cached_data_save( + save_input_cached_data( data, split_part=1, model_type="tg", + data_dir=DATA_DIR, + model_name="llama_v2", + model_id=MODEL_ID, + model_asset_version=MODEL_ASSET_VERSION, input_seq_len=input_seq_len, ) return data @@ -1030,7 +913,7 @@ def get_calibration_data( ) -class Llama2_TokenGenerator_2_Quantized(Llama2_QuantizedMixin): +class Llama2_TokenGenerator_2_Quantized(Llama_QuantizedMixin): def __init__(self, model: torch.nn.Module, encoding_path: str): super().__init__(model, encoding_path, is_token_generator=True) self.split_part = 2 @@ -1072,14 +955,14 @@ def get_input_spec( # the model input specification upon submitting a compile job. 
input_spec = { - "input_ids": ((1, 1, 4096), "float32"), + "input_ids": ((1, 1, ATTENTION_HIDDEN_DIM), "float32"), "attention_mask": ((1, 1, 1, input_seq_length), "float32"), - "position_ids_cos": ((1, 1, 1, 64), "float32"), - "position_ids_sin": ((1, 1, 1, 64), "float32"), + "position_ids_cos": ((1, 1, 1, POS_EMBED_DIM), "float32"), + "position_ids_sin": ((1, 1, 1, POS_EMBED_DIM), "float32"), } # Collect past_key_values and drop output names - past_key_val_names = _get_past_key_names() + past_key_val_names = get_past_key_names() for past_key_val in past_key_val_names: if "key" in past_key_val: input_spec[past_key_val] = ( @@ -1095,7 +978,15 @@ def get_input_spec( @staticmethod def get_model_data(input_seq_len: int = DEFAULT_INPUT_SEQ_LEN): - data = _input_cached_data_load(split_part=2, model_type="tg") + data = load_input_cached_data( + split_part=2, + data_dir=DATA_DIR, + model_name="llama_v2", + model_id=MODEL_ID, + model_asset_version=MODEL_ASSET_VERSION, + model_type="tg", + input_seq_len=input_seq_len, + ) if data is not None: return data @@ -1120,14 +1011,18 @@ def get_model_data(input_seq_len: int = DEFAULT_INPUT_SEQ_LEN): "position_ids_sin": inputs["position_ids_sin"], } - key_val = get_past_keyval_with_shift(output[1:]) + key_val = get_past_keyval_with_shift(output[1:], NUM_KEY_VAL_HEADS) for key, val in key_val.items(): data[key] = val - _input_cached_data_save( + save_input_cached_data( data, split_part=2, model_type="tg", + data_dir=DATA_DIR, + model_name="llama_v2", + model_id=MODEL_ID, + model_asset_version=MODEL_ASSET_VERSION, input_seq_len=input_seq_len, ) return data @@ -1149,7 +1044,7 @@ def get_calibration_data( ) -class Llama2_TokenGenerator_3_Quantized(Llama2_QuantizedMixin): +class Llama2_TokenGenerator_3_Quantized(Llama_QuantizedMixin): def __init__(self, model: torch.nn.Module, encoding_path: str): super().__init__(model, encoding_path, is_token_generator=True) self.split_part = 3 @@ -1191,14 +1086,14 @@ def get_input_spec( # the model input specification upon submitting a compile job. 
input_spec = { - "input_ids": ((1, 1, 4096), "float32"), + "input_ids": ((1, 1, ATTENTION_HIDDEN_DIM), "float32"), "attention_mask": ((1, 1, 1, input_seq_length), "float32"), - "position_ids_cos": ((1, 1, 1, 64), "float32"), - "position_ids_sin": ((1, 1, 1, 64), "float32"), + "position_ids_cos": ((1, 1, 1, POS_EMBED_DIM), "float32"), + "position_ids_sin": ((1, 1, 1, POS_EMBED_DIM), "float32"), } # Collect past_key_values and drop output names - past_key_val_names = _get_past_key_names() + past_key_val_names = get_past_key_names() for past_key_val in past_key_val_names: if "key" in past_key_val: input_spec[past_key_val] = ( @@ -1214,7 +1109,15 @@ def get_input_spec( @staticmethod def get_model_data(input_seq_len: int = DEFAULT_INPUT_SEQ_LEN): - data = _input_cached_data_load(split_part=3, model_type="tg") + data = load_input_cached_data( + split_part=3, + data_dir=DATA_DIR, + model_name="llama_v2", + model_id=MODEL_ID, + model_asset_version=MODEL_ASSET_VERSION, + model_type="tg", + input_seq_len=input_seq_len, + ) if data is not None: return data @@ -1239,14 +1142,18 @@ def get_model_data(input_seq_len: int = DEFAULT_INPUT_SEQ_LEN): "position_ids_sin": inputs["position_ids_sin"], } - key_val = get_past_keyval_with_shift(output[1:]) + key_val = get_past_keyval_with_shift(output[1:], NUM_KEY_VAL_HEADS) for key, val in key_val.items(): data[key] = val - _input_cached_data_save( + save_input_cached_data( data, split_part=3, model_type="tg", + data_dir=DATA_DIR, + model_name="llama_v2", + model_id=MODEL_ID, + model_asset_version=MODEL_ASSET_VERSION, input_seq_len=input_seq_len, ) return data @@ -1268,7 +1175,7 @@ def get_calibration_data( ) -class Llama2_TokenGenerator_4_Quantized(Llama2_QuantizedMixin): +class Llama2_TokenGenerator_4_Quantized(Llama_QuantizedMixin): def __init__(self, model: torch.nn.Module, encoding_path: str): super().__init__(model, encoding_path, is_token_generator=True) self.split_part = 4 @@ -1310,14 +1217,14 @@ def get_input_spec( # the model input specification upon submitting a compile job. 
input_spec = { - "input_ids": ((1, 1, 4096), "float32"), + "input_ids": ((1, 1, ATTENTION_HIDDEN_DIM), "float32"), "attention_mask": ((1, 1, 1, input_seq_length), "float32"), - "position_ids_cos": ((1, 1, 1, 64), "float32"), - "position_ids_sin": ((1, 1, 1, 64), "float32"), + "position_ids_cos": ((1, 1, 1, POS_EMBED_DIM), "float32"), + "position_ids_sin": ((1, 1, 1, POS_EMBED_DIM), "float32"), } # Collect past_key_values and drop output names - past_key_val_names = _get_past_key_names() + past_key_val_names = get_past_key_names() for past_key_val in past_key_val_names: if "key" in past_key_val: input_spec[past_key_val] = ( @@ -1331,9 +1238,29 @@ def get_input_spec( ) return input_spec + @staticmethod + def get_output_names(): + layers_start, layers_end = get_hidden_layer_range_from_split( + split_part=4, model_split_map=MODEL_SPLIT_MAP + ) + return Llama_QuantizedMixin.get_output_names( + start=layers_start, + end=layers_end, + past_key_val_heads=NUM_KEY_VAL_HEADS, + output_name="logits", + ) + @staticmethod def get_model_data(input_seq_len: int = DEFAULT_INPUT_SEQ_LEN): - data = _input_cached_data_load(split_part=4, model_type="tg") + data = load_input_cached_data( + split_part=4, + data_dir=DATA_DIR, + model_name="llama_v2", + model_id=MODEL_ID, + model_asset_version=MODEL_ASSET_VERSION, + model_type="tg", + input_seq_len=input_seq_len, + ) if data is not None: return data @@ -1358,14 +1285,18 @@ def get_model_data(input_seq_len: int = DEFAULT_INPUT_SEQ_LEN): "position_ids_sin": inputs["position_ids_sin"], } - key_val = get_past_keyval_with_shift(output[1:]) + key_val = get_past_keyval_with_shift(output[1:], NUM_KEY_VAL_HEADS) for key, val in key_val.items(): data[key] = val - _input_cached_data_save( + save_input_cached_data( data, split_part=4, model_type="tg", + data_dir=DATA_DIR, + model_name="llama_v2", + model_id=MODEL_ID, + model_asset_version=MODEL_ASSET_VERSION, input_seq_len=input_seq_len, ) return data diff --git a/qai_hub_models/models/llama_v2_7b_chat_quantized/modeling_llama.py b/qai_hub_models/models/llama_v2_7b_chat_quantized/modeling_llama.py index b5ae0f78..3c8c0ec3 100644 --- a/qai_hub_models/models/llama_v2_7b_chat_quantized/modeling_llama.py +++ b/qai_hub_models/models/llama_v2_7b_chat_quantized/modeling_llama.py @@ -23,18 +23,19 @@ # limitations under the License. 
""" PyTorch LLaMA model.""" +from __future__ import annotations + import math from typing import List, Optional, Tuple, Union import torch import torch.utils.checkpoint from torch import nn -from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss +from torch.nn import CrossEntropyLoss from transformers.activations import ACT2FN from transformers.modeling_outputs import ( BaseModelOutputWithPast, CausalLMOutputWithPast, - SequenceClassifierOutputWithPast, ) from transformers.modeling_utils import PreTrainedModel from transformers.models.llama.configuration_llama import LlamaConfig @@ -190,50 +191,6 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids): ### ------- QCOM EDITS STARTS ------- ### -class RopeEmbedding: - """ - Compute Rotary Position Embedding - Ref: https://arxiv.org/pdf/2104.09864 - - Compute RopeEmbedding outside model to simplify model quantization - """ - - def __init__(self, head_dim: int = 128, max_length: int = 1024): - """ - head_dim: dimension size of head - max_length: max sequence length to expect - """ - self.max_length = max_length - self.cos, self.sin = self.precompute_freqs_cis(head_dim, max_length * 2) - - def precompute_freqs_cis(self, dim: int, end: int, theta: float = 10000.0): - """ - Precompute embeeding matrix - """ - freqs = 1.0 / (theta ** (torch.arange(0, dim, 2)[: (dim // 2)].float() / dim)) - t = torch.arange(end) - freqs = torch.outer(t, freqs).float() - freqs_cis = torch.polar(torch.ones_like(freqs), freqs) - freqs_cis = freqs_cis[0 : self.max_length] - freqs_real = torch.view_as_real(freqs_cis) - freqs_real = freqs_real.unsqueeze(0).unsqueeze(0) - - freqs_cos = freqs_real[:, :, :, :, 0] # extract even elements - freqs_sin = freqs_real[:, :, :, :, 1] # extract odd elements - return freqs_cos, freqs_sin - - def get_embedding(self, position_ids: torch.Tensor): - """ - position_ids: [batch_size, sequence_length] - return [batch_size, 1, sequence_length, head_sim//2][2] - """ - cos = self.cos[0, 0, :, :] # [seq_len, dim] - sin = self.sin[0, 0, :, :] # [seq_len, dim] - cos = cos[position_ids].unsqueeze(1) - sin = sin[position_ids].unsqueeze(1) - return cos, sin - - def apply_rotary_pos_emb_single(x, cos, sin, position_ids): # The first two dimensions of cos and sin are always 1, so we can `squeeze` them. cos = cos[0, 0, :, :] # [seq_len, dim] @@ -911,6 +868,9 @@ def forward( ) ### ------- QCOM EDITS STARTS ------- ### + # Combined attention mask expand attention mask to rank-4 + # [ bsz, 1, tgt_seq_len, src_seq_len ] + # check attention mask shape and fetch sequence length correctly. elif attention_mask is not None: attention_shape = attention_mask.shape batch_size = attention_shape[0] @@ -1310,137 +1270,3 @@ def _reorder_cache(past_key_values, beam_idx): ), ) return reordered_past - - -@add_start_docstrings( - """ - The LLaMa Model transformer with a sequence classification head on top (linear layer). - - [`LlamaForSequenceClassification`] uses the last token in order to do the classification, as other causal models - (e.g. GPT-2) do. - - Since it does classification on the last token, it requires to know the position of the last token. If a - `pad_token_id` is defined in the configuration, it finds the last token that is not a padding token in each row. If - no `pad_token_id` is defined, it simply takes the last value in each row of the batch. Since it cannot guess the - padding tokens when `inputs_embeds` are passed instead of `input_ids`, it does the same (take the last value in - each row of the batch). 
- """, - LLAMA_START_DOCSTRING, -) -class LlamaForSequenceClassification(LlamaPreTrainedModel): - _keys_to_ignore_on_load_missing = [r"lm_head.weight"] - - def __init__(self, config): - super().__init__(config) - self.num_labels = config.num_labels - self.model = LlamaModel(config) - self.score = nn.Linear(config.hidden_size, self.num_labels, bias=False) - - # Initialize weights and apply final processing - self.post_init() - - def get_input_embeddings(self): - return self.model.embed_tokens - - def set_input_embeddings(self, value): - self.model.embed_tokens = value - - @add_start_docstrings_to_model_forward(LLAMA_INPUTS_DOCSTRING) - def forward( - self, - input_ids: torch.LongTensor = None, - attention_mask: Optional[torch.Tensor] = None, - position_ids: Optional[torch.LongTensor] = None, - past_key_values: Optional[List[torch.FloatTensor]] = None, - inputs_embeds: Optional[torch.FloatTensor] = None, - labels: Optional[torch.LongTensor] = None, - use_cache: Optional[bool] = None, - output_attentions: Optional[bool] = None, - output_hidden_states: Optional[bool] = None, - return_dict: Optional[bool] = None, - ) -> Union[Tuple, SequenceClassifierOutputWithPast]: - r""" - labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*): - Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., - config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If - `config.num_labels > 1` a classification loss is computed (Cross-Entropy). - """ - return_dict = ( - return_dict if return_dict is not None else self.config.use_return_dict - ) - - transformer_outputs = self.model( - input_ids, - attention_mask=attention_mask, - position_ids=position_ids, - past_key_values=past_key_values, - inputs_embeds=inputs_embeds, - use_cache=use_cache, - output_attentions=output_attentions, - output_hidden_states=output_hidden_states, - return_dict=return_dict, - ) - hidden_states = transformer_outputs[0] - logits = self.score(hidden_states) - - if input_ids is not None: - batch_size = input_ids.shape[0] - else: - batch_size = inputs_embeds.shape[0] - - if self.config.pad_token_id is None and batch_size != 1: - raise ValueError( - "Cannot handle batch sizes > 1 if no padding token is defined." 
- ) - if self.config.pad_token_id is None: - sequence_lengths = -1 - else: - if input_ids is not None: - sequence_lengths = ( - torch.ne(input_ids, self.config.pad_token_id).sum(-1) - 1 - ).to(logits.device) - else: - sequence_lengths = -1 - - pooled_logits = logits[ - torch.arange(batch_size, device=logits.device), sequence_lengths - ] - - loss = None - if labels is not None: - labels = labels.to(logits.device) - if self.config.problem_type is None: - if self.num_labels == 1: - self.config.problem_type = "regression" - elif self.num_labels > 1 and ( - labels.dtype == torch.long or labels.dtype == torch.int - ): - self.config.problem_type = "single_label_classification" - else: - self.config.problem_type = "multi_label_classification" - - if self.config.problem_type == "regression": - loss_fct = MSELoss() - if self.num_labels == 1: - loss = loss_fct(pooled_logits.squeeze(), labels.squeeze()) - else: - loss = loss_fct(pooled_logits, labels) - elif self.config.problem_type == "single_label_classification": - loss_fct = CrossEntropyLoss() - loss = loss_fct( - pooled_logits.view(-1, self.num_labels), labels.view(-1) - ) - elif self.config.problem_type == "multi_label_classification": - loss_fct = BCEWithLogitsLoss() - loss = loss_fct(pooled_logits, labels) - if not return_dict: - output = (pooled_logits,) + transformer_outputs[1:] - return ((loss,) + output) if loss is not None else output - - return SequenceClassifierOutputWithPast( - loss=loss, - logits=pooled_logits, - past_key_values=transformer_outputs.past_key_values, - hidden_states=transformer_outputs.hidden_states, - attentions=transformer_outputs.attentions, - ) diff --git a/qai_hub_models/models/llama_v2_7b_chat_quantized/perf.yaml b/qai_hub_models/models/llama_v2_7b_chat_quantized/perf.yaml index b8c5ad10..748615cc 100644 --- a/qai_hub_models/models/llama_v2_7b_chat_quantized/perf.yaml +++ b/qai_hub_models/models/llama_v2_7b_chat_quantized/perf.yaml @@ -1,5 +1,5 @@ models: -- name: Llama-TokenGenerator-KVCache-Quantized +- name: Llama2-TokenGenerator-KVCache-Quantized performance_metrics: - reference_device_info: name: Samsung Galaxy S24 @@ -10,11 +10,11 @@ models: chipset: Snapdragon® 8 Gen 3 timestamp: '2024-05-23T00:34:02.549319Z' torchscript_onnx_qnn: - inference_time: 104953 - throughput: 9.528 + inference_time: 90268 + throughput: 11.07 estimated_peak_memory_range: - min: 331575296 - max: 5017129568 + min: 66715648 + max: 4562679888 layer_info: layers_on_npu: 34842 layers_on_gpu: 0 @@ -47,7 +47,7 @@ models: primary_compute_unit: NPU job_id: "null" job_status: Passed -- name: Llama-PromptProcessor-Quantized +- name: Llama2-PromptProcessor-Quantized performance_metrics: - reference_device_info: name: Samsung Galaxy S24 diff --git a/qai_hub_models/models/llama_v2_7b_chat_quantized/test.py b/qai_hub_models/models/llama_v2_7b_chat_quantized/test.py index c194ed45..e34935d2 100644 --- a/qai_hub_models/models/llama_v2_7b_chat_quantized/test.py +++ b/qai_hub_models/models/llama_v2_7b_chat_quantized/test.py @@ -4,11 +4,11 @@ # --------------------------------------------------------------------- import pytest -from qai_hub_models.models.llama_v2_7b_chat_quantized.demo import llama_chat_demo +from qai_hub_models.models.llama_v2_7b_chat_quantized.demo import llama_2_chat_demo @pytest.mark.skip("#105 move slow_cloud and slow tests to nightly.") @pytest.mark.slow_cloud def test_demo(): # Run demo and verify it does not crash - llama_chat_demo(is_test=True) + llama_2_chat_demo(is_test=True) diff --git 
a/qai_hub_models/models/mediapipe_face/export.py b/qai_hub_models/models/mediapipe_face/export.py index adb92a20..8d84a972 100644 --- a/qai_hub_models/models/mediapipe_face/export.py +++ b/qai_hub_models/models/mediapipe_face/export.py @@ -33,7 +33,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, components: Optional[List[str]] = None, skip_profiling: bool = False, @@ -127,7 +127,6 @@ def export_model( for component_name, component in components_dict.items(): # Trace the model input_spec = component.get_input_spec() - component.eval() source_model = torch.jit.trace( component.to("cpu"), make_torch_inputs(input_spec) ) @@ -194,7 +193,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mediapipe_face/perf.yaml b/qai_hub_models/models/mediapipe_face/perf.yaml index ed6dfc34..a8237df8 100644 --- a/qai_hub_models/models/mediapipe_face/perf.yaml +++ b/qai_hub_models/models/mediapipe_face/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: MediaPipeFaceDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 781.0 - throughput: 1280.4097311139565 + inference_time: 780.0 + throughput: 1282.051282051282 estimated_peak_memory_range: - min: 90112 - max: 2155184 + min: 12288 + max: 2020120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: j7gjkey75 + job_id: jn5qwjko5 job_status: Passed torchscript_onnx_qnn: - inference_time: 835.0 - throughput: 1197.6047904191616 + inference_time: 847.0 + throughput: 1180.637544273908 estimated_peak_memory_range: - min: 16384 - max: 101864120 + min: 2113536 + max: 36582392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jvgd7o4kg + job_id: jygzw1kxg job_status: Passed - torchscript_onnx_ort: - inference_time: 986.0 - throughput: 1014.1987829614604 + torchscript_onnx: + inference_time: 1018.0 + throughput: 982.3182711198428 estimated_peak_memory_range: - min: 552960 - max: 8114576 + min: 806912 + max: 7821152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jqpyv6yrp + job_id: jqp48z02g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:45:23Z' + timestamp: '2024-06-22T22:51:14Z' - torchscript_onnx_tflite: - inference_time: 543.0 - throughput: 1841.6206261510129 + inference_time: 539.0 + throughput: 1855.287569573284 estimated_peak_memory_range: - min: 16384 - max: 31618960 + min: 12288 + max: 33853952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: jygzvryzp + job_id: jw56vklyp job_status: Passed torchscript_onnx_qnn: - inference_time: 593.0 - throughput: 1686.3406408094436 + inference_time: 590.0 + throughput: 1694.915254237288 estimated_peak_memory_range: - min: 802816 - max: 49388544 + min: 12288 + max: 42466864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jqp4jv4qp + job_id: jmg986e8p job_status: Passed - torchscript_onnx_ort: - inference_time: 706.0 - throughput: 1416.4305949008499 + torchscript_onnx: + inference_time: 729.0 + throughput: 1371.7421124828531 estimated_peak_memory_range: - min: 548864 - max: 22898592 + min: 2088960 + max: 22681920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: j1p8wzkzp + job_id: jo5m4jy75 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:45:25Z' + timestamp: '2024-06-22T22:51:16Z' - torchscript_onnx_tflite: - inference_time: 779.0 - throughput: 1283.6970474967907 + inference_time: 780.0 + throughput: 1282.051282051282 estimated_peak_memory_range: - min: 12288 - max: 1532120 + min: 20480 + max: 1375848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: jmg99w2qg + job_id: jwgomjqk5 job_status: Passed torchscript_onnx_qnn: - inference_time: 835.0 - throughput: 1197.6047904191616 + inference_time: 845.0 + throughput: 1183.4319526627219 estimated_peak_memory_range: - min: 806912 - max: 77885504 + min: 815104 + max: 8055448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jopr1e7vg + job_id: jmg986emp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:45:21Z' + timestamp: '2024-06-22T22:51:10Z' + - torchscript_onnx_tflite: + inference_time: 776.0 + throughput: 1288.659793814433 + estimated_peak_memory_range: + min: 20480 + max: 1376952 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 112 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 112 + job_id: j7gj1j4eg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 845.0 + throughput: 1183.4319526627219 + estimated_peak_memory_range: + min: 16384 + max: 94032616 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 148 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 148 + job_id: jvgd0jl6p + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:51:12Z' - torchscript_onnx_qnn: - inference_time: 928.0 - throughput: 1077.5862068965516 + inference_time: 970.0 + throughput: 1030.9278350515465 estimated_peak_memory_range: min: 786432 max: 786432 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jo5mv3ky5 + job_id: jvgd0jlzp job_status: Passed - torchscript_onnx_ort: - inference_time: 997.0 - throughput: 1003.0090270812437 + torchscript_onnx: + inference_time: 1007.0 + throughput: 993.0486593843099 
estimated_peak_memory_range: - min: 5971968 - max: 5971968 + min: 5120000 + max: 5120000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jn5q93d7p + job_id: jopr9zjkp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,15 +256,15 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:45:27Z' + timestamp: '2024-06-22T22:51:18Z' - name: MediaPipeFaceLandmarkDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 318.0 - throughput: 3144.6540880503144 + inference_time: 304.0 + throughput: 3289.4736842105262 estimated_peak_memory_range: - min: 12288 - max: 2130328 + min: 24576 + max: 1457024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -232,14 +272,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 101 - job_id: jlpe4kx75 + job_id: j1gl7jrm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 391.0 - throughput: 2557.544757033248 + inference_time: 390.0 + throughput: 2564.102564102564 estimated_peak_memory_range: - min: 131072 - max: 98992544 + min: 16384 + max: 95095056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -247,14 +287,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 107 - job_id: jz57vxnq5 + job_id: jz5wxjnmp job_status: Passed - torchscript_onnx_ort: - inference_time: 532.0 - throughput: 1879.6992481203008 + torchscript_onnx: + inference_time: 496.0 + throughput: 2016.1290322580646 estimated_peak_memory_range: min: 12288 - max: 84060104 + max: 5888232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -262,7 +302,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: j2p0elx25 + job_id: j0pxmw28g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -271,13 +311,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:45:24Z' + timestamp: '2024-06-22T22:51:15Z' - torchscript_onnx_tflite: - inference_time: 224.0 - throughput: 4464.285714285715 + inference_time: 240.0 + throughput: 4166.666666666667 estimated_peak_memory_range: min: 16384 - max: 27155600 + max: 28725248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -285,14 +325,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 101 - job_id: jz5wmqzzg + job_id: j1p38y2n5 job_status: Passed torchscript_onnx_qnn: - inference_time: 283.0 - throughput: 3533.5689045936397 + inference_time: 285.0 + throughput: 3508.7719298245615 estimated_peak_memory_range: - min: 458752 - max: 40876896 + min: 12288 + max: 34430832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -300,14 +340,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 107 - job_id: j0pxeyrj5 + job_id: jnp13rx75 job_status: Passed - torchscript_onnx_ort: - inference_time: 375.0 - throughput: 2666.6666666666665 + torchscript_onnx: + inference_time: 387.0 + throughput: 2583.9793281653747 estimated_peak_memory_range: - min: 12288 - max: 19616240 + min: 458752 + max: 18610432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -315,7 +355,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jogkr3ky5 + job_id: jegnxj8j5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -324,13 +364,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:45:26Z' + timestamp: '2024-06-22T22:51:17Z' - torchscript_onnx_tflite: - 
inference_time: 309.0 - throughput: 3236.2459546925566 + inference_time: 304.0 + throughput: 3289.4736842105262 estimated_peak_memory_range: - min: 12288 - max: 1641680 + min: 24576 + max: 1402120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -338,14 +378,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 101 - job_id: jnp1qe1kg + job_id: j1pv4jxrp job_status: Passed torchscript_onnx_qnn: - inference_time: 395.0 - throughput: 2531.6455696202534 + inference_time: 386.0 + throughput: 2590.6735751295337 estimated_peak_memory_range: - min: 290816 - max: 8822944 + min: 462848 + max: 4234344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -353,7 +393,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 107 - job_id: jep23lzxg + job_id: jnp13rxn5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -362,10 +402,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:45:22Z' + timestamp: '2024-06-22T22:51:10Z' + - torchscript_onnx_tflite: + inference_time: 319.0 + throughput: 3134.796238244514 + estimated_peak_memory_range: + min: 16384 + max: 1984816 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 101 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 101 + job_id: jlpe2j3vp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 389.0 + throughput: 2570.694087403599 + estimated_peak_memory_range: + min: 16384 + max: 98050608 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 107 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 107 + job_id: jz576q3ng + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:51:13Z' - torchscript_onnx_qnn: - inference_time: 497.0 - throughput: 2012.0724346076458 + inference_time: 496.0 + throughput: 2016.1290322580646 estimated_peak_memory_range: min: 442368 max: 442368 @@ -376,14 +454,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jegnr3wv5 + job_id: jz5wxjn4p job_status: Passed - torchscript_onnx_ort: - inference_time: 521.0 - throughput: 1919.3857965451057 + torchscript_onnx: + inference_time: 501.0 + throughput: 1996.007984031936 estimated_peak_memory_range: - min: 5312512 - max: 5312512 + min: 3923968 + max: 3923968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -391,7 +469,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: j1gle3qep + job_id: jep2j2n65 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -400,4 +478,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:45:27Z' + timestamp: '2024-06-22T22:51:19Z' diff --git a/qai_hub_models/models/mediapipe_hand/export.py b/qai_hub_models/models/mediapipe_hand/export.py index 0100ddad..7e906cdc 100644 --- a/qai_hub_models/models/mediapipe_hand/export.py +++ b/qai_hub_models/models/mediapipe_hand/export.py @@ -33,7 +33,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, components: Optional[List[str]] = None, skip_profiling: bool = False, @@ -127,7 +127,6 @@ def export_model( for component_name, component in components_dict.items(): # Trace the model input_spec = component.get_input_spec() - component.eval() source_model = torch.jit.trace( component.to("cpu"), make_torch_inputs(input_spec) ) 
@@ -194,7 +193,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mediapipe_hand/perf.yaml b/qai_hub_models/models/mediapipe_hand/perf.yaml index 67190c80..a2c7fe64 100644 --- a/qai_hub_models/models/mediapipe_hand/perf.yaml +++ b/qai_hub_models/models/mediapipe_hand/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: MediaPipeHandDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 2260.0 - throughput: 442.4778761061947 + inference_time: 2277.0 + throughput: 439.17435221783046 estimated_peak_memory_range: - min: 12288 - max: 11649504 + min: 20480 + max: 4315184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 150 - job_id: jwgoe394p + job_id: j1p88oyqp job_status: Passed torchscript_onnx_qnn: - inference_time: 1017.0 - throughput: 983.284169124877 + inference_time: 1006.0 + throughput: 994.0357852882704 estimated_peak_memory_range: - min: 20480 - max: 21650176 + min: 2113536 + max: 23505176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: jmg99wqqg + job_id: j7gj1xn1g job_status: Passed - torchscript_onnx_ort: - inference_time: 1164.0 - throughput: 859.106529209622 + torchscript_onnx: + inference_time: 1176.0 + throughput: 850.3401360544218 estimated_peak_memory_range: min: 12288 - max: 18412096 + max: 21144392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 196 - job_id: jqp4jv6lp + job_id: jo5m4rx75 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:46:02Z' + timestamp: '2024-06-22T22:52:02Z' - torchscript_onnx_tflite: - inference_time: 1902.0 - throughput: 525.7623554153523 + inference_time: 2100.0 + throughput: 476.1904761904762 estimated_peak_memory_range: - min: 12288 - max: 50595712 + min: 16384 + max: 53620752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 150 - job_id: j7gjke875 + job_id: jn5qw8qe5 job_status: Passed torchscript_onnx_qnn: - inference_time: 722.0 - throughput: 1385.0415512465374 + inference_time: 724.0 + throughput: 1381.2154696132598 estimated_peak_memory_range: min: 802816 - max: 60773680 + max: 52232928 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: jvgd7omkg + job_id: jygzwed4g job_status: Passed - torchscript_onnx_ort: - inference_time: 837.0 - throughput: 1194.7431302270013 + torchscript_onnx: + inference_time: 838.0 + throughput: 1193.3174224343675 estimated_peak_memory_range: - min: 323584 - max: 
36752192 + min: 237568 + max: 30961680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 196 - job_id: jo5mv31q5 + job_id: jopr9k3kp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:46:04Z' + timestamp: '2024-06-22T22:52:04Z' - torchscript_onnx_tflite: - inference_time: 2331.0 - throughput: 429.000429000429 + inference_time: 2421.0 + throughput: 413.0524576621231 estimated_peak_memory_range: - min: 36864 - max: 2444200 + min: 12288 + max: 1686048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 150 - job_id: jygzvr0zp + job_id: jw56v64np job_status: Passed torchscript_onnx_qnn: - inference_time: 1015.0 - throughput: 985.2216748768473 + inference_time: 1005.0 + throughput: 995.0248756218906 estimated_peak_memory_range: - min: 806912 - max: 10668872 + min: 802816 + max: 22557664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: jvgd7omlg + job_id: jvgd0w16p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:46:01Z' + timestamp: '2024-06-22T22:51:57Z' + - torchscript_onnx_tflite: + inference_time: 2277.0 + throughput: 439.17435221783046 + estimated_peak_memory_range: + min: 12288 + max: 5476888 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 148 + layers_on_gpu: 0 + layers_on_cpu: 2 + total_layers: 150 + job_id: jwgomy615 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1020.0 + throughput: 980.3921568627451 + estimated_peak_memory_range: + min: 667648 + max: 120131440 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 197 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 197 + job_id: jqp48qr2g + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:51:59Z' - torchscript_onnx_qnn: - inference_time: 1036.0 - throughput: 965.2509652509652 + inference_time: 1144.0 + throughput: 874.1258741258741 estimated_peak_memory_range: min: 786432 max: 786432 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 196 - job_id: jmg99wqvg + job_id: jmg98vnmp job_status: Passed - torchscript_onnx_ort: - inference_time: 1182.0 - throughput: 846.0236886632825 + torchscript_onnx: + inference_time: 1206.0 + throughput: 829.1873963515754 estimated_peak_memory_range: - min: 704512 - max: 704512 + min: 1572864 + max: 1572864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 196 - job_id: jopr1emeg + job_id: jqpyne30g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,15 +256,15 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:46:06Z' + timestamp: '2024-06-22T22:52:06Z' - name: MediaPipeHandLandmarkDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 1205.0 - throughput: 829.8755186721992 + inference_time: 1212.0 + throughput: 825.0825082508251 estimated_peak_memory_range: - min: 12288 
- max: 2551752 + min: 16384 + max: 1722664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -232,14 +272,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 159 - job_id: j1pvzvn7g + job_id: jogkdzxvp job_status: Passed torchscript_onnx_qnn: - inference_time: 1299.0 - throughput: 769.8229407236336 + inference_time: 1306.0 + throughput: 765.6967840735069 estimated_peak_memory_range: min: 802816 - max: 8940712 + max: 7849944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -247,14 +287,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 210 - job_id: jnp1qemkg + job_id: jlpe29m8p job_status: Passed - torchscript_onnx_ort: - inference_time: 1521.0 - throughput: 657.4621959237344 + torchscript_onnx: + inference_time: 1563.0 + throughput: 639.7952655150352 estimated_peak_memory_range: - min: 12288 - max: 143178688 + min: 86016 + max: 180242064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -262,7 +302,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 209 - job_id: j0pxey895 + job_id: jegnx2vj5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -271,13 +311,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:46:03Z' + timestamp: '2024-06-22T22:52:02Z' - torchscript_onnx_tflite: - inference_time: 903.0 - throughput: 1107.4197120708748 + inference_time: 902.0 + throughput: 1108.6474501108648 estimated_peak_memory_range: min: 12288 - max: 59093296 + max: 62654048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -285,14 +325,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 159 - job_id: jlpe4kn75 + job_id: j1gl7nm25 job_status: Passed torchscript_onnx_qnn: - inference_time: 962.0 - throughput: 1039.5010395010395 + inference_time: 955.0 + throughput: 1047.1204188481674 estimated_peak_memory_range: min: 802816 - max: 66542112 + max: 57032672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -300,14 +340,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 210 - job_id: jz5wmqrjg + job_id: jz5wxo64p job_status: Passed - torchscript_onnx_ort: - inference_time: 1121.0 - throughput: 892.0606601248885 + torchscript_onnx: + inference_time: 1128.0 + throughput: 886.5248226950355 estimated_peak_memory_range: - min: 802816 - max: 30698880 + min: 0 + max: 24398048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -315,7 +355,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 209 - job_id: jegnr3dm5 + job_id: jep2j8y65 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -324,13 +364,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:46:05Z' + timestamp: '2024-06-22T22:52:04Z' - torchscript_onnx_tflite: - inference_time: 1196.0 - throughput: 836.1204013377926 + inference_time: 1199.0 + throughput: 834.0283569641368 estimated_peak_memory_range: - min: 28672 - max: 1643304 + min: 12288 + max: 9620000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -338,14 +378,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 159 - job_id: jz5wmqrzg + job_id: j1p38k0m5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1318.0 - throughput: 758.7253414264036 + inference_time: 1297.0 + throughput: 771.0100231303007 estimated_peak_memory_range: - min: 294912 - max: 52198264 + min: 32768 + max: 12496920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -353,7 +393,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 210 - job_id: 
jz57vx1r5 + job_id: jz576zrng job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -362,13 +402,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:46:01Z' + timestamp: '2024-06-22T22:51:57Z' + - torchscript_onnx_tflite: + inference_time: 1211.0 + throughput: 825.7638315441784 + estimated_peak_memory_range: + min: 12288 + max: 1782992 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 159 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 159 + job_id: j1pv43kzp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1298.0 + throughput: 770.4160246533128 + estimated_peak_memory_range: + min: 811008 + max: 52503816 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 210 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 210 + job_id: j0pxmvo8g + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:52:00Z' - torchscript_onnx_qnn: - inference_time: 1513.0 - throughput: 660.9385327164574 + inference_time: 1501.0 + throughput: 666.2225183211193 estimated_peak_memory_range: - min: 1150976 - max: 1150976 + min: 786432 + max: 786432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -376,14 +454,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 209 - job_id: jnp1qemlg + job_id: jnp130zn5 job_status: Passed - torchscript_onnx_ort: - inference_time: 1551.0 - throughput: 644.7453255963894 + torchscript_onnx: + inference_time: 1569.0 + throughput: 637.3486297004462 estimated_peak_memory_range: - min: 20062208 - max: 20062208 + min: 22982656 + max: 22982656 primary_compute_unit: NPU precision: fp16 layer_info: @@ -391,7 +469,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 209 - job_id: jep23lqmg + job_id: j2p0kyz05 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -400,4 +478,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:46:06Z' + timestamp: '2024-06-22T22:52:06Z' diff --git a/qai_hub_models/models/mediapipe_pose/export.py b/qai_hub_models/models/mediapipe_pose/export.py index 4fd3fb91..a5efe339 100644 --- a/qai_hub_models/models/mediapipe_pose/export.py +++ b/qai_hub_models/models/mediapipe_pose/export.py @@ -33,7 +33,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, components: Optional[List[str]] = None, skip_profiling: bool = False, @@ -127,7 +127,6 @@ def export_model( for component_name, component in components_dict.items(): # Trace the model input_spec = component.get_input_spec() - component.eval() source_model = torch.jit.trace( component.to("cpu"), make_torch_inputs(input_spec) ) @@ -194,7 +193,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mediapipe_pose/perf.yaml b/qai_hub_models/models/mediapipe_pose/perf.yaml index 5bcb6fa5..cf04d903 100644 --- a/qai_hub_models/models/mediapipe_pose/perf.yaml +++ b/qai_hub_models/models/mediapipe_pose/perf.yaml @@ -9,6 +9,7 @@ aggregated: - 
Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: MediaPipePoseDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 850.0 - throughput: 1176.4705882352941 + inference_time: 826.0 + throughput: 1210.6537530266344 estimated_peak_memory_range: - min: 32768 - max: 1863416 + min: 16384 + max: 1530448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 107 - job_id: j1p8wzd8p + job_id: jn5qw86e5 job_status: Passed torchscript_onnx_qnn: - inference_time: 880.0 - throughput: 1136.3636363636363 + inference_time: 879.0 + throughput: 1137.6564277588168 estimated_peak_memory_range: - min: 2273280 - max: 7352768 + min: 217088 + max: 4696168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jwgoe37dp + job_id: jygzwe34g job_status: Passed - torchscript_onnx_ort: - inference_time: 1001.0 - throughput: 999.000999000999 + torchscript_onnx: + inference_time: 1012.0 + throughput: 988.1422924901186 estimated_peak_memory_range: - min: 471040 - max: 10697640 + min: 77824 + max: 25910224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 139 - job_id: jvgd7oylg + job_id: jopr9kvkp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:46:43Z' + timestamp: '2024-06-22T22:52:50Z' - torchscript_onnx_tflite: - inference_time: 621.0 - throughput: 1610.3059581320451 + inference_time: 615.0 + throughput: 1626.0162601626016 estimated_peak_memory_range: min: 61440 - max: 42407216 + max: 45426960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 107 - job_id: jn5q93xmp + job_id: jw56v6ynp job_status: Passed torchscript_onnx_qnn: - inference_time: 633.0 - throughput: 1579.778830963665 + inference_time: 631.0 + throughput: 1584.7860538827258 estimated_peak_memory_range: - min: 208896 - max: 48822992 + min: 0 + max: 38267648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: j7gjke685 + job_id: jmg98vlmp job_status: Passed - torchscript_onnx_ort: - inference_time: 772.0 - throughput: 1295.3367875647668 + torchscript_onnx: + inference_time: 755.0 + throughput: 1324.5033112582782 estimated_peak_memory_range: min: 212992 - max: 32138320 + max: 28568464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 139 - job_id: jqp4jv7lp + job_id: jqpyne10g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:46:44Z' + timestamp: '2024-06-22T22:52:52Z' - torchscript_onnx_tflite: - inference_time: 830.0 - throughput: 1204.8192771084337 + inference_time: 828.0 + throughput: 1207.729468599034 estimated_peak_memory_range: - min: 20480 - max: 1868176 + min: 28672 + max: 1564872 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 107 - job_id: jw56qn97g + job_id: jwgomy215 job_status: Passed torchscript_onnx_qnn: - inference_time: 888.0 - throughput: 1126.126126126126 + inference_time: 876.0 + throughput: 1141.552511415525 estimated_peak_memory_range: - min: 16384 - max: 128786224 + min: 225280 + max: 5079312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jmg99w7vg + job_id: jqp48ql2g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:46:41Z' + timestamp: '2024-06-22T22:52:45Z' + - torchscript_onnx_tflite: + inference_time: 826.0 + throughput: 1210.6537530266344 + estimated_peak_memory_range: + min: 24576 + max: 5251472 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 107 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 107 + job_id: j7gj1xv1g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 887.0 + throughput: 1127.3957158962796 + estimated_peak_memory_range: + min: 16384 + max: 124531408 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 140 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 140 + job_id: jo5m4rn75 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:52:48Z' - torchscript_onnx_qnn: - inference_time: 1086.0 - throughput: 920.8103130755064 + inference_time: 1001.0 + throughput: 999.000999000999 estimated_peak_memory_range: - min: 1765376 - max: 1765376 + min: 528384 + max: 528384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 139 - job_id: jygzvrq6p + job_id: jvgd0wx6p job_status: Passed - torchscript_onnx_ort: - inference_time: 1038.0 - throughput: 963.3911368015414 + torchscript_onnx: + inference_time: 1076.0 + throughput: 929.368029739777 estimated_peak_memory_range: - min: 3256320 - max: 3256320 + min: 241664 + max: 241664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 139 - job_id: jo5mv37q5 + job_id: j1p88o2qp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,15 +256,15 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:46:46Z' + timestamp: '2024-06-22T22:52:54Z' - name: MediaPipePoseLandmarkDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 1205.0 - throughput: 829.8755186721992 + inference_time: 1229.0 + throughput: 813.6696501220505 estimated_peak_memory_range: - min: 200704 - max: 2517320 + min: 12288 + max: 3016072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -232,14 +272,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 230 - job_id: jogkr3wo5 + job_id: j1gl7nv25 job_status: Passed torchscript_onnx_qnn: - inference_time: 1306.0 - throughput: 765.6967840735069 + inference_time: 1340.0 + throughput: 746.2686567164179 estimated_peak_memory_range: - min: 16384 - max: 13996512 + min: 12288 + max: 13332312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -247,14 +287,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 
306 - job_id: j1pvzvymg + job_id: jz5wxoe4p job_status: Passed - torchscript_onnx_ort: - inference_time: 1647.0 - throughput: 607.1645415907711 + torchscript_onnx: + inference_time: 1627.0 + throughput: 614.6281499692686 estimated_peak_memory_range: - min: 12288 - max: 25082496 + min: 16384 + max: 25452864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -262,7 +302,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 304 - job_id: jz57vxmr5 + job_id: jep2j8k65 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -271,13 +311,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:46:43Z' + timestamp: '2024-06-22T22:52:50Z' - torchscript_onnx_tflite: - inference_time: 864.0 - throughput: 1157.4074074074074 + inference_time: 878.0 + throughput: 1138.9521640091116 estimated_peak_memory_range: - min: 12288 - max: 90560000 + min: 16384 + max: 94818192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -285,14 +325,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 230 - job_id: j1gle39lp + job_id: j1p38kjm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 944.0 - throughput: 1059.322033898305 + inference_time: 953.0 + throughput: 1049.3179433368311 estimated_peak_memory_range: min: 802816 - max: 88829488 + max: 78260944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -300,14 +340,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 306 - job_id: jlpe4k005 + job_id: jnp1304n5 job_status: Passed - torchscript_onnx_ort: - inference_time: 1101.0 - throughput: 908.2652134423251 + torchscript_onnx: + inference_time: 1151.0 + throughput: 868.8097306689835 estimated_peak_memory_range: min: 802816 - max: 39260784 + max: 37814912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -315,7 +355,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 304 - job_id: j0pxeyq95 + job_id: j2p0ky405 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -324,13 +364,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:46:45Z' + timestamp: '2024-06-22T22:52:52Z' - torchscript_onnx_tflite: - inference_time: 1214.0 - throughput: 823.7232289950576 + inference_time: 1223.0 + throughput: 817.6614881439084 estimated_peak_memory_range: - min: 24576 - max: 2611056 + min: 28672 + max: 2986752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -338,14 +378,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 230 - job_id: j1p3qelz5 + job_id: j1pv436zp job_status: Passed torchscript_onnx_qnn: - inference_time: 1308.0 - throughput: 764.525993883792 + inference_time: 1304.0 + throughput: 766.8711656441718 estimated_peak_memory_range: - min: 434176 - max: 15229872 + min: 28672 + max: 13222296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -353,7 +393,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 306 - job_id: jnp1qeklg + job_id: j0pxmvk8g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -362,10 +402,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:46:41Z' + timestamp: '2024-06-22T22:52:46Z' + - torchscript_onnx_tflite: + inference_time: 1229.0 + throughput: 813.6696501220505 + estimated_peak_memory_range: + min: 12288 + max: 2315272 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 230 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 230 + job_id: jlpe29d8p + job_status: Passed + 
torchscript_onnx_qnn: + inference_time: 1327.0 + throughput: 753.5795026375282 + estimated_peak_memory_range: + min: 49152 + max: 10385584 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 306 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 306 + job_id: jegnx26j5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:52:48Z' - torchscript_onnx_qnn: - inference_time: 1463.0 - throughput: 683.526999316473 + inference_time: 1431.0 + throughput: 698.8120195667366 estimated_peak_memory_range: min: 786432 max: 786432 @@ -376,14 +454,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 305 - job_id: jz5wmq0jg + job_id: jz576zyng job_status: Passed - torchscript_onnx_ort: - inference_time: 1886.0 - throughput: 530.2226935312831 + torchscript_onnx: + inference_time: 1613.0 + throughput: 619.9628022318661 estimated_peak_memory_range: - min: 19697664 - max: 19697664 + min: 14336000 + max: 14336000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -391,7 +469,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 304 - job_id: jegnr34m5 + job_id: jogkdzvvp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -400,4 +478,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:46:46Z' + timestamp: '2024-06-22T22:52:54Z' diff --git a/qai_hub_models/models/mediapipe_selfie/export.py b/qai_hub_models/models/mediapipe_selfie/export.py index 374affc3..ec25ec48 100644 --- a/qai_hub_models/models/mediapipe_selfie/export.py +++ b/qai_hub_models/models/mediapipe_selfie/export.py @@ -38,7 +38,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,13 +117,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -169,7 +168,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -187,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -209,7 +208,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/mediapipe_selfie/perf.yaml b/qai_hub_models/models/mediapipe_selfie/perf.yaml index 
7f0d5fe1..94172481 100644 --- a/qai_hub_models/models/mediapipe_selfie/perf.yaml +++ b/qai_hub_models/models/mediapipe_selfie/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: MediaPipe-Selfie-Segmentation performance_metrics: - torchscript_onnx_tflite: - inference_time: 806.0 - throughput: 1240.6947890818858 + inference_time: 733.0 + throughput: 1364.256480218281 estimated_peak_memory_range: min: 12288 - max: 2385600 + max: 1795456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jqpyv6l4p + job_id: jw56v62np job_status: Passed torchscript_onnx_qnn: - inference_time: 784.0 - throughput: 1275.5102040816328 + inference_time: 769.0 + throughput: 1300.3901170351105 estimated_peak_memory_range: - min: 2240512 - max: 96205696 + min: 802816 + max: 4266008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jogkr31o5 + job_id: j7gj1xd1g job_status: Passed - torchscript_onnx_ort: - inference_time: 1346.0 - throughput: 742.9420505200594 + torchscript_onnx: + inference_time: 1329.0 + throughput: 752.4454477050414 estimated_peak_memory_range: - min: 786432 - max: 76785816 + min: 749568 + max: 5577256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: j1p3qedz5 + job_id: jnp1302n5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:47:12Z' + timestamp: '2024-06-22T22:53:24Z' - torchscript_onnx_tflite: - inference_time: 537.0 - throughput: 1862.1973929236499 + inference_time: 501.0 + throughput: 1996.007984031936 estimated_peak_memory_range: min: 12288 - max: 24988016 + max: 26270224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: j2p0elwe5 + job_id: j1p38knm5 job_status: Passed torchscript_onnx_qnn: inference_time: 512.0 throughput: 1953.125 estimated_peak_memory_range: - min: 176128 - max: 45965632 + min: 802816 + max: 39868832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jn5q93nmp + job_id: jlpe29o8p job_status: Passed - torchscript_onnx_ort: - inference_time: 904.0 - throughput: 1106.1946902654868 + torchscript_onnx: + inference_time: 924.0 + throughput: 1082.2510822510822 estimated_peak_memory_range: - min: 12288 - max: 20791344 + min: 352256 + max: 19826912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jwgoe3xdp + job_id: jvgd0wn6p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:47:13Z' + timestamp: '2024-06-22T22:53:25Z' - torchscript_onnx_tflite: - inference_time: 803.0 - throughput: 1245.3300124533 + inference_time: 728.0 + 
throughput: 1373.6263736263736 estimated_peak_memory_range: min: 24576 - max: 1606304 + max: 1854160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: j1p8wzn8p + job_id: jwgomyz15 job_status: Passed torchscript_onnx_qnn: - inference_time: 782.0 - throughput: 1278.772378516624 + inference_time: 773.0 + throughput: 1293.6610608020699 estimated_peak_memory_range: - min: 24576 - max: 12402272 + min: 806912 + max: 74241896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jw56qnx7g + job_id: jz5wxow4p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:47:11Z' + timestamp: '2024-06-22T22:53:21Z' + - torchscript_onnx_tflite: + inference_time: 725.0 + throughput: 1379.3103448275863 + estimated_peak_memory_range: + min: 12288 + max: 4593048 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 118 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 118 + job_id: j1pv43qzp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 781.0 + throughput: 1280.4097311139565 + estimated_peak_memory_range: + min: 802816 + max: 8862784 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 138 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 138 + job_id: jmg98v0mp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:53:23Z' - torchscript_onnx_qnn: - inference_time: 920.0 - throughput: 1086.9565217391305 + inference_time: 879.0 + throughput: 1137.6564277588168 estimated_peak_memory_range: min: 786432 max: 786432 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: j1gle3dlp + job_id: jygzwe24g job_status: Passed - torchscript_onnx_ort: - inference_time: 1362.0 - throughput: 734.2143906020558 + torchscript_onnx: + inference_time: 1342.0 + throughput: 745.156482861401 estimated_peak_memory_range: - min: 2674688 - max: 2674688 + min: 1925120 + max: 1925120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: j1pvzv8mg + job_id: jz576z2ng job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:47:14Z' + timestamp: '2024-06-22T22:53:26Z' diff --git a/qai_hub_models/models/midas/app.py b/qai_hub_models/models/midas/app.py index 949c87b1..7de7758e 100644 --- a/qai_hub_models/models/midas/app.py +++ b/qai_hub_models/models/midas/app.py @@ -51,8 +51,7 @@ def estimate_depth( image, (self.input_height, self.input_width) ) image_tensor = transforms.ToTensor()(resized_image).unsqueeze(0) - with torch.no_grad(): - prediction = self.model(image_tensor) + prediction = self.model(image_tensor) prediction = undo_resize_pad( prediction.unsqueeze(0), image.size, scale, padding ) diff --git a/qai_hub_models/models/midas/export.py b/qai_hub_models/models/midas/export.py index adcb9fbe..7d60c6ad 100644 --- a/qai_hub_models/models/midas/export.py +++ b/qai_hub_models/models/midas/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", 
+ device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,7 +116,6 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace( model.to("cpu"), make_torch_inputs(input_spec), check_trace=False ) @@ -124,7 +123,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -170,7 +169,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/midas/model.py b/qai_hub_models/models/midas/model.py index 669ecdd0..c0d9babb 100644 --- a/qai_hub_models/models/midas/model.py +++ b/qai_hub_models/models/midas/model.py @@ -21,7 +21,7 @@ from qai_hub_models.utils.input_spec import InputSpec MODEL_ID = __name__.split(".")[-2] -MODEL_ASSET_VERSION = 2 +MODEL_ASSET_VERSION = 3 SOURCE_REPO = "https://github.com/isl-org/MiDaS/" REPO_COMMIT = "bdc4ed64c095e026dc0a2f17cabb14d58263decb" diff --git a/qai_hub_models/models/midas/perf.yaml b/qai_hub_models/models/midas/perf.yaml index 047b62cc..e961f646 100644 --- a/qai_hub_models/models/midas/perf.yaml +++ b/qai_hub_models/models/midas/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: Midas-V2 performance_metrics: - torchscript_onnx_tflite: - inference_time: 3428.0 - throughput: 291.71528588098016 + inference_time: 3432.0 + throughput: 291.3752913752914 estimated_peak_memory_range: - min: 12288 - max: 2878504 + min: 16384 + max: 1861632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 139 - job_id: jlpe4kq05 + job_id: j0pxmv98g job_status: Passed torchscript_onnx_qnn: - inference_time: 3372.0 - throughput: 296.55990510083035 + inference_time: 3375.0 + throughput: 296.2962962962963 estimated_peak_memory_range: min: 806912 - max: 11534808 + max: 11813464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 199 - job_id: jmg99wrvg + job_id: jep2j8x65 job_status: Passed - torchscript_onnx_ort: - inference_time: 3451.0 - throughput: 289.77108084613155 + torchscript_onnx: + inference_time: 3479.0 + throughput: 287.4389192296637 estimated_peak_memory_range: - min: 12288 - max: 177641176 + min: 0 + max: 128986832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 199 - job_id: jqp4jvzlp + job_id: jn5qw8ee5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:47:43Z' + timestamp: '2024-06-22T22:53:58Z' - torchscript_onnx_tflite: - inference_time: 2407.0 - throughput: 415.45492314083924 + inference_time: 2410.0 + throughput: 414.9377593360996 estimated_peak_memory_range: min: 12288 - max: 82857536 + max: 88013264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 139 - job_id: jygzvr66p + job_id: jo5m4re75 job_status: Passed torchscript_onnx_qnn: - inference_time: 2404.0 - throughput: 415.97337770382694 + inference_time: 2396.0 + throughput: 417.3622704507512 estimated_peak_memory_range: min: 802816 - max: 65062640 + max: 56145456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 199 - job_id: jnp1qe9lg + job_id: jqpynez0g job_status: Passed - torchscript_onnx_ort: - inference_time: 2416.0 - throughput: 413.9072847682119 + torchscript_onnx: + inference_time: 2508.0 + throughput: 398.72408293460927 estimated_peak_memory_range: - min: 389120 - max: 38273760 + min: 483328 + max: 35886496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 199 - job_id: j0pxeyw95 + job_id: j1gl7n625 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:47:44Z' + timestamp: '2024-06-22T22:53:59Z' - torchscript_onnx_tflite: - inference_time: 3435.0 - throughput: 291.1208151382824 + inference_time: 3428.0 + throughput: 291.71528588098016 estimated_peak_memory_range: min: 16384 - max: 2408992 + max: 1655616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 139 - job_id: jz5wmqkjg + job_id: jegnx20j5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3369.0 - throughput: 296.8239833778569 + inference_time: 3383.0 + throughput: 295.5956251847473 estimated_peak_memory_range: - min: 802816 - max: 11302408 + min: 806912 + max: 14656904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 199 - job_id: jz57vxqr5 + job_id: j1p88o0qp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:47:42Z' + timestamp: '2024-06-22T22:53:55Z' + - torchscript_onnx_tflite: + inference_time: 3429.0 + throughput: 291.6302128900554 + estimated_peak_memory_range: + min: 16384 + max: 2350168 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 139 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 139 + job_id: jopr9k6kp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 3377.0 + throughput: 296.12081729345573 + estimated_peak_memory_range: + min: 802816 + max: 14381152 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 199 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 199 + job_id: jogkdz7vp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: 
Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:53:57Z' - torchscript_onnx_qnn: - inference_time: 3529.0 - throughput: 283.36639274582035 + inference_time: 3365.0 + throughput: 297.1768202080238 estimated_peak_memory_range: min: 786432 max: 786432 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 199 - job_id: jvgd7oklg + job_id: j2p0ky305 job_status: Passed - torchscript_onnx_ort: - inference_time: 3447.0 - throughput: 290.1073397156948 + torchscript_onnx: + inference_time: 3439.0 + throughput: 290.7822041291073 estimated_peak_memory_range: - min: 9965568 - max: 9965568 + min: 864256 + max: 864256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 199 - job_id: jo5mv3jq5 + job_id: jw56v6enp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:47:45Z' + timestamp: '2024-06-22T22:54:00Z' diff --git a/qai_hub_models/models/midas_quantized/README.md b/qai_hub_models/models/midas_quantized/README.md new file mode 100644 index 00000000..56c96394 --- /dev/null +++ b/qai_hub_models/models/midas_quantized/README.md @@ -0,0 +1,61 @@ +[![Qualcomm® AI Hub Models](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/quic-logo.jpg)](../../README.md) + + +# [Midas-V2-Quantized: Quantized Deep Convolutional Neural Network model for depth estimation](https://aihub.qualcomm.com/models/midas_quantized) + +Midas is designed for estimating depth at each point in an image. + +This is based on the implementation of Midas-V2-Quantized found +[here](https://github.com/isl-org/MiDaS). This repository contains scripts for optimized on-device +export suitable to run on Qualcomm® devices. More details on model performance +across various devices can be found [here](https://aihub.qualcomm.com/models/midas_quantized). + +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on +a hosted Qualcomm® device. + + + + +## Example & Usage + +Install the package via pip: +```bash +pip install "qai_hub_models[midas_quantized]" +``` + + +Once installed, run the following simple CLI demo: + +```bash +python -m qai_hub_models.models.midas_quantized.demo +``` +More details on the CLI tool can be found with the `--help` option. See +[demo.py](demo.py) for sample usage of the model including pre/post processing +scripts. Please refer to our [general instructions on using +models](../../../#getting-started) for more usage instructions. + +## Export for on-device deployment + +This repository contains export scripts that produce a model optimized for +on-device deployment. This can be run as follows: + +```bash +python -m qai_hub_models.models.midas_quantized.export +``` +Additional options are documented with the `--help` option. Note that the above +script requires access to Deployment instructions for Qualcomm® AI Hub. + +## License +- The license for the original implementation of Midas-V2-Quantized can be found + [here](https://github.com/isl-org/MiDaS/blob/master/LICENSE).
+- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) + +## References +* [Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer](https://arxiv.org/abs/1907.01341v3) +* [Source Model Implementation](https://github.com/isl-org/MiDaS) + +## Community +* Join [our AI Hub Slack community](https://qualcomm-ai-hub.slack.com/join/shared_invite/zt-2d5zsmas3-Sj0Q9TzslueCjS31eXG2UA#/shared-invite/email) to collaborate, post questions and learn more about on-device AI. +* For questions or feedback please [reach out to us](mailto:ai-hub-support@qti.qualcomm.com). + + diff --git a/qai_hub_models/models/midas_quantized/__init__.py b/qai_hub_models/models/midas_quantized/__init__.py new file mode 100644 index 00000000..b9d6a15c --- /dev/null +++ b/qai_hub_models/models/midas_quantized/__init__.py @@ -0,0 +1,8 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models.midas.app import MidasApp as App # noqa: F401 + +from .model import MODEL_ID # noqa: F401 +from .model import MidasQuantizable as Model # noqa: F401 diff --git a/qai_hub_models/models/midas_quantized/conftest.py b/qai_hub_models/models/midas_quantized/conftest.py new file mode 100644 index 00000000..10f869cc --- /dev/null +++ b/qai_hub_models/models/midas_quantized/conftest.py @@ -0,0 +1,39 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +import inspect + +import pytest + +from qai_hub_models.models.midas_quantized import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +# Instantiate the model only once for all tests. +# Mock from_pretrained to always return the initialized model. +# This speeds up tests and limits memory leaks. +@pytest.fixture(scope="module", autouse=True) +def cached_from_pretrained(): + with pytest.MonkeyPatch.context() as mp: + pretrained_cache = {} + from_pretrained = Model.from_pretrained + sig = inspect.signature(from_pretrained) + + @skip_clone_repo_check + def _cached_from_pretrained(*args, **kwargs): + cache_key = str(args) + str(kwargs) + model = pretrained_cache.get(cache_key, None) + if model: + return model + else: + model = from_pretrained(*args, **kwargs) + pretrained_cache[cache_key] = model + return model + + _cached_from_pretrained.__signature__ = sig + + mp.setattr(Model, "from_pretrained", _cached_from_pretrained) + yield mp diff --git a/qai_hub_models/models/midas_quantized/demo.py b/qai_hub_models/models/midas_quantized/demo.py new file mode 100644 index 00000000..0c152370 --- /dev/null +++ b/qai_hub_models/models/midas_quantized/demo.py @@ -0,0 +1,14 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models.midas.demo import midas_demo +from qai_hub_models.models.midas_quantized.model import MidasQuantizable + + +def main(is_test: bool = False): + midas_demo(MidasQuantizable, is_test) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/midas_quantized/export.py b/qai_hub_models/models/midas_quantized/export.py new file mode 100644 index 00000000..74a435bd --- /dev/null +++ b/qai_hub_models/models/midas_quantized/export.py @@ -0,0 +1,232 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import os +import warnings +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple, cast + +import qai_hub as hub + +from qai_hub_models.models.midas_quantized import Model +from qai_hub_models.utils.args import ( + export_parser, + get_input_spec_kwargs, + get_model_kwargs, +) +from qai_hub_models.utils.base_model import TargetRuntime +from qai_hub_models.utils.compare import torch_inference +from qai_hub_models.utils.printing import ( + print_inference_metrics, + print_on_target_demo_cmd, + print_profile_metrics_from_job, +) +from qai_hub_models.utils.qai_hub_helpers import ( + can_access_qualcomm_ai_hub, + export_without_hub_access, + transpose_channel_first_to_last, +) +from qai_hub_models.utils.qnn_helpers import get_qnn_inputs + + +def export_model( + device: str = "Samsung Galaxy S23 (Family)", + chipset: Optional[str] = None, + skip_profiling: bool = False, + skip_inferencing: bool = False, + skip_downloading: bool = False, + skip_summary: bool = False, + output_dir: Optional[str] = None, + target_runtime: TargetRuntime = TargetRuntime.TFLITE, + compile_options: str = "", + profile_options: str = "", + **additional_model_kwargs, +) -> Tuple[hub.CompileJob, Optional[hub.ProfileJob], Optional[hub.InferenceJob]] | List[ + str +]: + """ + This function accomplishes 6 main tasks: + + 1. Instantiates a PyTorch model and converts it to a traced TorchScript format. + 2. Compiles the model to an asset that can be run on device. + 3. Profiles the model performance on real devices. + 4. Inferences the model on sample inputs. + 5. Downloads the model asset to the local directory. + 6. Summarizes the results from profiling and inference. + + Each of the last four steps can be optionally skipped using the input options. + + Parameters: + device: Device for which to export the model. + Full list of available devices can be found by running `hub.get_devices()`. + Defaults to DEFAULT_DEVICE if not specified. + chipset: If set, will choose a random device with this chipset. + Overrides the `device` argument. + skip_profiling: If set, skips profiling of compiled model on real devices. + skip_inferencing: If set, skips computing on-device outputs from sample data. + skip_downloading: If set, skips downloading of compiled model. + skip_summary: If set, skips waiting for and summarizing results + from profiling and inference. + output_dir: Directory to store generated assets (e.g. compiled model). + Defaults to `/build/`. + target_runtime: Which on-device runtime to target. Default is TFLite. 
+ compile_options: Additional options to pass when submitting the compile job. + profile_options: Additional options to pass when submitting the profile job. + **additional_model_kwargs: Additional optional kwargs used to customize + `model_cls.from_pretrained` and `model.get_input_spec` + + Returns: + A 3-tuple of: + * A CompileJob object containing metadata about the compile job submitted to hub. + * A ProfileJob containing metadata about the profile job (None if profiling skipped). + * An InferenceJob containing metadata about the inference job (None if inferencing skipped). + """ + model_name = "midas_quantized" + output_path = Path(output_dir or Path.cwd() / "build" / model_name) + if chipset: + hub_device = hub.Device(attributes=f"chipset:{chipset}") + else: + hub_device = hub.Device(name=device) + if not can_access_qualcomm_ai_hub(): + return export_without_hub_access( + "midas_quantized", + "Midas-V2-Quantized", + device, + skip_profiling, + skip_inferencing, + skip_downloading, + skip_summary, + output_path, + target_runtime, + compile_options, + profile_options, + ) + + # 1. Initialize PyTorch model + model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) + input_spec = model.get_input_spec( + **get_input_spec_kwargs(model, additional_model_kwargs) + ) + + # Trace the model + source_model = model.convert_to_hub_source_model( + target_runtime, output_path, input_spec, check_trace=False + ) + if target_runtime == TargetRuntime.TFLITE: + quant_calibration_data = None + else: + quant_calibration_data = model.get_calibration_data(target_runtime, input_spec) + + # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) + channel_last_flags = ( + " --force_channel_last_input image" + if target_runtime != TargetRuntime.ONNX + else "" + ) + + # 2. Compile the model to an on-device asset + model_compile_options = model.get_hub_compile_options( + target_runtime, compile_options + channel_last_flags, hub_device + ) + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( + model=source_model, + input_specs=input_spec, + device=hub_device, + name=model_name, + calibration_data=quant_calibration_data, + options=model_compile_options, + ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) + + # 3. Profile the model asset on real devices + profile_job: Optional[hub.client.ProfileJob] = None + if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print(f"Profiling model {model_name} on a hosted device.") + submitted_profile_job = hub.submit_profile_job( + model=compile_job.get_target_model(), + device=hub_device, + name=model_name, + options=profile_options_all, + ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) + + # 4. Run inference on-device with sample inputs + inference_job: Optional[hub.client.InferenceJob] = None + if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print( + f"Running inference for {model_name} on a hosted device with example inputs." 
+ ) + sample_inputs = model.sample_inputs(input_spec) + hub_inputs = sample_inputs + if target_runtime == TargetRuntime.QNN: + hub_inputs = get_qnn_inputs(compile_job, sample_inputs) + # Convert inputs from channel first to channel last + hub_inputs = ( + sample_inputs + if target_runtime == TargetRuntime.ONNX + else transpose_channel_first_to_last("image", sample_inputs, target_runtime) + ) + submitted_inference_job = hub.submit_inference_job( + model=compile_job.get_target_model(), + inputs=hub_inputs, + device=hub_device, + name=model_name, + options=profile_options_all, + ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) + + # 5. Download the model asset to a local file + if not skip_downloading: + if target_runtime == TargetRuntime.QNN: + target_runtime_extension = "so" + elif target_runtime == TargetRuntime.TFLITE: + target_runtime_extension = "tflite" + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: + target_runtime_extension = "onnx" + + os.makedirs(output_path, exist_ok=True) + target_model: hub.Model = compile_job.get_target_model() # type: ignore + target_model.download( + str(output_path / f"{model_name}.{target_runtime_extension}") + ) + + # 6. Summarize the results from profiling and inference + if not skip_summary and not skip_profiling: + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore + print_profile_metrics_from_job(profile_job, profile_data) + + if not skip_summary and not skip_inferencing: + torch_out = torch_inference(model, sample_inputs) + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore + print_inference_metrics(inference_job, inference_result, torch_out) + + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + + return (compile_job, profile_job, inference_job) + + +def main(): + warnings.filterwarnings("ignore") + parser = export_parser(model_cls=Model) + args = parser.parse_args() + export_model(**vars(args)) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/midas_quantized/info.yaml b/qai_hub_models/models/midas_quantized/info.yaml new file mode 100644 index 00000000..921215a4 --- /dev/null +++ b/qai_hub_models/models/midas_quantized/info.yaml @@ -0,0 +1,35 @@ +name: Midas-V2-Quantized +# id must match with the model dir name in qai_hub_models +id: midas_quantized +status: public +headline: Quantized Deep Convolutional Neural Network model for depth estimation. +domain: Computer Vision +use_case: Depth Estimation +description: Midas is designed for estimating depth at each point in an image. 
+tags: + - quantized +research_paper: https://arxiv.org/abs/1907.01341v3 +research_paper_title: 'Towards Robust Monocular Depth Estimation: Mixing Datasets + for Zero-shot Cross-dataset Transfer' +license: https://github.com/isl-org/MiDaS/blob/master/LICENSE +deploy_license: + https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +source_repo: https://github.com/isl-org/MiDaS +technical_details: + Model checkpoint: MiDaS_small + Input resolution: 256x256 + Number of parameters: 16.6M + Model size: 16.6 MB +applicable_scenarios: + - Anomaly Detection + - Inventory Management +related_models: [] +form_factors: + - Phone + - Tablet + - IoT +has_static_banner: yes +has_animated_banner: no +license_type: mit +deploy_license_type: AI Model Hub License +dataset: [] diff --git a/qai_hub_models/models/midas_quantized/model.py b/qai_hub_models/models/midas_quantized/model.py new file mode 100644 index 00000000..a26b6fc2 --- /dev/null +++ b/qai_hub_models/models/midas_quantized/model.py @@ -0,0 +1,103 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from __future__ import annotations + +# isort: off +# This verifies aimet is installed, and this must be included first. +from qai_hub_models.utils.quantization_aimet import ( + AIMETQuantizableMixin, + constrain_quantized_inputs_to_image_range, + tie_observers, + convert_all_depthwise_to_per_tensor, +) + +# isort: on + +import torch +from aimet_torch.batch_norm_fold import fold_all_batch_norms +from aimet_torch.cross_layer_equalization import CrossLayerScaling +from aimet_torch.model_preparer import prepare_model +from aimet_torch.quantsim import QuantizationSimModel + +from qai_hub_models.models.midas.model import Midas +from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset + +MODEL_ID = __name__.split(".")[-2] +MODEL_ASSET_VERSION = 1 +DEFAULT_ENCODINGS = "midas_quantized_encodings.json" + + +class MidasQuantizable(AIMETQuantizableMixin, Midas): + """Midas with post train quantization support. + + Supports only 8 bit weights and activations, and only loads pre-quantized checkpoints. + Support for quantizing using your own weights & data will come at a later date.""" + + def __init__( + self, + model: QuantizationSimModel, + ) -> None: + # Input is already normalized by sim_model. Disable it in the wrapper model. + Midas.__init__(self, model.model, normalize_input=False) + AIMETQuantizableMixin.__init__( + self, + model, + ) + + @classmethod + def from_pretrained( + cls, + aimet_encodings: str | None = "DEFAULT", + ) -> "MidasQuantizable": + """ + Parameters: + aimet_encodings: + if "DEFAULT": Loads the model with aimet encodings calibrated on imagenette. + elif None: Doesn't load any encodings. Used when computing encodings. + else: Interprets as a filepath and loads the encodings stored there. 
+ """ + model = Midas.from_pretrained() + input_shape = cls.get_input_spec()["image"][0] + dummy_input = torch.rand(input_shape) + + model = prepare_model(model) + fold_all_batch_norms(model, input_shape, dummy_input) + CrossLayerScaling.scale_model(model, input_shape, dummy_input) + sim = QuantizationSimModel( + model, + quant_scheme="tf_enhanced", + default_param_bw=8, + default_output_bw=8, + config_file=get_default_aimet_config(), + dummy_input=dummy_input, + ) + convert_all_depthwise_to_per_tensor(sim.model) + tie_observers(sim) + constrain_quantized_inputs_to_image_range(sim) + + if aimet_encodings: + if aimet_encodings == "DEFAULT": + aimet_encodings = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, DEFAULT_ENCODINGS + ).fetch() + sim.load_encodings(aimet_encodings, strict=False) + + return cls(sim) + + def forward(self, image): + """ + Runs the model on an image tensor and returns a tensor of depth estimates + + Parameters: + image: A [1, 3, H, W] image. + Pixel values pre-processed for encoder consumption. + Range: float[0, 1] if self.normalize_input, else ~[-2.5, 2.5] + 3-channel Color Space: RGB + + Returns: + Tensor of depth estimates of size [1, H, W]. + """ + return self.model(image) diff --git a/qai_hub_models/models/midas_quantized/perf.yaml b/qai_hub_models/models/midas_quantized/perf.yaml new file mode 100644 index 00000000..c8b8921d --- /dev/null +++ b/qai_hub_models/models/midas_quantized/perf.yaml @@ -0,0 +1,265 @@ +aggregated: + supported_oses: + - Android + supported_devices: + - Google Pixel 3 + - Google Pixel 3a + - Google Pixel 3a XL + - Google Pixel 4 + - Google Pixel 4a + - Google Pixel 5a 5G + - QCS8250 (Proxy) + - QCS8550 (Proxy) + - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) + - Samsung Galaxy S21 + - Samsung Galaxy S21 Ultra + - Samsung Galaxy S21+ + - Samsung Galaxy S22 5G + - Samsung Galaxy S22 Ultra 5G + - Samsung Galaxy S22+ 5G + - Samsung Galaxy S23 + - Samsung Galaxy S23 Ultra + - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ + - Samsung Galaxy Tab S8 + - Snapdragon X Elite CRD + - Xiaomi 12 + - Xiaomi 12 Pro + supported_chipsets: + - Qcs8250 + - Qcs8550 + - Sa8540p + - Sa8775p + - Snapdragon® 8 Gen 1 + - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 + - Snapdragon® 888 + - Snapdragon® X Elite +models: +- name: Midas-V2-Quantized + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 1154.0 + throughput: 866.5511265164645 + estimated_peak_memory_range: + min: 12288 + max: 1987952 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 144 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 144 + job_id: jwgomyk15 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1482.0 + throughput: 674.7638326585695 + estimated_peak_memory_range: + min: 16384 + max: 287985312 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 148 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 148 + job_id: jmg98vjmp + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-06-22T22:55:02Z' + - torchscript_onnx_tflite: + inference_time: 831.0 + throughput: 1203.3694344163657 + estimated_peak_memory_range: + min: 12288 + max: 87642336 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 144 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 144 + job_id: j1pv430zp 
+ job_status: Passed + torchscript_onnx_qnn: + inference_time: 1073.0 + throughput: 931.9664492078285 + estimated_peak_memory_range: + min: 208896 + max: 58965184 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 148 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 148 + job_id: jnp130yn5 + job_status: Passed + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-06-22T22:55:03Z' + - torchscript_onnx_tflite: + inference_time: 1161.0 + throughput: 861.3264427217915 + estimated_peak_memory_range: + min: 12288 + max: 1709936 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 144 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 144 + job_id: j7gj1xz1g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1496.0 + throughput: 668.4491978609626 + estimated_peak_memory_range: + min: 151552 + max: 165684208 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 148 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 148 + job_id: jz5wxo2zp + job_status: Passed + reference_device_info: + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: Qcs8550 + timestamp: '2024-06-22T22:55:05Z' + - torchscript_onnx_tflite: + inference_time: 1160.0 + throughput: 862.0689655172414 + estimated_peak_memory_range: + min: 12288 + max: 1648160 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 144 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 144 + job_id: jlpe29e8p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1493.0 + throughput: 669.7923643670462 + estimated_peak_memory_range: + min: 20480 + max: 11342368 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 148 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 148 + job_id: jmg98vjqp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:55:07Z' + - torchscript_onnx_tflite: + inference_time: 3825.0 + throughput: 261.437908496732 + estimated_peak_memory_range: + min: 12288 + max: 50396464 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 144 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 144 + job_id: jygzweo4g + job_status: Passed + reference_device_info: + name: RB3 Gen 2 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: Qcs6490 + timestamp: '2024-06-22T22:54:59Z' + - torchscript_onnx_tflite: + inference_time: 15476.0 + throughput: 64.61617989144482 + estimated_peak_memory_range: + min: 106496 + max: 3734664 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 144 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 144 + job_id: jz5wxo24p + job_status: Passed + reference_device_info: + name: RB5 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: Qcs8250 + timestamp: '2024-06-22T22:55:00Z' + - torchscript_onnx_qnn: + inference_time: 1521.0 + throughput: 657.4621959237344 + estimated_peak_memory_range: + min: 483328 + max: 483328 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 148 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 148 + job_id: jvgd0we6p + job_status: Passed + reference_device_info: + name: 
Snapdragon X Elite CRD + os: '11' + form_factor: Compute + os_name: Windows + manufacturer: Qualcomm + chipset: Snapdragon® X Elite + timestamp: '2024-06-22T22:55:04Z' diff --git a/qai_hub_models/models/midas_quantized/requirements.txt b/qai_hub_models/models/midas_quantized/requirements.txt new file mode 100644 index 00000000..4942579d --- /dev/null +++ b/qai_hub_models/models/midas_quantized/requirements.txt @@ -0,0 +1,2 @@ +aimet-torch==1.31.2; sys_platform == "linux" +timm==1.0.3 diff --git a/qai_hub_models/models/midas_quantized/test.py b/qai_hub_models/models/midas_quantized/test.py new file mode 100644 index 00000000..032a129e --- /dev/null +++ b/qai_hub_models/models/midas_quantized/test.py @@ -0,0 +1,39 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +import numpy as np + +from qai_hub_models.models.midas.app import MidasApp +from qai_hub_models.models.midas.demo import INPUT_IMAGE_ADDRESS +from qai_hub_models.models.midas_quantized.demo import main as demo_main +from qai_hub_models.models.midas_quantized.model import ( + MODEL_ASSET_VERSION, + MODEL_ID, + MidasQuantizable, +) +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image +from qai_hub_models.utils.testing import skip_clone_repo_check + +OUTPUT_IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, "midas_output.png" +) + + +# Verify that the output from Torch is as expected. +@skip_clone_repo_check +def test_task(): + (_, _, height, width) = MidasQuantizable.get_input_spec()["image"][0] + app = MidasApp(MidasQuantizable.from_pretrained(), height, width) + original_image = load_image(INPUT_IMAGE_ADDRESS) + output_image = app.estimate_depth(original_image) + output_image_oracle = load_image(OUTPUT_IMAGE_ADDRESS) + + np.testing.assert_allclose( + np.asarray(output_image), np.asarray(output_image_oracle), atol=3 + ) + + +@skip_clone_repo_check +def test_demo(): + demo_main(is_test=True) diff --git a/qai_hub_models/models/mnasnet05/export.py b/qai_hub_models/models/mnasnet05/export.py index ed49880c..19e67885 100644 --- a/qai_hub_models/models/mnasnet05/export.py +++ b/qai_hub_models/models/mnasnet05/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime 
in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mnasnet05/perf.yaml b/qai_hub_models/models/mnasnet05/perf.yaml index 0ca342e3..37d95b49 100644 --- a/qai_hub_models/models/mnasnet05/perf.yaml +++ b/qai_hub_models/models/mnasnet05/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: MNASNet05 performance_metrics: - torchscript_onnx_tflite: - inference_time: 782.0 - throughput: 1278.772378516624 + inference_time: 775.0 + throughput: 1290.3225806451612 estimated_peak_memory_range: - min: 20480 - max: 1961704 + min: 24576 + max: 1553160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jlpe4kj05 + job_id: jegnx2mv5 job_status: Passed torchscript_onnx_qnn: - inference_time: 826.0 - throughput: 1210.6537530266344 + inference_time: 823.0 + throughput: 1215.0668286755772 estimated_peak_memory_range: - min: 618496 - max: 5537568 + min: 12288 + max: 35900696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jmg99w6vg + job_id: j2p0ky225 job_status: Passed - torchscript_onnx_ort: - inference_time: 798.0 - throughput: 1253.1328320802006 + torchscript_onnx: + inference_time: 800.0 + throughput: 1250.0 estimated_peak_memory_range: - min: 12288 - max: 155086488 + min: 16384 + max: 19204504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 104 - job_id: jqp4jvqlp + job_id: jw56v6zvp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:49:16Z' + timestamp: '2024-06-22T22:55:41Z' - torchscript_onnx_tflite: - inference_time: 546.0 - throughput: 1831.5018315018315 + inference_time: 521.0 + throughput: 1919.3857965451057 estimated_peak_memory_range: min: 12288 - max: 46076672 + max: 48642336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jygzvr16p + job_id: jopr9k2vp job_status: Passed torchscript_onnx_qnn: - inference_time: 564.0 - throughput: 1773.049645390071 + inference_time: 565.0 + throughput: 1769.9115044247787 estimated_peak_memory_range: - min: 0 - max: 41703392 + min: 626688 + max: 37475152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jnp1qerlg + job_id: j1p88omzp job_status: Passed - torchscript_onnx_ort: - inference_time: 560.0 - throughput: 1785.7142857142858 + torchscript_onnx: + inference_time: 578.0 + throughput: 1730.1038062283737 estimated_peak_memory_range: - min: 31727616 - max: 59957408 + min: 225280 + max: 21866704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 104 - job_id: j0pxeyv95 + job_id: j1p38k1x5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 
+142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:49:17Z' + timestamp: '2024-06-22T22:55:42Z' - torchscript_onnx_tflite: - inference_time: 773.0 - throughput: 1293.6610608020699 + inference_time: 783.0 + throughput: 1277.139208173691 estimated_peak_memory_range: - min: 12288 - max: 159861568 + min: 16384 + max: 1688792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jz5wmqjjg + job_id: jep2j89x5 job_status: Passed torchscript_onnx_qnn: - inference_time: 826.0 - throughput: 1210.6537530266344 + inference_time: 814.0 + throughput: 1228.5012285012285 estimated_peak_memory_range: - min: 16384 - max: 14027976 + min: 618496 + max: 5453992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jz57vxzr5 + job_id: jn5qw8r75 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:49:15Z' + timestamp: '2024-06-22T22:55:39Z' + - torchscript_onnx_tflite: + inference_time: 781.0 + throughput: 1280.4097311139565 + estimated_peak_memory_range: + min: 24576 + max: 1741208 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 71 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 71 + job_id: jqpynejrg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 820.0 + throughput: 1219.5121951219512 + estimated_peak_memory_range: + min: 16384 + max: 98992176 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 103 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 103 + job_id: j1gl7n2e5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:55:40Z' - torchscript_onnx_qnn: - inference_time: 946.0 - throughput: 1057.0824524312895 + inference_time: 942.0 + throughput: 1061.5711252653928 estimated_peak_memory_range: - min: 606208 - max: 606208 + min: 602112 + max: 602112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jvgd7ojlg + job_id: jogkdzqyp job_status: Passed - torchscript_onnx_ort: - inference_time: 807.0 - throughput: 1239.1573729863692 + torchscript_onnx: + inference_time: 810.0 + throughput: 1234.567901234568 estimated_peak_memory_range: - min: 18001920 - max: 18001920 + min: 14974976 + max: 14974976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 104 - job_id: jo5mv3rq5 + job_id: jwgomyn45 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:49:18Z' + timestamp: '2024-06-22T22:55:44Z' diff --git a/qai_hub_models/models/mobilenet_v2/export.py b/qai_hub_models/models/mobilenet_v2/export.py index 3201e32a..23b8df96 100644 --- a/qai_hub_models/models/mobilenet_v2/export.py +++ b/qai_hub_models/models/mobilenet_v2/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = 
False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mobilenet_v2/model.py b/qai_hub_models/models/mobilenet_v2/model.py index e52bd028..16f8cf09 100644 --- a/qai_hub_models/models/mobilenet_v2/model.py +++ b/qai_hub_models/models/mobilenet_v2/model.py @@ -34,7 +34,6 @@ def from_pretrained(cls, weights: str = MOBILENETV2_WEIGHTS) -> MobileNetV2: k.replace("classifier.1", "classifier"): v for k, v in checkpoint.items() } model.load_state_dict(state_dict) - model.eval() return cls(model) diff --git a/qai_hub_models/models/mobilenet_v2/perf.yaml b/qai_hub_models/models/mobilenet_v2/perf.yaml index bdd64620..1a98ff1e 100644 --- a/qai_hub_models/models/mobilenet_v2/perf.yaml +++ b/qai_hub_models/models/mobilenet_v2/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: MobileNet-v2 performance_metrics: - torchscript_onnx_tflite: - inference_time: 940.0 - throughput: 1063.8297872340424 + inference_time: 927.0 + throughput: 1078.7486515641856 estimated_peak_memory_range: - min: 57344 - max: 1721784 + min: 24576 + max: 1350608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jopr1ekeg + job_id: j7gj1x27g job_status: Passed torchscript_onnx_qnn: - inference_time: 1266.0 - throughput: 789.8894154818325 + inference_time: 1247.0 + throughput: 801.924619085806 estimated_peak_memory_range: - min: 622592 - max: 53135336 + min: 16384 + max: 40792520 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: j2p0elye5 + job_id: jmg98vyqp job_status: Passed - torchscript_onnx_ort: - inference_time: 938.0 - throughput: 1066.0980810234541 + torchscript_onnx: + inference_time: 934.0 + throughput: 1070.6638115631692 estimated_peak_memory_range: - min: 16384 - max: 21567360 + min: 12288 + max: 105144288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: j1gle3nlp + job_id: j0pxmv6jg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: 
os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:49:43Z' + timestamp: '2024-06-22T22:56:13Z' - torchscript_onnx_tflite: - inference_time: 643.0 - throughput: 1555.2099533437015 + inference_time: 615.0 + throughput: 1626.0162601626016 estimated_peak_memory_range: - min: 0 - max: 58244480 + min: 16384 + max: 61192112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jep23l8mg + job_id: jlpe29w7p job_status: Passed torchscript_onnx_qnn: - inference_time: 826.0 - throughput: 1210.6537530266344 + inference_time: 840.0 + throughput: 1190.4761904761904 estimated_peak_memory_range: min: 618496 - max: 40424432 + max: 39385632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: j1p8wzo8p + job_id: jnp130wk5 job_status: Passed - torchscript_onnx_ort: - inference_time: 666.0 - throughput: 1501.5015015015015 + torchscript_onnx: + inference_time: 669.0 + throughput: 1494.7683109118086 estimated_peak_memory_range: - min: 487424 - max: 27269952 + min: 540672 + max: 23244304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jw56qn67g + job_id: jo5m4r6y5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:49:44Z' + timestamp: '2024-06-22T22:56:14Z' - torchscript_onnx_tflite: - inference_time: 941.0 - throughput: 1062.6992561105208 + inference_time: 930.0 + throughput: 1075.268817204301 estimated_peak_memory_range: - min: 20480 - max: 1483664 + min: 12288 + max: 11068088 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jqpyv6e4p + job_id: jygzwejzg job_status: Passed torchscript_onnx_qnn: - inference_time: 1272.0 - throughput: 786.1635220125786 + inference_time: 1253.0 + throughput: 798.0845969672786 estimated_peak_memory_range: - min: 618496 - max: 41687968 + min: 86016 + max: 7149840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jn5q938mp + job_id: jz576zlqg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:49:42Z' + timestamp: '2024-06-22T22:56:10Z' + - torchscript_onnx_tflite: + inference_time: 928.0 + throughput: 1077.5862068965516 + estimated_peak_memory_range: + min: 12288 + max: 1683024 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 72 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 72 + job_id: jz5wxo3zp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1256.0 + throughput: 796.1783439490446 + estimated_peak_memory_range: + min: 622592 + max: 5426288 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 105 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 105 + job_id: jqp48qdqg + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:56:11Z' - torchscript_onnx_qnn: - inference_time: 1555.0 - throughput: 
643.0868167202573 + inference_time: 1342.0 + throughput: 745.156482861401 estimated_peak_memory_range: - min: 1355776 - max: 1355776 + min: 602112 + max: 602112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jogkr3zo5 + job_id: jvgd0wqkp job_status: Passed - torchscript_onnx_ort: - inference_time: 987.0 - throughput: 1013.1712259371834 + torchscript_onnx: + inference_time: 992.0 + throughput: 1008.0645161290323 estimated_peak_memory_range: - min: 5607424 - max: 5607424 + min: 3272704 + max: 3272704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: j1p3qekz5 + job_id: jegnx23v5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:49:45Z' + timestamp: '2024-06-22T22:56:15Z' diff --git a/qai_hub_models/models/mobilenet_v2_quantized/export.py b/qai_hub_models/models/mobilenet_v2_quantized/export.py index b025f312..4cafe05d 100644 --- a/qai_hub_models/models/mobilenet_v2_quantized/export.py +++ b/qai_hub_models/models/mobilenet_v2_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mobilenet_v2_quantized/model.py b/qai_hub_models/models/mobilenet_v2_quantized/model.py index f391a9b4..d884a6c7 100644 --- a/qai_hub_models/models/mobilenet_v2_quantized/model.py +++ b/qai_hub_models/models/mobilenet_v2_quantized/model.py @@ -86,5 +86,4 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() return cls(sim) diff --git a/qai_hub_models/models/mobilenet_v2_quantized/perf.yaml b/qai_hub_models/models/mobilenet_v2_quantized/perf.yaml index f7621f54..a3f50962 100644 --- a/qai_hub_models/models/mobilenet_v2_quantized/perf.yaml +++ b/qai_hub_models/models/mobilenet_v2_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: 
MobileNet-v2-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 291.0 - throughput: 3436.426116838488 + inference_time: 288.0 + throughput: 3472.222222222222 estimated_peak_memory_range: - min: 53248 - max: 1718392 + min: 12288 + max: 1528616 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: j1pvzv3mg + job_id: jep2j8lx5 job_status: Passed torchscript_onnx_qnn: - inference_time: 647.0 - throughput: 1545.595054095827 + inference_time: 653.0 + throughput: 1531.3935681470139 estimated_peak_memory_range: - min: 45056 - max: 16933008 + min: 24576 + max: 6402824 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jmg99wvvg - job_status: Passed - torchscript_onnx_ort: - inference_time: 549.0 - throughput: 1821.4936247723133 - estimated_peak_memory_range: - min: 12288 - max: 22837192 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 74 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 74 - job_id: jmg99w1lg + job_id: j1gl7n3e5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:50:23Z' + timestamp: '2024-06-22T22:56:51Z' - torchscript_onnx_tflite: - inference_time: 215.0 - throughput: 4651.162790697675 + inference_time: 234.0 + throughput: 4273.504273504273 estimated_peak_memory_range: min: 12288 - max: 38045216 + max: 39896768 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: j7gjkex85 + job_id: jqpyne6rg job_status: Passed torchscript_onnx_qnn: - inference_time: 474.0 - throughput: 2109.7046413502107 + inference_time: 476.0 + throughput: 2100.840336134454 estimated_peak_memory_range: min: 163840 - max: 38345936 + max: 35351600 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jnp1qe0lg - job_status: Passed - torchscript_onnx_ort: - inference_time: 395.0 - throughput: 2531.6455696202534 - estimated_peak_memory_range: - min: 12288 - max: 23651472 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 74 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 74 - job_id: jnp1qel2g + job_id: jw56v6nvp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +120,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:50:24Z' + timestamp: '2024-06-22T22:56:52Z' - torchscript_onnx_tflite: - inference_time: 301.0 - throughput: 3322.2591362126245 + inference_time: 295.0 + throughput: 3389.830508474576 estimated_peak_memory_range: min: 12288 - max: 1685448 + max: 1336624 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jlpe4k905 + job_id: j2p0kyl25 job_status: Passed torchscript_onnx_qnn: - inference_time: 654.0 - throughput: 1529.051987767584 + inference_time: 655.0 + throughput: 1526.7175572519084 estimated_peak_memory_range: - min: 16384 - max: 123157128 + min: 32768 + max: 6364808 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jz5wmqv6g + job_id: jwgomy345 
job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +158,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:50:22Z' + timestamp: '2024-06-22T22:56:54Z' - torchscript_onnx_tflite: - inference_time: 850.0 - throughput: 1176.4705882352941 + inference_time: 296.0 + throughput: 3378.3783783783783 estimated_peak_memory_range: min: 12288 - max: 24025456 + max: 1915592 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,7 +172,45 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jygzvre6p + job_id: j1p88ozzp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 664.0 + throughput: 1506.0240963855422 + estimated_peak_memory_range: + min: 16384 + max: 119736200 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 71 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 71 + job_id: j1pv43v7p + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:56:56Z' + - torchscript_onnx_tflite: + inference_time: 870.0 + throughput: 1149.4252873563219 + estimated_peak_memory_range: + min: 12288 + max: 25149472 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 72 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 72 + job_id: jogkdz3yp job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -207,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-08T22:50:16Z' + timestamp: '2024-06-22T22:56:48Z' - torchscript_onnx_tflite: - inference_time: 7601.0 - throughput: 131.56163662675965 + inference_time: 7445.0 + throughput: 134.31833445265278 estimated_peak_memory_range: - min: 253952 - max: 8158832 + min: 40960 + max: 7492840 primary_compute_unit: NPU precision: int8 layer_info: @@ -221,7 +233,7 @@ models: layers_on_gpu: 2 layers_on_cpu: 0 total_layers: 72 - job_id: jz5wmqojg + job_id: jn5qw8375 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -230,13 +242,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-08T22:50:17Z' + timestamp: '2024-06-22T22:56:49Z' - torchscript_onnx_qnn: - inference_time: 740.0 - throughput: 1351.3513513513512 + inference_time: 739.0 + throughput: 1353.1799729364006 estimated_peak_memory_range: - min: 696320 - max: 696320 + min: 540672 + max: 540672 primary_compute_unit: NPU precision: int8 layer_info: @@ -244,22 +256,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jvgd7owlg - job_status: Passed - torchscript_onnx_ort: - inference_time: 554.0 - throughput: 1805.0541516245487 - estimated_peak_memory_range: - min: 20283392 - max: 20283392 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 74 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 74 - job_id: jvgd7o9eg + job_id: j1p38kex5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -268,4 +265,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:50:25Z' + timestamp: '2024-06-22T22:56:53Z' diff --git a/qai_hub_models/models/mobilenet_v3_large/export.py b/qai_hub_models/models/mobilenet_v3_large/export.py index da4d660d..934fc5ab 100644 --- a/qai_hub_models/models/mobilenet_v3_large/export.py +++ b/qai_hub_models/models/mobilenet_v3_large/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = 
"Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mobilenet_v3_large/perf.yaml b/qai_hub_models/models/mobilenet_v3_large/perf.yaml index 8ea50f02..de6f07e1 100644 --- a/qai_hub_models/models/mobilenet_v3_large/perf.yaml +++ b/qai_hub_models/models/mobilenet_v3_large/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,26 +38,26 @@ models: - name: MobileNet-v3-Large performance_metrics: - torchscript_onnx_tflite: - inference_time: 999.0 - throughput: 1001.001001001001 + inference_time: 1009.0 + throughput: 991.0802775024777 estimated_peak_memory_range: - min: 16384 - max: 1600024 + min: 12288 + max: 1911392 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 136 + layers_on_npu: 128 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 136 - job_id: jqp4jvovp + total_layers: 128 + job_id: jvgd0wokp job_status: Passed torchscript_onnx_qnn: - inference_time: 1048.0 - throughput: 954.1984732824427 + inference_time: 1045.0 + throughput: 956.9377990430622 estimated_peak_memory_range: - min: 647168 - max: 48048184 + min: 16384 + max: 59446776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jegnr3yr5 + job_id: jo5m4r3y5 job_status: Passed - torchscript_onnx_ort: - inference_time: 1039.0 - throughput: 962.4639076034649 + torchscript_onnx: + inference_time: 1079.0 + throughput: 926.7840593141798 estimated_peak_memory_range: min: 12288 - max: 82696432 + max: 82439152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 162 - job_id: j2p0elq65 + job_id: j2p0kyr25 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,28 +89,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:50:50Z' + timestamp: '2024-06-22T22:57:31Z' - torchscript_onnx_tflite: - inference_time: 703.0 - throughput: 1422.475106685633 + inference_time: 686.0 + throughput: 
1457.725947521866 estimated_peak_memory_range: - min: 12288 - max: 62391952 + min: 16384 + max: 64600528 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 136 + layers_on_npu: 128 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 136 - job_id: j0pxeyj15 + total_layers: 128 + job_id: jz576zxqg job_status: Passed torchscript_onnx_qnn: - inference_time: 718.0 - throughput: 1392.757660167131 + inference_time: 715.0 + throughput: 1398.6013986013986 estimated_peak_memory_range: - min: 618496 - max: 51941056 + min: 0 + max: 47907456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jopr1eq9g + job_id: jegnx2ev5 job_status: Passed - torchscript_onnx_ort: - inference_time: 713.0 - throughput: 1402.5245441795232 + torchscript_onnx: + inference_time: 745.0 + throughput: 1342.2818791946308 estimated_peak_memory_range: - min: 618496 - max: 29120336 + min: 12288 + max: 20541504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 162 - job_id: j1p8wz9xp + job_id: j1p88o7zp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,28 +142,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:50:51Z' + timestamp: '2024-06-22T22:57:32Z' - torchscript_onnx_tflite: - inference_time: 1001.0 - throughput: 999.000999000999 + inference_time: 1006.0 + throughput: 994.0357852882704 estimated_peak_memory_range: - min: 45056 - max: 1507408 + min: 32768 + max: 1979760 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 136 + layers_on_npu: 128 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 136 - job_id: jo5mv32w5 + total_layers: 128 + job_id: jqp48qvqg job_status: Passed torchscript_onnx_qnn: - inference_time: 1042.0 - throughput: 959.6928982725528 + inference_time: 1028.0 + throughput: 972.7626459143969 estimated_peak_memory_range: - min: 626688 - max: 69049656 + min: 217088 + max: 68997232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jqpyv6w7p + job_id: jep2j8mx5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:50:49Z' + timestamp: '2024-06-22T22:57:29Z' + - torchscript_onnx_tflite: + inference_time: 1014.0 + throughput: 986.1932938856016 + estimated_peak_memory_range: + min: 12288 + max: 1724600 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 128 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 128 + job_id: j0pxmvyjg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1046.0 + throughput: 956.0229445506692 + estimated_peak_memory_range: + min: 0 + max: 69030600 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 144 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 144 + job_id: jqpynedrg + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:57:30Z' - torchscript_onnx_qnn: - inference_time: 1199.0 - throughput: 834.0283569641368 + inference_time: 1156.0 + throughput: 865.0519031141869 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 
0 total_layers: 144 - job_id: jep23l64g + job_id: jopr9kyvp job_status: Passed - torchscript_onnx_ort: - inference_time: 1086.0 - throughput: 920.8103130755064 + torchscript_onnx: + inference_time: 1066.0 + throughput: 938.0863039399625 estimated_peak_memory_range: - min: 51040256 - max: 51040256 + min: 51892224 + max: 51892224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 162 - job_id: jogkr3n25 + job_id: jogkdzyyp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:50:52Z' + timestamp: '2024-06-22T22:57:33Z' diff --git a/qai_hub_models/models/mobilenet_v3_large_quantized/export.py b/qai_hub_models/models/mobilenet_v3_large_quantized/export.py index 7948d791..8d733eb7 100644 --- a/qai_hub_models/models/mobilenet_v3_large_quantized/export.py +++ b/qai_hub_models/models/mobilenet_v3_large_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mobilenet_v3_large_quantized/model.py b/qai_hub_models/models/mobilenet_v3_large_quantized/model.py index fdcf83ec..b13a9d4c 100644 --- a/qai_hub_models/models/mobilenet_v3_large_quantized/model.py +++ b/qai_hub_models/models/mobilenet_v3_large_quantized/model.py @@ -78,5 +78,4 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() return cls(sim) diff --git a/qai_hub_models/models/mobilenet_v3_large_quantized/perf.yaml b/qai_hub_models/models/mobilenet_v3_large_quantized/perf.yaml index b5cebb9b..e0a45276 100644 --- a/qai_hub_models/models/mobilenet_v3_large_quantized/perf.yaml +++ b/qai_hub_models/models/mobilenet_v3_large_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: MobileNet-v3-Large-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 371.0 - throughput: 2695.4177897574123 + inference_time: 359.0 + throughput: 2785.515320334262 
estimated_peak_memory_range: - min: 16384 - max: 1268000 + min: 24576 + max: 1507232 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 135 - job_id: j1gle3z8p + job_id: j1gl7nke5 job_status: Passed torchscript_onnx_qnn: inference_time: 622.0 throughput: 1607.717041800643 estimated_peak_memory_range: min: 16384 - max: 12184136 + max: 63969296 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j7gjkemx5 - job_status: Passed - torchscript_onnx_ort: - inference_time: 5186.0 - throughput: 192.8268414963363 - estimated_peak_memory_range: - min: 18886656 - max: 272750360 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 147 - layers_on_gpu: 0 - layers_on_cpu: 24 - total_layers: 171 - job_id: jmg99welg + job_id: jlpe29v7p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:51:35Z' + timestamp: '2024-06-22T22:58:15Z' - torchscript_onnx_tflite: - inference_time: 255.0 - throughput: 3921.5686274509803 + inference_time: 254.0 + throughput: 3937.0078740157483 estimated_peak_memory_range: min: 12288 - max: 48279952 + max: 51307808 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 135 - job_id: jw56qnj0g + job_id: jw56v61vp job_status: Passed torchscript_onnx_qnn: - inference_time: 451.0 - throughput: 2217.2949002217297 + inference_time: 450.0 + throughput: 2222.222222222222 estimated_peak_memory_range: min: 163840 - max: 50970896 + max: 41166592 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jlpe4k115 - job_status: Passed - torchscript_onnx_ort: - inference_time: 4385.0 - throughput: 228.05017103762827 - estimated_peak_memory_range: - min: 17133568 - max: 61050864 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 147 - layers_on_gpu: 0 - layers_on_cpu: 24 - total_layers: 171 - job_id: jnp1qex2g + job_id: jygzwe7zg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +120,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:51:36Z' + timestamp: '2024-06-22T22:58:16Z' - torchscript_onnx_tflite: - inference_time: 353.0 - throughput: 2832.8611898016998 + inference_time: 356.0 + throughput: 2808.9887640449438 estimated_peak_memory_range: min: 12288 - max: 2106960 + max: 1436016 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 135 - job_id: j1p3qe3l5 + job_id: j1p38kmx5 job_status: Passed torchscript_onnx_qnn: inference_time: 626.0 throughput: 1597.444089456869 estimated_peak_memory_range: - min: 184320 - max: 6302512 + min: 12288 + max: 15664936 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jz5wmqn6g + job_id: jmg98v4qp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +158,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:51:34Z' + timestamp: '2024-06-22T22:58:19Z' - torchscript_onnx_tflite: - inference_time: 1170.0 - throughput: 
854.7008547008547 + inference_time: 351.0 + throughput: 2849.002849002849 estimated_peak_memory_range: min: 12288 - max: 28920160 + max: 1946592 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,7 +172,45 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 135 - job_id: jwgoe30xp + job_id: jwgomyv45 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 624.0 + throughput: 1602.5641025641025 + estimated_peak_memory_range: + min: 16384 + max: 7871216 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 126 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 126 + job_id: jnp1308k5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:58:20Z' + - torchscript_onnx_tflite: + inference_time: 1178.0 + throughput: 848.8964346349745 + estimated_peak_memory_range: + min: 12288 + max: 31747312 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 135 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 135 + job_id: j1pv43w7p job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -207,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-08T22:51:29Z' + timestamp: '2024-06-22T22:58:13Z' - torchscript_onnx_tflite: - inference_time: 6878.0 - throughput: 145.39110206455365 + inference_time: 6759.0 + throughput: 147.95088030773783 estimated_peak_memory_range: - min: 45056 - max: 2149272 + min: 16384 + max: 2477520 primary_compute_unit: NPU precision: int8 layer_info: @@ -221,7 +233,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 135 - job_id: j1pvzvojg + job_id: j7gj1xl7g job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -230,13 +242,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-08T22:51:30Z' + timestamp: '2024-06-22T22:58:14Z' - torchscript_onnx_qnn: - inference_time: 716.0 - throughput: 1396.6480446927374 + inference_time: 715.0 + throughput: 1398.6013986013986 estimated_peak_memory_range: - min: 643072 - max: 643072 + min: 602112 + max: 602112 primary_compute_unit: NPU precision: int8 layer_info: @@ -244,22 +256,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jygzvr9kp - job_status: Passed - torchscript_onnx_ort: - inference_time: 4701.0 - throughput: 212.72069772388852 - estimated_peak_memory_range: - min: 26042368 - max: 26042368 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 147 - layers_on_gpu: 0 - layers_on_cpu: 24 - total_layers: 171 - job_id: jvgd7oleg + job_id: jz5wxo9zp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -268,4 +265,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:51:37Z' + timestamp: '2024-06-22T22:58:18Z' diff --git a/qai_hub_models/models/mobilenet_v3_small/export.py b/qai_hub_models/models/mobilenet_v3_small/export.py index f7fe3203..327d69c9 100644 --- a/qai_hub_models/models/mobilenet_v3_small/export.py +++ b/qai_hub_models/models/mobilenet_v3_small/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), 
make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mobilenet_v3_small/perf.yaml b/qai_hub_models/models/mobilenet_v3_small/perf.yaml index 10857f0f..1ac1f53d 100644 --- a/qai_hub_models/models/mobilenet_v3_small/perf.yaml +++ b/qai_hub_models/models/mobilenet_v3_small/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,26 +38,26 @@ models: - name: MobileNet-v3-Small performance_metrics: - torchscript_onnx_tflite: - inference_time: 835.0 - throughput: 1197.6047904191616 + inference_time: 844.0 + throughput: 1184.8341232227488 estimated_peak_memory_range: - min: 16384 - max: 1873408 + min: 28672 + max: 1989392 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 124 + layers_on_npu: 115 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 124 - job_id: jqp4jv0vp + total_layers: 115 + job_id: jopr9k4vp job_status: Passed torchscript_onnx_qnn: - inference_time: 882.0 - throughput: 1133.7868480725624 + inference_time: 879.0 + throughput: 1137.6564277588168 estimated_peak_memory_range: - min: 16384 - max: 13725872 + min: 622592 + max: 4861784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jegnr38r5 + job_id: j1p88o3zp job_status: Passed - torchscript_onnx_ort: - inference_time: 824.0 - throughput: 1213.5922330097087 + torchscript_onnx: + inference_time: 835.0 + throughput: 1197.6047904191616 estimated_peak_memory_range: - min: 12288 - max: 57762312 + min: 81920 + max: 86316544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: j2p0el765 + job_id: j1p38k4x5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,28 +89,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:52:03Z' + timestamp: '2024-06-22T22:58:55Z' - torchscript_onnx_tflite: - inference_time: 547.0 - throughput: 1828.1535648994516 + inference_time: 557.0 + throughput: 1795.3321364452424 estimated_peak_memory_range: - min: 12288 - max: 42129856 + min: 16384 + max: 44831456 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 124 + layers_on_npu: 115 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 124 - job_id: j0pxey215 
+ total_layers: 115 + job_id: jep2j87x5 job_status: Passed torchscript_onnx_qnn: - inference_time: 583.0 - throughput: 1715.2658662092624 + inference_time: 584.0 + throughput: 1712.3287671232877 estimated_peak_memory_range: min: 0 - max: 47338784 + max: 42092592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jopr1ej9g + job_id: jogkdzlyp job_status: Passed - torchscript_onnx_ort: - inference_time: 586.0 - throughput: 1706.4846416382252 + torchscript_onnx: + inference_time: 592.0 + throughput: 1689.1891891891892 estimated_peak_memory_range: - min: 524288 - max: 27846320 + min: 618496 + max: 25216640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: j1p8wzvxp + job_id: jwgomy145 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,28 +142,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:52:04Z' + timestamp: '2024-06-22T22:58:57Z' - torchscript_onnx_tflite: - inference_time: 832.0 - throughput: 1201.923076923077 + inference_time: 841.0 + throughput: 1189.0606420927468 estimated_peak_memory_range: - min: 24576 - max: 2336768 + min: 12288 + max: 1935832 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 124 + layers_on_npu: 115 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 124 - job_id: jo5mv3yw5 + total_layers: 115 + job_id: jqpyne4rg job_status: Passed torchscript_onnx_qnn: - inference_time: 867.0 - throughput: 1153.4025374855826 + inference_time: 865.0 + throughput: 1156.0693641618498 estimated_peak_memory_range: - min: 12288 - max: 35394896 + min: 16384 + max: 156761368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jqpyv607p + job_id: j1gl7n0e5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:52:02Z' + timestamp: '2024-06-22T22:58:53Z' + - torchscript_onnx_tflite: + inference_time: 844.0 + throughput: 1184.8341232227488 + estimated_peak_memory_range: + min: 16384 + max: 1887288 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 115 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 115 + job_id: j2p0ky125 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 871.0 + throughput: 1148.105625717566 + estimated_peak_memory_range: + min: 40960 + max: 35138480 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 126 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 126 + job_id: jw56v63vp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:58:54Z' - torchscript_onnx_qnn: - inference_time: 1018.0 - throughput: 982.3182711198428 + inference_time: 979.0 + throughput: 1021.4504596527069 estimated_peak_memory_range: - min: 1249280 - max: 1249280 + min: 602112 + max: 602112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jep23ln4g + job_id: jn5qw8775 job_status: Passed - torchscript_onnx_ort: + torchscript_onnx: inference_time: 879.0 throughput: 1137.6564277588168 
estimated_peak_memory_range: - min: 16596992 - max: 16596992 + min: 16412672 + max: 16412672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jogkr3m25 + job_id: j1pv4317p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:52:05Z' + timestamp: '2024-06-22T22:58:58Z' diff --git a/qai_hub_models/models/openai_clip/app.py b/qai_hub_models/models/openai_clip/app.py index 3df122c1..ebdbd415 100644 --- a/qai_hub_models/models/openai_clip/app.py +++ b/qai_hub_models/models/openai_clip/app.py @@ -60,10 +60,9 @@ def predict_similarity( by doing a transpose. """ - with torch.no_grad(): - image_features = self.image_encoder(image) - text_features = self.text_encoder(text) - logits_per_image = image_features @ text_features.t() + image_features = self.image_encoder(image) + text_features = self.text_encoder(text) + logits_per_image = image_features @ text_features.t() return logits_per_image.cpu().numpy() def process_image(self, image: Image) -> torch.Tensor: diff --git a/qai_hub_models/models/openai_clip/export.py b/qai_hub_models/models/openai_clip/export.py index 08ce7d6a..29a5701b 100644 --- a/qai_hub_models/models/openai_clip/export.py +++ b/qai_hub_models/models/openai_clip/export.py @@ -33,7 +33,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, components: Optional[List[str]] = None, skip_profiling: bool = False, @@ -127,7 +127,6 @@ def export_model( for component_name, component in components_dict.items(): # Trace the model input_spec = component.get_input_spec() - component.eval() source_model = torch.jit.trace( component.to("cpu"), make_torch_inputs(input_spec) ) @@ -194,7 +193,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/openai_clip/model.py b/qai_hub_models/models/openai_clip/model.py index 003ae533..a3664c47 100644 --- a/qai_hub_models/models/openai_clip/model.py +++ b/qai_hub_models/models/openai_clip/model.py @@ -59,7 +59,6 @@ def from_pretrained(): @staticmethod def from_source_model(net, preprocess, tokenizer_func): - net = net.eval() text_encoder = ClipTextEncoder(net) image_encoder = ClipImageEncoder(net) return Clip(text_encoder, image_encoder, preprocess, tokenizer_func) diff --git a/qai_hub_models/models/openai_clip/perf.yaml b/qai_hub_models/models/openai_clip/perf.yaml index 785f83c0..0665b1af 100644 --- a/qai_hub_models/models/openai_clip/perf.yaml +++ b/qai_hub_models/models/openai_clip/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: CLIPTextEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 13293.0 - throughput: 75.22756337922215 + inference_time: 10955.0 + 
throughput: 91.28251939753537 estimated_peak_memory_range: - min: 20480 - max: 3340864 + min: 65536 + max: 3179896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 576 - job_id: j1gle3r8p + job_id: jlpe29r7p job_status: Passed torchscript_onnx_qnn: - inference_time: 7810.0 - throughput: 128.04097311139566 + inference_time: 7761.0 + throughput: 128.84937508053085 estimated_peak_memory_range: - min: 24576 - max: 31351376 + min: 12288 + max: 18107232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 377 - job_id: jlpe4k315 + job_id: jnp130dl5 job_status: Passed - torchscript_onnx_ort: - inference_time: 31397.0 - throughput: 31.850176768481067 + torchscript_onnx: + inference_time: 31659.0 + throughput: 31.586594649230868 estimated_peak_memory_range: - min: 57344 - max: 324810128 + min: 16384 + max: 323261448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 389 - job_id: j0pxe1o15 + job_id: j2p0kyee5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:53:04Z' + timestamp: '2024-06-22T23:00:04Z' - torchscript_onnx_tflite: - inference_time: 9408.0 - throughput: 106.29251700680273 + inference_time: 7727.0 + throughput: 129.41633234114144 estimated_peak_memory_range: - min: 36864 - max: 211531120 + min: 16384 + max: 217754880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 576 - job_id: j1p3qe2l5 + job_id: jz5wxodzp job_status: Passed torchscript_onnx_qnn: - inference_time: 5496.0 - throughput: 181.9505094614265 + inference_time: 5506.0 + throughput: 181.62005085361423 estimated_peak_memory_range: min: 12288 - max: 143518544 + max: 128093120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 377 - job_id: jz5wm966g + job_id: jz576zvrg job_status: Passed - torchscript_onnx_ort: - inference_time: 22333.0 - throughput: 44.776787713249455 + torchscript_onnx: + inference_time: 22386.0 + throughput: 44.67077637809345 estimated_peak_memory_range: - min: 36864 - max: 188583968 + min: 53248 + max: 177699248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 389 - job_id: jegnrevr5 + job_id: jogkdzrop job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:53:06Z' + timestamp: '2024-06-22T23:00:06Z' - torchscript_onnx_tflite: - inference_time: 13221.0 - throughput: 75.6372437788367 + inference_time: 10895.0 + throughput: 91.78522257916475 estimated_peak_memory_range: - min: 40960 - max: 2903592 + min: 45056 + max: 3257704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 576 - job_id: j1pvzvxjg + job_id: jnp130dk5 job_status: Passed torchscript_onnx_qnn: - inference_time: 7775.0 - throughput: 128.61736334405145 + inference_time: 7779.0 + throughput: 128.5512276642242 estimated_peak_memory_range: - min: 16384 - max: 18711280 + min: 28672 + max: 17653280 primary_compute_unit: NPU 
precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 377 - job_id: jz57vdrl5 + job_id: jegnx2rm5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:53:02Z' + timestamp: '2024-06-22T22:59:59Z' + - torchscript_onnx_tflite: + inference_time: 10968.0 + throughput: 91.17432530999271 + estimated_peak_memory_range: + min: 36864 + max: 3506584 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 574 + layers_on_gpu: 0 + layers_on_cpu: 2 + total_layers: 576 + job_id: jz5wxodjp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 7788.0 + throughput: 128.40267077555214 + estimated_peak_memory_range: + min: 49152 + max: 25359208 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 377 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 377 + job_id: jep2j83m5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:00:02Z' - torchscript_onnx_qnn: - inference_time: 8431.0 - throughput: 118.60989206499822 + inference_time: 8248.0 + throughput: 121.24151309408342 estimated_peak_memory_range: - min: 159744 - max: 159744 + min: 155648 + max: 155648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 377 - job_id: jnp1q8z2g + job_id: j0pxmve9g job_status: Passed - torchscript_onnx_ort: - inference_time: 32547.0 - throughput: 30.724797984453254 + torchscript_onnx: + inference_time: 32528.0 + throughput: 30.74274471224791 estimated_peak_memory_range: - min: 40755200 - max: 40755200 + min: 37928960 + max: 37928960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 389 - job_id: jep23my4g + job_id: j1gl7nel5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,15 +256,15 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:53:08Z' + timestamp: '2024-06-22T23:00:08Z' - name: CLIPImageEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 126539.0 - throughput: 7.902701933791163 + inference_time: 66863.0 + throughput: 14.955954713369128 estimated_peak_memory_range: - min: 0 - max: 273708336 + min: 16384 + max: 3191736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -232,14 +272,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 576 - job_id: jw56qnl0g + job_id: jygzwexzg job_status: Passed torchscript_onnx_qnn: - inference_time: 50274.0 - throughput: 19.890997334606357 + inference_time: 50137.0 + throughput: 19.94534974170772 estimated_peak_memory_range: - min: 126976 - max: 66170792 + min: 90112 + max: 65745680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -247,7 +287,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 371 - job_id: jygzvrkkp + job_id: jvgd0wrlp + job_status: Passed + torchscript_onnx: + inference_time: 170413.0 + throughput: 5.868096917488689 + estimated_peak_memory_range: + min: 126976 + max: 546526336 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 382 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 382 + job_id: j1p88ow8p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ 
-256,13 +311,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:52:57Z' + timestamp: '2024-06-22T23:00:05Z' - torchscript_onnx_tflite: - inference_time: 96320.0 - throughput: 10.382059800664452 + inference_time: 49213.0 + throughput: 20.31983419015301 estimated_peak_memory_range: - min: 188416 - max: 752672896 + min: 32768 + max: 741033104 primary_compute_unit: NPU precision: fp16 layer_info: @@ -270,14 +325,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 576 - job_id: jwgoe3qxp + job_id: jmg98v3qp job_status: Passed torchscript_onnx_qnn: - inference_time: 37784.0 - throughput: 26.46622909167902 + inference_time: 37559.0 + throughput: 26.62477701749248 estimated_peak_memory_range: - min: 634880 - max: 197848448 + min: 659456 + max: 177316416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -285,14 +340,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 371 - job_id: jmg994nlg + job_id: jqp48qjlg job_status: Passed - torchscript_onnx_ort: - inference_time: 129578.0 - throughput: 7.717359428298013 + torchscript_onnx: + inference_time: 127814.0 + throughput: 7.823869059727416 estimated_peak_memory_range: - min: 659456 - max: 1273480192 + min: 0 + max: 1299304416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -300,7 +355,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 382 - job_id: jopr1y39g + job_id: jn5qw89m5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -309,13 +364,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:53:07Z' + timestamp: '2024-06-22T23:00:07Z' - torchscript_onnx_tflite: - inference_time: 125864.0 - throughput: 7.945083582279286 + inference_time: 65049.0 + throughput: 15.373026487724639 estimated_peak_memory_range: - min: 143360 - max: 4010376 + min: 102400 + max: 3521536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -323,14 +378,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 576 - job_id: j7gjke4x5 + job_id: jvgd0wrkp job_status: Passed torchscript_onnx_qnn: - inference_time: 50577.0 - throughput: 19.771833046641753 + inference_time: 50151.0 + throughput: 19.939781858786464 estimated_peak_memory_range: - min: 77824 - max: 66028648 + min: 0 + max: 56606256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -338,7 +393,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 371 - job_id: jqp4jwrvp + job_id: jopr9k1ep job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -347,28 +402,66 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:53:03Z' + timestamp: '2024-06-22T23:00:00Z' + - torchscript_onnx_tflite: + inference_time: 65877.0 + throughput: 15.17980478771043 + estimated_peak_memory_range: + min: 122880 + max: 3769616 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 576 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 576 + job_id: jmg98v3vp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 50614.0 + throughput: 19.757379381198877 + estimated_peak_memory_range: + min: 81920 + max: 65787296 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 371 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 371 + job_id: jqpynev4g + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: 
'2024-06-22T23:00:02Z' - torchscript_onnx_qnn: - inference_time: 48611.0 - throughput: 20.57147559194421 + inference_time: 36053.0 + throughput: 27.736942834160818 estimated_peak_memory_range: min: 602112 max: 602112 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 369 + layers_on_npu: 370 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 369 - job_id: jvgd7v1eg + total_layers: 370 + job_id: jo5m4rvq5 job_status: Passed - torchscript_onnx_ort: - inference_time: 168455.0 - throughput: 5.936303463833071 + torchscript_onnx: + inference_time: 169928.0 + throughput: 5.8848453462643 estimated_peak_memory_range: - min: 468086784 - max: 468086784 + min: 536092672 + max: 536092672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -376,7 +469,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 382 - job_id: jqpyvd37p + job_id: jw56v6q7p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -385,4 +478,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:53:08Z' + timestamp: '2024-06-22T23:00:09Z' diff --git a/qai_hub_models/models/openpose/export.py b/qai_hub_models/models/openpose/export.py index 18061fc5..96527162 100644 --- a/qai_hub_models/models/openpose/export.py +++ b/qai_hub_models/models/openpose/export.py @@ -38,7 +38,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,14 +117,13 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0,output_1" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -170,7 +169,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -210,7 +209,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0,output_1", inference_result, target_runtime ) diff --git a/qai_hub_models/models/openpose/model.py b/qai_hub_models/models/openpose/model.py index 6379a270..80835074 100644 --- a/qai_hub_models/models/openpose/model.py +++ b/qai_hub_models/models/openpose/model.py @@ -8,7 +8,11 @@ import torch -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, SourceAsRoot +from qai_hub_models.utils.asset_loaders import ( + CachedWebModelAsset, + SourceAsRoot, + wipe_sys_modules, +) from qai_hub_models.utils.base_model import BaseModel from 
qai_hub_models.utils.input_spec import InputSpec @@ -93,8 +97,7 @@ def forward(self, image: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: im = img_padded.permute(2, 0, 1).unsqueeze(0) - 0.5 # Run the model - with torch.no_grad(): - paf, heatmap = self.model(im) + paf, heatmap = self.model(im) return paf, heatmap @@ -131,9 +134,13 @@ def _load_openpose_source_model_from_weights( MODEL_ID, MODEL_ASSET_VERSION, DEFAULT_WEIGHTS ).fetch() + import src + + wipe_sys_modules(src) + # Import model files from pytorch openpose repo from src.body import Body body_estimation = Body(weights_path_body) - return body_estimation.model.eval() + return body_estimation.model diff --git a/qai_hub_models/models/openpose/perf.yaml b/qai_hub_models/models/openpose/perf.yaml index 30e2f47e..64964053 100644 --- a/qai_hub_models/models/openpose/perf.yaml +++ b/qai_hub_models/models/openpose/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: OpenPose performance_metrics: - torchscript_onnx_tflite: - inference_time: 12008.0 - throughput: 83.27781479013991 + inference_time: 11731.0 + throughput: 85.24422470377633 estimated_peak_memory_range: - min: 217088 - max: 2747920 + min: 196608 + max: 120661344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jogkryx25 + job_id: j1pv43zmp job_status: Passed torchscript_onnx_qnn: - inference_time: 11771.0 - throughput: 84.95454931611587 + inference_time: 11790.0 + throughput: 84.81764206955047 estimated_peak_memory_range: min: 45056 - max: 240267896 + max: 230031400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jw56q140g + job_id: jz5wxomjp job_status: Passed - torchscript_onnx_ort: - inference_time: 11936.0 - throughput: 83.78016085790885 + torchscript_onnx: + inference_time: 11939.0 + throughput: 83.75910880308234 estimated_peak_memory_range: - min: 0 - max: 374382256 + min: 12288 + max: 448015016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: j7gjklnx5 + job_id: jqp48q8lg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:55:03Z' + timestamp: '2024-06-22T23:02:13Z' - torchscript_onnx_tflite: - inference_time: 8742.0 - throughput: 114.39029970258522 + inference_time: 8755.0 + throughput: 114.22044545973729 estimated_peak_memory_range: - min: 12288 - max: 33837760 + min: 212992 + max: 38756416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jn5q92q4p + job_id: j7gj1xk8g job_status: Passed torchscript_onnx_qnn: - inference_time: 8755.0 - throughput: 114.22044545973729 + inference_time: 8767.0 + throughput: 114.06410402646287 estimated_peak_memory_range: - min: 618496 - max: 53012064 + min: 724992 + max: 44317808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 
186 - job_id: j1p3qm0l5 + job_id: jmg98v9vp job_status: Passed - torchscript_onnx_ort: - inference_time: 9006.0 - throughput: 111.0370863868532 + torchscript_onnx: + inference_time: 8927.0 + throughput: 112.0197154699227 estimated_peak_memory_range: - min: 700416 - max: 31196368 + min: 618496 + max: 25483424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jlpe4vm15 + job_id: j0pxmvm9g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:55:04Z' + timestamp: '2024-06-22T23:02:14Z' - torchscript_onnx_tflite: - inference_time: 11695.0 - throughput: 85.50662676357418 + inference_time: 11730.0 + throughput: 85.25149190110827 estimated_peak_memory_range: - min: 196608 - max: 2975008 + min: 217088 + max: 2218456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: j1glekm8p + job_id: jlpe2940p job_status: Passed torchscript_onnx_qnn: - inference_time: 11765.0 - throughput: 84.99787505312368 + inference_time: 11826.0 + throughput: 84.5594452900389 estimated_peak_memory_range: min: 12288 - max: 229599440 + max: 229489592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: j1pvzwkjg + job_id: jvgd0w7lp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:55:02Z' + timestamp: '2024-06-22T23:02:10Z' + - torchscript_onnx_tflite: + inference_time: 11710.0 + throughput: 85.39709649871904 + estimated_peak_memory_range: + min: 225280 + max: 2674184 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 103 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 103 + job_id: jygzwev6g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 11788.0 + throughput: 84.8320325755005 + estimated_peak_memory_range: + min: 36864 + max: 240637648 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 186 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 186 + job_id: jz576z6rg + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:02:11Z' - torchscript_onnx_qnn: - inference_time: 14100.0 - throughput: 70.92198581560284 + inference_time: 12303.0 + throughput: 81.28098837681866 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jwgoev6xp + job_id: jnp130ql5 job_status: Passed - torchscript_onnx_ort: - inference_time: 12365.0 - throughput: 80.87343307723413 + torchscript_onnx: + inference_time: 12373.0 + throughput: 80.82114281095934 estimated_peak_memory_range: - min: 88932352 - max: 88932352 + min: 93327360 + max: 93327360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jygzv7dkp + job_id: jo5m4r4q5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:55:05Z' + 
timestamp: '2024-06-22T23:02:15Z' diff --git a/qai_hub_models/models/posenet_mobilenet/app.py b/qai_hub_models/models/posenet_mobilenet/app.py index 72933415..b0f03508 100644 --- a/qai_hub_models/models/posenet_mobilenet/app.py +++ b/qai_hub_models/models/posenet_mobilenet/app.py @@ -556,14 +556,13 @@ def predict_pose_keypoints( tensor = tensor.reshape(1, 3, self.input_height, self.input_width) np.save("build/posenet_inputs", tensor.numpy()) - with torch.no_grad(): - ( - heatmaps_result, - offsets_result, - displacement_fwd_result, - displacement_bwd_result, - max_vals, - ) = self.model(tensor) + ( + heatmaps_result, + offsets_result, + displacement_fwd_result, + displacement_bwd_result, + max_vals, + ) = self.model(tensor) pose_scores, keypoint_scores, keypoint_coords = decode_multiple_poses( heatmaps_result.squeeze(0), offsets_result.squeeze(0), diff --git a/qai_hub_models/models/posenet_mobilenet/export.py b/qai_hub_models/models/posenet_mobilenet/export.py index e178937d..9e1a33bd 100644 --- a/qai_hub_models/models/posenet_mobilenet/export.py +++ b/qai_hub_models/models/posenet_mobilenet/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -186,7 +185,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/posenet_mobilenet/model.py b/qai_hub_models/models/posenet_mobilenet/model.py index c4f17782..aec731c2 100644 --- a/qai_hub_models/models/posenet_mobilenet/model.py +++ b/qai_hub_models/models/posenet_mobilenet/model.py @@ -60,7 +60,7 @@ def from_pretrained( model = posenet.load_model(model_id) - return cls(model).eval() + return cls(model) def forward(self, image): """ diff --git a/qai_hub_models/models/posenet_mobilenet/perf.yaml b/qai_hub_models/models/posenet_mobilenet/perf.yaml index e405cdc0..d56b85f5 100644 --- a/qai_hub_models/models/posenet_mobilenet/perf.yaml +++ b/qai_hub_models/models/posenet_mobilenet/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: Posenet-Mobilenet performance_metrics: - 
torchscript_onnx_tflite: - inference_time: 1387.0 - throughput: 720.9805335255949 + inference_time: 1412.0 + throughput: 708.2152974504249 estimated_peak_memory_range: - min: 12288 - max: 1654968 + min: 16384 + max: 1532880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jmg994llg + job_id: jopr9k9ep job_status: Passed torchscript_onnx_qnn: - inference_time: 1439.0 - throughput: 694.9270326615705 + inference_time: 1450.0 + throughput: 689.6551724137931 estimated_peak_memory_range: - min: 20480 - max: 24010176 + min: 16384 + max: 13707448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jz57vdyl5 + job_id: j1p88o88p job_status: Passed - torchscript_onnx_ort: - inference_time: 2086.0 - throughput: 479.3863854266539 + torchscript_onnx: + inference_time: 2131.0 + throughput: 469.2632566870014 estimated_peak_memory_range: - min: 12288 - max: 25676680 + min: 8192 + max: 30695984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jegnre6r5 + job_id: j1p38k8z5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:55:33Z' + timestamp: '2024-06-22T23:02:46Z' - torchscript_onnx_tflite: - inference_time: 977.0 - throughput: 1023.5414534288639 + inference_time: 970.0 + throughput: 1030.9278350515465 estimated_peak_memory_range: min: 12288 - max: 36616768 + max: 38499008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jnp1q842g + job_id: jep2j8jm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1010.0 - throughput: 990.0990099009902 + inference_time: 1013.0 + throughput: 987.1668311944719 estimated_peak_memory_range: min: 1597440 - max: 36578000 + max: 34127872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jqp4jwlvp + job_id: jogkdzdop job_status: Passed - torchscript_onnx_ort: - inference_time: 1404.0 - throughput: 712.2507122507122 + torchscript_onnx: + inference_time: 1455.0 + throughput: 687.2852233676975 estimated_peak_memory_range: - min: 1597440 - max: 24142448 + min: 1175552 + max: 23896144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jopr1yv9g + job_id: jwgomymd5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:55:34Z' + timestamp: '2024-06-22T23:02:47Z' - torchscript_onnx_tflite: - inference_time: 1388.0 - throughput: 720.4610951008646 + inference_time: 1389.0 + throughput: 719.9424046076314 estimated_peak_memory_range: min: 12288 - max: 1476976 + max: 3217944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jvgd7vxeg + job_id: jqpynen4g job_status: Passed torchscript_onnx_qnn: - inference_time: 1447.0 - throughput: 691.0850034554251 + inference_time: 1446.0 + throughput: 691.5629322268327 estimated_peak_memory_range: min: 16384 - max: 13954296 + max: 
149700328 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jo5mvznw5 + job_id: j1gl7n7l5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:55:32Z' + timestamp: '2024-06-22T23:02:43Z' + - torchscript_onnx_tflite: + inference_time: 1393.0 + throughput: 717.8750897343862 + estimated_peak_memory_range: + min: 12288 + max: 2044968 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 41 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 41 + job_id: j2p0kyke5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1449.0 + throughput: 690.1311249137336 + estimated_peak_memory_range: + min: 16384 + max: 159650832 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 69 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 69 + job_id: jw56v6v7p + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:02:44Z' - torchscript_onnx_qnn: - inference_time: 1751.0 - throughput: 571.1022272986864 + inference_time: 1549.0 + throughput: 645.577792123951 estimated_peak_memory_range: min: 1589248 max: 1589248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: j0pxe1k15 + job_id: jn5qw8wm5 job_status: Passed - torchscript_onnx_ort: - inference_time: 2129.0 - throughput: 469.7040864255519 + torchscript_onnx: + inference_time: 2165.0 + throughput: 461.8937644341801 estimated_peak_memory_range: - min: 151552 - max: 151552 + min: 229376 + max: 229376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jep23mk4g + job_id: j1pv434mp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:55:35Z' + timestamp: '2024-06-22T23:02:48Z' diff --git a/qai_hub_models/models/posenet_mobilenet_quantized/README.md b/qai_hub_models/models/posenet_mobilenet_quantized/README.md new file mode 100644 index 00000000..00394618 --- /dev/null +++ b/qai_hub_models/models/posenet_mobilenet_quantized/README.md @@ -0,0 +1,56 @@ +[![Qualcomm® AI Hub Models](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/quic-logo.jpg)](../../README.md) + + +# [Posenet-Mobilenet-Quantized: Quantized human pose estimator](https://aihub.qualcomm.com/models/posenet_mobilenet_quantized) + +Posenet performs pose estimation on human images. + +This is based on the implementation of Posenet-Mobilenet-Quantized found +[here](https://github.com/rwightman/posenet-pytorch). This repository contains scripts for optimized on-device +export suitable to run on Qualcomm® devices. More details on model performance +across various devices can be found [here](https://aihub.qualcomm.com/models/posenet_mobilenet_quantized). + +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on +a hosted Qualcomm® device. + + + + +## Example & Usage + + +Once installed, run the following simple CLI demo: + +```bash +python -m qai_hub_models.models.posenet_mobilenet_quantized.demo +``` +More details on the CLI tool can be found with the `--help` option.
See +[demo.py](demo.py) for sample usage of the model including pre/post processing +scripts. Please refer to our [general instructions on using +models](../../../#getting-started) for more usage instructions. + +## Export for on-device deployment + +This repository contains export scripts that produce a model optimized for +on-device deployment. This can be run as follows: + +```bash +python -m qai_hub_models.models.posenet_mobilenet_quantized.export +``` +Additional options are documented with the `--help` option. Note that the above +script requires access to Qualcomm® AI Hub. + +## License +- The license for the original implementation of Posenet-Mobilenet-Quantized can be found + [here](https://github.com/rwightman/posenet-pytorch/blob/master/LICENSE.txt). +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). + +## References +* [PersonLab: Person Pose Estimation and Instance Segmentation with a Bottom-Up, Part-Based, Geometric Embedding Model](https://arxiv.org/abs/1803.08225) +* [Source Model Implementation](https://github.com/rwightman/posenet-pytorch) + +## Community +* Join [our AI Hub Slack community](https://qualcomm-ai-hub.slack.com/join/shared_invite/zt-2d5zsmas3-Sj0Q9TzslueCjS31eXG2UA#/shared-invite/email) to collaborate, post questions and learn more about on-device AI. +* For questions or feedback please [reach out to us](mailto:ai-hub-support@qti.qualcomm.com). + + diff --git a/qai_hub_models/models/posenet_mobilenet_quantized/__init__.py b/qai_hub_models/models/posenet_mobilenet_quantized/__init__.py new file mode 100644 index 00000000..9d489997 --- /dev/null +++ b/qai_hub_models/models/posenet_mobilenet_quantized/__init__.py @@ -0,0 +1,8 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models.posenet_mobilenet.app import PosenetApp # noqa: F401 + +from .model import MODEL_ID # noqa: F401 +from .model import PosenetMobilenetQuantizable as Model # noqa: F401 diff --git a/qai_hub_models/models/posenet_mobilenet_quantized/conftest.py b/qai_hub_models/models/posenet_mobilenet_quantized/conftest.py new file mode 100644 index 00000000..c3a05810 --- /dev/null +++ b/qai_hub_models/models/posenet_mobilenet_quantized/conftest.py @@ -0,0 +1,39 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +import inspect + +import pytest + +from qai_hub_models.models.posenet_mobilenet_quantized import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +# Instantiate the model only once for all tests. +# Mock from_pretrained to always return the initialized model. +# This speeds up tests and limits memory leaks.
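+# The cache key is built from str(args) + str(kwargs), so repeated calls with
+# identical arguments reuse the same cached model instance across these tests.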
+@pytest.fixture(scope="module", autouse=True) +def cached_from_pretrained(): + with pytest.MonkeyPatch.context() as mp: + pretrained_cache = {} + from_pretrained = Model.from_pretrained + sig = inspect.signature(from_pretrained) + + @skip_clone_repo_check + def _cached_from_pretrained(*args, **kwargs): + cache_key = str(args) + str(kwargs) + model = pretrained_cache.get(cache_key, None) + if model: + return model + else: + model = from_pretrained(*args, **kwargs) + pretrained_cache[cache_key] = model + return model + + _cached_from_pretrained.__signature__ = sig + + mp.setattr(Model, "from_pretrained", _cached_from_pretrained) + yield mp diff --git a/qai_hub_models/models/posenet_mobilenet_quantized/demo.py b/qai_hub_models/models/posenet_mobilenet_quantized/demo.py new file mode 100644 index 00000000..0bd949f0 --- /dev/null +++ b/qai_hub_models/models/posenet_mobilenet_quantized/demo.py @@ -0,0 +1,18 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from __future__ import annotations + +from qai_hub_models.models.posenet_mobilenet.demo import posenet_demo +from qai_hub_models.models.posenet_mobilenet_quantized.model import ( + PosenetMobilenetQuantizable, +) + + +def main(is_test: bool = False): + return posenet_demo(PosenetMobilenetQuantizable, is_test) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/posenet_mobilenet_quantized/export.py b/qai_hub_models/models/posenet_mobilenet_quantized/export.py new file mode 100644 index 00000000..25b5f26b --- /dev/null +++ b/qai_hub_models/models/posenet_mobilenet_quantized/export.py @@ -0,0 +1,232 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import os +import warnings +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple, cast + +import qai_hub as hub + +from qai_hub_models.models.posenet_mobilenet_quantized import Model +from qai_hub_models.utils.args import ( + export_parser, + get_input_spec_kwargs, + get_model_kwargs, +) +from qai_hub_models.utils.base_model import TargetRuntime +from qai_hub_models.utils.compare import torch_inference +from qai_hub_models.utils.printing import ( + print_inference_metrics, + print_on_target_demo_cmd, + print_profile_metrics_from_job, +) +from qai_hub_models.utils.qai_hub_helpers import ( + can_access_qualcomm_ai_hub, + export_without_hub_access, + transpose_channel_first_to_last, +) +from qai_hub_models.utils.qnn_helpers import get_qnn_inputs + + +def export_model( + device: str = "Samsung Galaxy S23 (Family)", + chipset: Optional[str] = None, + skip_profiling: bool = False, + skip_inferencing: bool = False, + skip_downloading: bool = False, + skip_summary: bool = False, + output_dir: Optional[str] = None, + target_runtime: TargetRuntime = TargetRuntime.TFLITE, + compile_options: str = "", + profile_options: str = "", + **additional_model_kwargs, +) -> Tuple[hub.CompileJob, Optional[hub.ProfileJob], Optional[hub.InferenceJob]] | List[ + str +]: + """ + This function accomplishes 6 main tasks: + + 1. 
Instantiates a PyTorch model and converts it to a traced TorchScript format. + 2. Compiles the model to an asset that can be run on device. + 3. Profiles the model performance on real devices. + 4. Inferences the model on sample inputs. + 5. Downloads the model asset to the local directory. + 6. Summarizes the results from profiling and inference. + + Each of the last four steps can be optionally skipped using the input options. + + Parameters: + device: Device for which to export the model. + Full list of available devices can be found by running `hub.get_devices()`. + Defaults to DEFAULT_DEVICE if not specified. + chipset: If set, will choose a random device with this chipset. + Overrides the `device` argument. + skip_profiling: If set, skips profiling of compiled model on real devices. + skip_inferencing: If set, skips computing on-device outputs from sample data. + skip_downloading: If set, skips downloading of compiled model. + skip_summary: If set, skips waiting for and summarizing results + from profiling and inference. + output_dir: Directory to store generated assets (e.g. compiled model). + Defaults to `<cwd>/build/<model_name>`. + target_runtime: Which on-device runtime to target. Default is TFLite. + compile_options: Additional options to pass when submitting the compile job. + profile_options: Additional options to pass when submitting the profile job. + **additional_model_kwargs: Additional optional kwargs used to customize + `model_cls.from_pretrained` and `model.get_input_spec` + + Returns: + A 3-tuple of: + * A CompileJob object containing metadata about the compile job submitted to hub. + * A ProfileJob containing metadata about the profile job (None if profiling skipped). + * An InferenceJob containing metadata about the inference job (None if inferencing skipped). + """ + model_name = "posenet_mobilenet_quantized" + output_path = Path(output_dir or Path.cwd() / "build" / model_name) + if chipset: + hub_device = hub.Device(attributes=f"chipset:{chipset}") + else: + hub_device = hub.Device(name=device) + if not can_access_qualcomm_ai_hub(): + return export_without_hub_access( + "posenet_mobilenet_quantized", + "Posenet-Mobilenet-Quantized", + device, + skip_profiling, + skip_inferencing, + skip_downloading, + skip_summary, + output_path, + target_runtime, + compile_options, + profile_options, + ) + + # 1. Initialize PyTorch model + model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) + input_spec = model.get_input_spec( + **get_input_spec_kwargs(model, additional_model_kwargs) + ) + + # Trace the model + source_model = model.convert_to_hub_source_model( + target_runtime, output_path, input_spec + ) + if target_runtime == TargetRuntime.TFLITE: + quant_calibration_data = None + else: + quant_calibration_data = model.get_calibration_data(target_runtime, input_spec) + + # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) + channel_last_flags = ( + " --force_channel_last_input image" + if target_runtime != TargetRuntime.ONNX + else "" + ) + + # 2.
Compile the model to an on-device asset + model_compile_options = model.get_hub_compile_options( + target_runtime, compile_options + channel_last_flags, hub_device + ) + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( + model=source_model, + input_specs=input_spec, + device=hub_device, + name=model_name, + calibration_data=quant_calibration_data, + options=model_compile_options, + ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) + + # 3. Profile the model asset on real devices + profile_job: Optional[hub.client.ProfileJob] = None + if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print(f"Profiling model {model_name} on a hosted device.") + submitted_profile_job = hub.submit_profile_job( + model=compile_job.get_target_model(), + device=hub_device, + name=model_name, + options=profile_options_all, + ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) + + # 4. Run inference on-device with sample inputs + inference_job: Optional[hub.client.InferenceJob] = None + if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print( + f"Running inference for {model_name} on a hosted device with example inputs." + ) + sample_inputs = model.sample_inputs(input_spec) + hub_inputs = sample_inputs + if target_runtime == TargetRuntime.QNN: + hub_inputs = get_qnn_inputs(compile_job, sample_inputs) + # Convert inputs from channel first to channel last + hub_inputs = ( + sample_inputs + if target_runtime == TargetRuntime.ONNX + else transpose_channel_first_to_last("image", sample_inputs, target_runtime) + ) + submitted_inference_job = hub.submit_inference_job( + model=compile_job.get_target_model(), + inputs=hub_inputs, + device=hub_device, + name=model_name, + options=profile_options_all, + ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) + + # 5. Download the model asset to a local file + if not skip_downloading: + if target_runtime == TargetRuntime.QNN: + target_runtime_extension = "so" + elif target_runtime == TargetRuntime.TFLITE: + target_runtime_extension = "tflite" + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: + target_runtime_extension = "onnx" + + os.makedirs(output_path, exist_ok=True) + target_model: hub.Model = compile_job.get_target_model() # type: ignore + target_model.download( + str(output_path / f"{model_name}.{target_runtime_extension}") + ) + + # 6. 
Summarize the results from profiling and inference + if not skip_summary and not skip_profiling: + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore + print_profile_metrics_from_job(profile_job, profile_data) + + if not skip_summary and not skip_inferencing: + torch_out = torch_inference(model, sample_inputs) + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore + print_inference_metrics(inference_job, inference_result, torch_out) + + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + + return (compile_job, profile_job, inference_job) + + +def main(): + warnings.filterwarnings("ignore") + parser = export_parser(model_cls=Model) + args = parser.parse_args() + export_model(**vars(args)) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/posenet_mobilenet_quantized/info.yaml b/qai_hub_models/models/posenet_mobilenet_quantized/info.yaml new file mode 100644 index 00000000..3124e297 --- /dev/null +++ b/qai_hub_models/models/posenet_mobilenet_quantized/info.yaml @@ -0,0 +1,40 @@ +name: Posenet-Mobilenet-Quantized +# id must match with the model dir name in qai_hub_models +id: posenet_mobilenet_quantized +status: public +headline: Quantized human pose estimator. +domain: Computer Vision +use_case: Pose Estimation +description: Posenet performs pose estimation on human images. +tags: + - quantized +research_paper: https://arxiv.org/abs/1803.08225 +research_paper_title: 'PersonLab: Person Pose Estimation and Instance Segmentation + with a Bottom-Up, Part-Based, Geometric Embedding Model' +license: https://github.com/rwightman/posenet-pytorch/blob/master/LICENSE.txt +deploy_license: + https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +source_repo: https://github.com/rwightman/posenet-pytorch +technical_details: + Model checkpoint: mobilenet_v1_101 + Input resolution: 513x257 + Number of parameters: 3.31M + Model size: 3.47 MB +applicable_scenarios: + - Injury prevention training + - Sports performance analysis + - Posture recognition +form_factors: + - Phone + - Tablet + - IoT +related_models: + - litehrnet + - openpose + - hrnet_pose +has_static_banner: yes +has_animated_banner: yes +license_type: apache-2.0 +deploy_license_type: AI Model Hub License +dataset: + - coco diff --git a/qai_hub_models/models/posenet_mobilenet_quantized/model.py b/qai_hub_models/models/posenet_mobilenet_quantized/model.py new file mode 100644 index 00000000..d7a25275 --- /dev/null +++ b/qai_hub_models/models/posenet_mobilenet_quantized/model.py @@ -0,0 +1,87 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from __future__ import annotations + +# isort: off +# This verifies aimet is installed, and this must be included first. 
+from qai_hub_models.utils.quantization_aimet import ( + AIMETQuantizableMixin, + tie_observers, + constrain_quantized_inputs_to_image_range, +) + +# isort: on + +import torch +from aimet_torch.batch_norm_fold import fold_all_batch_norms +from aimet_torch.cross_layer_equalization import CrossLayerScaling +from aimet_torch.model_preparer import prepare_model +from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim + +from qai_hub_models.models.posenet_mobilenet.model import PosenetMobilenet +from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset + +MODEL_ID = __name__.split(".")[-2] +MODEL_ASSET_VERSION = 1 +DEFAULT_ENCODINGS = "posenet_mobilenet_quantized_encodings.json" + + +class PosenetMobilenetQuantizable(AIMETQuantizableMixin, PosenetMobilenet): + """ + PosenetMobilenet with post training quantization support + + Supports only 8 bit weights and activations, and only loads pre-quantized checkpoints. + Support for quantizing using your own weights & data will come at a later date. + """ + + def __init__( + self, + model: QuantizationSimModel, + ) -> None: + PosenetMobilenet.__init__(self, model.model) + AIMETQuantizableMixin.__init__(self, model) + + @classmethod + def from_pretrained( + cls, aimet_encodings: str | None = "DEFAULT" + ) -> PosenetMobilenetQuantizable: + model = PosenetMobilenet.from_pretrained() + input_shape = PosenetMobilenet.get_input_spec()["image"][0] + dummy_input = torch.rand(input_shape) + + model = prepare_model(model) + fold_all_batch_norms(model, input_shape, dummy_input) + CrossLayerScaling.scale_model(model, input_shape, dummy_input) + + sim = QuantizationSimModel( + model, + quant_scheme="tf_enhanced", + default_param_bw=8, + default_output_bw=8, + config_file=get_default_aimet_config(), + dummy_input=dummy_input, + ) + tie_observers(sim) + constrain_quantized_inputs_to_image_range(sim) + + if aimet_encodings: + if aimet_encodings == "DEFAULT": + aimet_encodings = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, DEFAULT_ENCODINGS + ).fetch() + load_encodings_to_sim(sim, aimet_encodings) + + final_model = cls(sim) + return final_model + + def forward(self, image: torch.Tensor): + """ + Run PosenetMobilenetQuantizable on `image`, and produce a + predicted set of keypoints. + + See PosenetMobilenet model for details. 
+ """ + return self.model(image) diff --git a/qai_hub_models/models/posenet_mobilenet_quantized/perf.yaml b/qai_hub_models/models/posenet_mobilenet_quantized/perf.yaml new file mode 100644 index 00000000..4b215629 --- /dev/null +++ b/qai_hub_models/models/posenet_mobilenet_quantized/perf.yaml @@ -0,0 +1,265 @@ +aggregated: + supported_oses: + - Android + supported_devices: + - Google Pixel 3 + - Google Pixel 3a + - Google Pixel 3a XL + - Google Pixel 4 + - Google Pixel 4a + - Google Pixel 5a 5G + - QCS8250 (Proxy) + - QCS8550 (Proxy) + - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) + - Samsung Galaxy S21 + - Samsung Galaxy S21 Ultra + - Samsung Galaxy S21+ + - Samsung Galaxy S22 5G + - Samsung Galaxy S22 Ultra 5G + - Samsung Galaxy S22+ 5G + - Samsung Galaxy S23 + - Samsung Galaxy S23 Ultra + - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ + - Samsung Galaxy Tab S8 + - Snapdragon X Elite CRD + - Xiaomi 12 + - Xiaomi 12 Pro + supported_chipsets: + - Qcs8250 + - Qcs8550 + - Sa8540p + - Sa8775p + - Snapdragon® 8 Gen 1 + - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 + - Snapdragon® 888 + - Snapdragon® X Elite +models: +- name: Posenet-Mobilenet-Quantized + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 591.0 + throughput: 1692.047377326565 + estimated_peak_memory_range: + min: 12288 + max: 1938584 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 42 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 42 + job_id: jlpe2920p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 622.0 + throughput: 1607.717041800643 + estimated_peak_memory_range: + min: 16384 + max: 9093504 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 42 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 42 + job_id: jz576zkrg + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-06-22T23:03:19Z' + - torchscript_onnx_tflite: + inference_time: 403.0 + throughput: 2481.3895781637716 + estimated_peak_memory_range: + min: 12288 + max: 47502864 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 42 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 42 + job_id: jygzwew6g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 433.0 + throughput: 2309.4688221709007 + estimated_peak_memory_range: + min: 409600 + max: 31954752 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 42 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 42 + job_id: jqp48qmlg + job_status: Passed + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-06-22T23:03:21Z' + - torchscript_onnx_tflite: + inference_time: 594.0 + throughput: 1683.5016835016836 + estimated_peak_memory_range: + min: 12288 + max: 1688024 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 42 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 42 + job_id: jz5wxoxjp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 616.0 + throughput: 1623.3766233766235 + estimated_peak_memory_range: + min: 12288 + max: 15322296 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 42 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 42 + job_id: jo5m4roq5 + job_status: 
Passed + reference_device_info: + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: Qcs8550 + timestamp: '2024-06-22T23:03:23Z' + - torchscript_onnx_tflite: + inference_time: 573.0 + throughput: 1745.2006980802792 + estimated_peak_memory_range: + min: 12288 + max: 1835216 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 42 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 42 + job_id: jmg98v8vp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 624.0 + throughput: 1602.5641025641025 + estimated_peak_memory_range: + min: 413696 + max: 12761480 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 42 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 42 + job_id: jegnx2om5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:03:24Z' + - torchscript_onnx_tflite: + inference_time: 2251.0 + throughput: 444.247001332741 + estimated_peak_memory_range: + min: 12288 + max: 26289056 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 42 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 42 + job_id: jnp1303l5 + job_status: Passed + reference_device_info: + name: RB3 Gen 2 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: Qcs6490 + timestamp: '2024-06-22T23:03:17Z' + - torchscript_onnx_tflite: + inference_time: 11775.0 + throughput: 84.92569002123142 + estimated_peak_memory_range: + min: 528384 + max: 7493424 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 39 + layers_on_gpu: 3 + layers_on_cpu: 0 + total_layers: 42 + job_id: jvgd0w0lp + job_status: Passed + reference_device_info: + name: RB5 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: Qcs8250 + timestamp: '2024-06-22T23:03:18Z' + - torchscript_onnx_qnn: + inference_time: 690.0 + throughput: 1449.2753623188405 + estimated_peak_memory_range: + min: 397312 + max: 397312 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 42 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 42 + job_id: j0pxmv39g + job_status: Passed + reference_device_info: + name: Snapdragon X Elite CRD + os: '11' + form_factor: Compute + os_name: Windows + manufacturer: Qualcomm + chipset: Snapdragon® X Elite + timestamp: '2024-06-22T23:03:22Z' diff --git a/qai_hub_models/models/posenet_mobilenet_quantized/test.py b/qai_hub_models/models/posenet_mobilenet_quantized/test.py new file mode 100644 index 00000000..84567ec1 --- /dev/null +++ b/qai_hub_models/models/posenet_mobilenet_quantized/test.py @@ -0,0 +1,53 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +import numpy as np + +from qai_hub_models.models.posenet_mobilenet.app import PosenetApp +from qai_hub_models.models.posenet_mobilenet.demo import IMAGE_ADDRESS +from qai_hub_models.models.posenet_mobilenet_quantized.demo import main as demo_main +from qai_hub_models.models.posenet_mobilenet_quantized.model import ( + MODEL_ASSET_VERSION, + MODEL_ID, + PosenetMobilenetQuantizable, +) +from qai_hub_models.utils.asset_loaders import ( + CachedWebModelAsset, + load_image, + load_numpy, +) +from qai_hub_models.utils.testing import skip_clone_repo_check + +KEYPOINT_SCORES_GT = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, "keypoint_scores_gt.npy" +) +KEYPOINT_COORDS_GT = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, "keypoint_coords_gt.npy" +) + + +@skip_clone_repo_check +def test_task(): + image = load_image(IMAGE_ADDRESS) + model = PosenetMobilenetQuantizable.from_pretrained() + h, w = PosenetMobilenetQuantizable.get_input_spec()["image"][0][2:] + app = PosenetApp(model, h, w) + pose_scores, keypoint_scores, keypoint_coords = app.predict(image, raw_output=True) + + assert pose_scores[0] >= 0.5 + assert pose_scores[1] >= 0.5 + for score in pose_scores[2:]: + assert score < 1e-4 + + np.testing.assert_allclose( + keypoint_scores, load_numpy(KEYPOINT_SCORES_GT), atol=1e-3, rtol=0.05 + ) + np.testing.assert_allclose( + keypoint_coords, load_numpy(KEYPOINT_COORDS_GT), atol=1e-3, rtol=0.05 + ) + + +@skip_clone_repo_check +def test_demo(): + demo_main(is_test=True) diff --git a/qai_hub_models/models/quicksrnetlarge/export.py b/qai_hub_models/models/quicksrnetlarge/export.py index 5f3ec808..50258050 100644 --- a/qai_hub_models/models/quicksrnetlarge/export.py +++ b/qai_hub_models/models/quicksrnetlarge/export.py @@ -38,7 +38,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,13 +117,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -169,7 +168,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -187,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -209,7 +208,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", 
inference_result, target_runtime ) diff --git a/qai_hub_models/models/quicksrnetlarge/info.yaml b/qai_hub_models/models/quicksrnetlarge/info.yaml index b139e358..5d42ebdf 100644 --- a/qai_hub_models/models/quicksrnetlarge/info.yaml +++ b/qai_hub_models/models/quicksrnetlarge/info.yaml @@ -17,7 +17,7 @@ deploy_license: source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet technical_details: Model checkpoint: quicksrnet_large_3x_checkpoint - Input resolution: 640x360 + Input resolution: 128x128 Number of parameters: 424K Model size: 1.63 MB applicable_scenarios: diff --git a/qai_hub_models/models/quicksrnetlarge/model.py b/qai_hub_models/models/quicksrnetlarge/model.py index 6a83e660..2f4fa63d 100644 --- a/qai_hub_models/models/quicksrnetlarge/model.py +++ b/qai_hub_models/models/quicksrnetlarge/model.py @@ -46,6 +46,5 @@ def from_pretrained( ) checkpoint = load_torch(checkpoint_asset) model.load_state_dict(checkpoint["state_dict"]) - model.eval() return cls(model, scale_factor) diff --git a/qai_hub_models/models/quicksrnetlarge/perf.yaml b/qai_hub_models/models/quicksrnetlarge/perf.yaml index 4ca0e8f0..4c10c066 100644 --- a/qai_hub_models/models/quicksrnetlarge/perf.yaml +++ b/qai_hub_models/models/quicksrnetlarge/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: QuickSRNetLarge performance_metrics: - torchscript_onnx_tflite: - inference_time: 2412.0 - throughput: 414.5936981757877 + inference_time: 2439.0 + throughput: 410.0041000410004 estimated_peak_memory_range: - min: 28672 - max: 1429016 + min: 24576 + max: 1394872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: j1gle1ojp + job_id: jn5qw8zm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2108.0 - throughput: 474.3833017077799 + inference_time: 2109.0 + throughput: 474.158368895211 estimated_peak_memory_range: - min: 229376 - max: 5466776 + min: 217088 + max: 65739352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jwgoe4oqp + job_id: jwgomyod5 job_status: Passed - torchscript_onnx_ort: - inference_time: 2712.0 - throughput: 368.7315634218289 + torchscript_onnx: + inference_time: 2642.0 + throughput: 378.5011355034065 estimated_peak_memory_range: - min: 16384 - max: 20834136 + min: 12288 + max: 6581808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: jygzv44op + job_id: jz5wxo8jp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-11T11:56:08Z' + timestamp: '2024-06-22T23:03:59Z' - torchscript_onnx_tflite: - inference_time: 1740.0 - throughput: 574.7126436781609 + inference_time: 1779.0 + throughput: 562.1135469364812 estimated_peak_memory_range: min: 16384 - max: 29572928 + max: 30262592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jw56qdr6g + job_id: j1gl7nol5 
job_status: Passed torchscript_onnx_qnn: - inference_time: 1500.0 - throughput: 666.6666666666666 + inference_time: 1498.0 + throughput: 667.5567423230974 estimated_peak_memory_range: min: 204800 - max: 21850576 + max: 20871824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: j1pvz99kg + job_id: j1pv43emp job_status: Passed - torchscript_onnx_ort: - inference_time: 1855.0 - throughput: 539.0835579514825 + torchscript_onnx: + inference_time: 1812.0 + throughput: 551.8763796909492 estimated_peak_memory_range: min: 212992 - max: 19290704 + max: 18814624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: jz5wm113g + job_id: jmg98vkvp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-11T11:56:09Z' + timestamp: '2024-06-22T23:04:00Z' - torchscript_onnx_tflite: - inference_time: 2478.0 - throughput: 403.5512510088781 + inference_time: 2437.0 + throughput: 410.3405826836274 estimated_peak_memory_range: - min: 24576 - max: 1690672 + min: 28672 + max: 2322816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: j1p3qwx35 + job_id: jw56v6r7p job_status: Passed torchscript_onnx_qnn: inference_time: 2101.0 throughput: 475.9638267491671 estimated_peak_memory_range: - min: 221184 - max: 5373456 + min: 12288 + max: 72339216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jlpe4llo5 + job_id: jlpe2980p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-11T11:56:07Z' + timestamp: '2024-06-22T23:03:56Z' + - torchscript_onnx_tflite: + inference_time: 2451.0 + throughput: 407.9967360261118 + estimated_peak_memory_range: + min: 24576 + max: 17913792 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 28 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 31 + job_id: j1p38kxz5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 2101.0 + throughput: 475.9638267491671 + estimated_peak_memory_range: + min: 212992 + max: 21803552 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 31 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 31 + job_id: jygzwe86g + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:03:57Z' - torchscript_onnx_qnn: - inference_time: 2949.0 - throughput: 339.097999321804 + inference_time: 2272.0 + throughput: 440.14084507042253 estimated_peak_memory_range: - min: 204800 - max: 204800 + min: 212992 + max: 212992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: j7gjkwwv5 + job_id: j7gj1xo8g job_status: Passed - torchscript_onnx_ort: + torchscript_onnx: inference_time: 2692.0 throughput: 371.4710252600297 estimated_peak_memory_range: - min: 13115392 - max: 13115392 + min: 12689408 + max: 12689408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: jmg99xxwg + job_id: jnp1307l5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-11T11:56:10Z' + timestamp: '2024-06-22T23:04:01Z' diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/export.py b/qai_hub_models/models/quicksrnetlarge_quantized/export.py index 4b832a72..15495fbd 100644 --- a/qai_hub_models/models/quicksrnetlarge_quantized/export.py +++ b/qai_hub_models/models/quicksrnetlarge_quantized/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -127,7 +127,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -177,7 +177,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -195,7 +195,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -217,7 +217,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/info.yaml b/qai_hub_models/models/quicksrnetlarge_quantized/info.yaml index 897f7e5f..613f8381 100644 --- a/qai_hub_models/models/quicksrnetlarge_quantized/info.yaml +++ b/qai_hub_models/models/quicksrnetlarge_quantized/info.yaml @@ -18,7 +18,7 @@ deploy_license: source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet technical_details: Model checkpoint: quicksrnet_large_3x_checkpoint - Input resolution: 640x360 + Input resolution: 128x128 Number of parameters: 424K Model size: 449 KB applicable_scenarios: diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/model.py b/qai_hub_models/models/quicksrnetlarge_quantized/model.py index 4767a779..6a5a8a69 100644 --- a/qai_hub_models/models/quicksrnetlarge_quantized/model.py +++ b/qai_hub_models/models/quicksrnetlarge_quantized/model.py @@ -76,6 +76,4 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() - return cls(sim, scale_factor) diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/perf.yaml b/qai_hub_models/models/quicksrnetlarge_quantized/perf.yaml index 6fc4336f..1ca35c3b 100644 --- a/qai_hub_models/models/quicksrnetlarge_quantized/perf.yaml +++ b/qai_hub_models/models/quicksrnetlarge_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - 
QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: QuickSRNetLarge-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1324.0 - throughput: 755.2870090634441 + inference_time: 1329.0 + throughput: 752.4454477050414 estimated_peak_memory_range: - min: 12288 - max: 2457016 + min: 24576 + max: 69497376 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jvgd7zzrg + job_id: jz576z7rg job_status: Passed torchscript_onnx_qnn: - inference_time: 1159.0 - throughput: 862.8127696289905 + inference_time: 1155.0 + throughput: 865.8008658008658 estimated_peak_memory_range: - min: 77824 - max: 3860912 + min: 28672 + max: 3835016 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 19 - job_id: jz57v7795 - job_status: Passed - torchscript_onnx_ort: - inference_time: 1039.0 - throughput: 962.4639076034649 - estimated_peak_memory_range: - min: 69632 - max: 4717016 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 22 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 22 - job_id: jegnr77q5 + job_id: jep2j8vm5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-11T11:56:37Z' + timestamp: '2024-06-22T23:04:27Z' - torchscript_onnx_tflite: - inference_time: 1024.0 - throughput: 976.5625 + inference_time: 1071.0 + throughput: 933.7068160597572 estimated_peak_memory_range: - min: 49152 - max: 25834320 + min: 16384 + max: 26377568 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jz5wm11mg + job_id: jqp48q9lg job_status: Passed torchscript_onnx_qnn: - inference_time: 812.0 - throughput: 1231.527093596059 + inference_time: 806.0 + throughput: 1240.6947890818858 estimated_peak_memory_range: - min: 7340032 - max: 27038272 + min: 12288 + max: 18885968 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 19 - job_id: jqp4j991p - job_status: Passed - torchscript_onnx_ort: - inference_time: 776.0 - throughput: 1288.659793814433 - estimated_peak_memory_range: - min: 36864 - max: 17135056 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 22 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 22 - job_id: jopr1nn7g + job_id: jqpyne74g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +120,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-11T11:56:37Z' + timestamp: '2024-06-22T23:04:29Z' - torchscript_onnx_tflite: - inference_time: 1364.0 - throughput: 733.1378299120234 + inference_time: 1330.0 + throughput: 751.8796992481203 estimated_peak_memory_range: min: 16384 - max: 1375064 + max: 1486272 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jmg99xx8g + job_id: j0pxmvd9g job_status: Passed torchscript_onnx_qnn: - 
inference_time: 1156.0 - throughput: 865.0519031141869 + inference_time: 1154.0 + throughput: 866.5511265164645 estimated_peak_memory_range: - min: 94208 - max: 9070680 + min: 20480 + max: 3931656 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 19 - job_id: jo5mvdd95 + job_id: j1p88o48p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +158,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-11T11:56:36Z' + timestamp: '2024-06-22T23:04:32Z' - torchscript_onnx_tflite: - inference_time: 3979.0 - throughput: 251.31942699170645 + inference_time: 1398.0 + throughput: 715.307582260372 estimated_peak_memory_range: - min: 12288 - max: 18592624 + min: 28672 + max: 63302504 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,7 +172,45 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jnp1qvv7g + job_id: jo5m4rdq5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1158.0 + throughput: 863.5578583765113 + estimated_peak_memory_range: + min: 73728 + max: 8456712 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 19 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 19 + job_id: jogkdz9op + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:04:33Z' + - torchscript_onnx_tflite: + inference_time: 3627.0 + throughput: 275.70995312930796 + estimated_peak_memory_range: + min: 36864 + max: 19522368 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 28 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 31 + job_id: jegnx27m5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -207,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-11T11:56:31Z' + timestamp: '2024-06-22T23:04:25Z' - torchscript_onnx_tflite: - inference_time: 32895.0 - throughput: 30.399756801945585 + inference_time: 34026.0 + throughput: 29.389290542526304 estimated_peak_memory_range: - min: 4079616 - max: 6087016 + min: 258048 + max: 7087952 primary_compute_unit: NPU precision: int8 layer_info: @@ -221,7 +233,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jvgd7zzzg + job_id: jopr9knep job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -230,13 +242,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-11T11:56:32Z' + timestamp: '2024-06-22T23:04:26Z' - torchscript_onnx_qnn: - inference_time: 1008.0 - throughput: 992.063492063492 + inference_time: 1065.0 + throughput: 938.9671361502348 estimated_peak_memory_range: - min: 90112 - max: 90112 + min: 57344 + max: 57344 primary_compute_unit: NPU precision: int8 layer_info: @@ -244,22 +256,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 19 - job_id: j0pxeddl5 - job_status: Passed - torchscript_onnx_ort: - inference_time: 1090.0 - throughput: 917.4311926605504 - estimated_peak_memory_range: - min: 4714496 - max: 4714496 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 22 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 22 - job_id: jep23vvqg + job_id: j2p0kyve5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -268,4 +265,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - 
timestamp: '2024-06-11T11:56:38Z' + timestamp: '2024-06-22T23:04:31Z' diff --git a/qai_hub_models/models/quicksrnetmedium/export.py b/qai_hub_models/models/quicksrnetmedium/export.py index 20dca067..5cae8684 100644 --- a/qai_hub_models/models/quicksrnetmedium/export.py +++ b/qai_hub_models/models/quicksrnetmedium/export.py @@ -38,7 +38,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,13 +117,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -169,7 +168,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -187,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -209,7 +208,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/quicksrnetmedium/info.yaml b/qai_hub_models/models/quicksrnetmedium/info.yaml index 9f0a95c6..88490a4f 100644 --- a/qai_hub_models/models/quicksrnetmedium/info.yaml +++ b/qai_hub_models/models/quicksrnetmedium/info.yaml @@ -17,7 +17,7 @@ deploy_license: source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet technical_details: Model checkpoint: quicksrnet_medium_3x_checkpoint - Input resolution: 640x360 + Input resolution: 128x128 Number of parameters: 55.0K Model size: 220 KB applicable_scenarios: diff --git a/qai_hub_models/models/quicksrnetmedium/model.py b/qai_hub_models/models/quicksrnetmedium/model.py index 65c91c46..0b35d777 100644 --- a/qai_hub_models/models/quicksrnetmedium/model.py +++ b/qai_hub_models/models/quicksrnetmedium/model.py @@ -46,6 +46,5 @@ def from_pretrained( ) checkpoint = load_torch(checkpoint_asset) model.load_state_dict(checkpoint["state_dict"]) - model.eval() return cls(model, scale_factor) diff --git a/qai_hub_models/models/quicksrnetmedium/perf.yaml b/qai_hub_models/models/quicksrnetmedium/perf.yaml index a70a9366..b5095b48 100644 --- a/qai_hub_models/models/quicksrnetmedium/perf.yaml +++ b/qai_hub_models/models/quicksrnetmedium/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - 
Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: QuickSRNetMedium performance_metrics: - torchscript_onnx_tflite: - inference_time: 1343.0 - throughput: 744.6016381236038 + inference_time: 1382.0 + throughput: 723.589001447178 estimated_peak_memory_range: - min: 16384 - max: 1439320 + min: 24576 + max: 1576320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: j2p0evvn5 + job_id: j7gj1xw8g job_status: Passed torchscript_onnx_qnn: - inference_time: 988.0 - throughput: 1012.1457489878543 + inference_time: 998.0 + throughput: 1002.0040080160321 estimated_peak_memory_range: - min: 12288 - max: 2409584 + min: 212992 + max: 67862072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jw56qdwyg + job_id: jmg98vxvp job_status: Passed - torchscript_onnx_ort: - inference_time: 1506.0 - throughput: 664.0106241699867 + torchscript_onnx: + inference_time: 1576.0 + throughput: 634.5177664974619 estimated_peak_memory_range: min: 217088 - max: 3451560 + max: 68562816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 19 - job_id: jygzv4nxp + job_id: jnp130625 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-11T11:57:01Z' + timestamp: '2024-06-22T23:05:07Z' - torchscript_onnx_tflite: - inference_time: 898.0 - throughput: 1113.5857461024498 + inference_time: 1004.0 + throughput: 996.01593625498 estimated_peak_memory_range: - min: 20480 - max: 20940320 + min: 16384 + max: 21443360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: j1p8w44op + job_id: jlpe29l0p job_status: Passed torchscript_onnx_qnn: - inference_time: 645.0 - throughput: 1550.3875968992247 + inference_time: 654.0 + throughput: 1529.051987767584 estimated_peak_memory_range: min: 208896 - max: 17163888 + max: 16681856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jwgoe48kp + job_id: jnp130vl5 job_status: Passed - torchscript_onnx_ort: - inference_time: 1070.0 - throughput: 934.5794392523364 + torchscript_onnx: + inference_time: 1040.0 + throughput: 961.5384615384615 estimated_peak_memory_range: min: 212992 - max: 13764384 + max: 13277408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 19 - job_id: jz5wm14mg + job_id: jvgd0w2ep job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-11T11:57:02Z' + timestamp: '2024-06-22T23:05:08Z' - torchscript_onnx_tflite: - inference_time: 1369.0 - throughput: 730.4601899196493 + inference_time: 1390.0 + throughput: 719.4244604316547 estimated_peak_memory_range: - min: 24576 - max: 1342320 + min: 12673024 + max: 14455856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jn5q9mmop + job_id: jygzwe46g job_status: Passed torchscript_onnx_qnn: - inference_time: 1010.0 - throughput: 990.0990099009902 + 
inference_time: 999.0 + throughput: 1001.001001001001 estimated_peak_memory_range: - min: 221184 - max: 7892152 + min: 28672 + max: 12587064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jlpe4lyv5 + job_id: jz5wxo46p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-11T11:57:00Z' + timestamp: '2024-06-22T23:05:05Z' + - torchscript_onnx_tflite: + inference_time: 1467.0 + throughput: 681.6632583503749 + estimated_peak_memory_range: + min: 28672 + max: 16733008 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 14 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 17 + job_id: jz5wxo1jp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1008.0 + throughput: 992.063492063492 + estimated_peak_memory_range: + min: 212992 + max: 67601112 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 17 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 17 + job_id: jmg98vdlp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:05:06Z' - torchscript_onnx_qnn: - inference_time: 1066.0 - throughput: 938.0863039399625 + inference_time: 1136.0 + throughput: 880.2816901408451 estimated_peak_memory_range: min: 204800 max: 204800 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: j7gjkwqe5 + job_id: jvgd0wzlp job_status: Passed - torchscript_onnx_ort: - inference_time: 1498.0 - throughput: 667.5567423230974 + torchscript_onnx: + inference_time: 1507.0 + throughput: 663.5700066357001 estimated_peak_memory_range: - min: 9003008 - max: 9003008 + min: 9035776 + max: 9035776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 19 - job_id: jmg99xd8g + job_id: jz576z9lg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-11T11:57:03Z' + timestamp: '2024-06-22T23:05:09Z' diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/export.py b/qai_hub_models/models/quicksrnetmedium_quantized/export.py index fa37875f..da366568 100644 --- a/qai_hub_models/models/quicksrnetmedium_quantized/export.py +++ b/qai_hub_models/models/quicksrnetmedium_quantized/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -127,7 +127,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -177,7 +177,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -195,7 +195,7 @@ def 
export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -217,7 +217,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/info.yaml b/qai_hub_models/models/quicksrnetmedium_quantized/info.yaml index e17071f4..f0d86e74 100644 --- a/qai_hub_models/models/quicksrnetmedium_quantized/info.yaml +++ b/qai_hub_models/models/quicksrnetmedium_quantized/info.yaml @@ -18,7 +18,7 @@ deploy_license: source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet technical_details: Model checkpoint: quicksrnet_medium_3x_checkpoint - Input resolution: 640x360 + Input resolution: 128x128 Number of parameters: 55.0K Model size: 67.2 KB applicable_scenarios: diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/model.py b/qai_hub_models/models/quicksrnetmedium_quantized/model.py index e16d87c5..72457952 100644 --- a/qai_hub_models/models/quicksrnetmedium_quantized/model.py +++ b/qai_hub_models/models/quicksrnetmedium_quantized/model.py @@ -76,6 +76,4 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() - return cls(sim, scale_factor) diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/perf.yaml b/qai_hub_models/models/quicksrnetmedium_quantized/perf.yaml index 72df875e..ce4d297d 100644 --- a/qai_hub_models/models/quicksrnetmedium_quantized/perf.yaml +++ b/qai_hub_models/models/quicksrnetmedium_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: QuickSRNetMedium-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1000.0 - throughput: 1000.0 + inference_time: 997.0 + throughput: 1003.0090270812437 estimated_peak_memory_range: - min: 12288 - max: 5493824 + min: 16384 + max: 1924136 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jvgd7z2zg + job_id: j0pxmvx1g job_status: Passed torchscript_onnx_qnn: - inference_time: 803.0 - throughput: 1245.3300124533 + inference_time: 804.0 + throughput: 1243.7810945273632 estimated_peak_memory_range: - min: 16384 - max: 10291792 + min: 20480 + max: 2668672 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jegnr7kq5 - job_status: Passed - torchscript_onnx_ort: - inference_time: 757.0 - throughput: 1321.003963011889 - estimated_peak_memory_range: - min: 65536 - max: 19746264 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 14 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 14 - job_id: j2p0ev6n5 + job_id: j2p0ky665 
job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-11T11:57:28Z' + timestamp: '2024-06-22T23:05:34Z' - torchscript_onnx_tflite: - inference_time: 814.0 - throughput: 1228.5012285012285 + inference_time: 949.0 + throughput: 1053.740779768177 estimated_peak_memory_range: - min: 12288 - max: 20707552 + min: 16384 + max: 21043280 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jz57v7995 + job_id: jo5m4r8w5 job_status: Passed torchscript_onnx_qnn: - inference_time: 546.0 - throughput: 1831.5018315018315 + inference_time: 548.0 + throughput: 1824.8175182481752 estimated_peak_memory_range: min: 65536 - max: 14574352 + max: 15605120 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jopr1nw7g - job_status: Passed - torchscript_onnx_ort: - inference_time: 558.0 - throughput: 1792.1146953405018 - estimated_peak_memory_range: - min: 65536 - max: 12140448 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 14 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 14 - job_id: j1p8w41op + job_id: j1p88o1xp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +120,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-11T11:57:29Z' + timestamp: '2024-06-22T23:05:35Z' - torchscript_onnx_tflite: - inference_time: 995.0 - throughput: 1005.0251256281407 + inference_time: 1019.0 + throughput: 981.3542688910696 estimated_peak_memory_range: min: 24576 - max: 3118760 + max: 1393312 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jqp4j931p + job_id: jegnx2kr5 job_status: Passed torchscript_onnx_qnn: - inference_time: 800.0 - throughput: 1250.0 + inference_time: 801.0 + throughput: 1248.4394506866417 estimated_peak_memory_range: - min: 16384 - max: 18363240 + min: 65536 + max: 10272704 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jqpyv7mlp + job_id: jn5qw8v45 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +158,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-11T11:57:27Z' + timestamp: '2024-06-22T23:05:38Z' + - torchscript_onnx_tflite: + inference_time: 1011.0 + throughput: 989.1196834817013 + estimated_peak_memory_range: + min: 36864 + max: 3142544 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 14 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 17 + job_id: jopr9kw9p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 802.0 + throughput: 1246.8827930174564 + estimated_peak_memory_range: + min: 12288 + max: 69736248 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 11 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 11 + job_id: j1gl7nl85 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:05:39Z' - torchscript_onnx_tflite: - inference_time: 1968.0 - throughput: 508.130081300813 + inference_time: 1906.0 + throughput: 
524.6589716684156 estimated_peak_memory_range: min: 12288 - max: 14747456 + max: 15189776 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,7 +210,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: j0pxedxl5 + job_id: jep2j8e45 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -207,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-11T11:57:22Z' + timestamp: '2024-06-22T23:05:32Z' - torchscript_onnx_tflite: - inference_time: 9155.0 - throughput: 109.22992900054615 + inference_time: 7949.0 + throughput: 125.80198767140521 estimated_peak_memory_range: - min: 3342336 - max: 7015776 + min: 3489792 + max: 6727064 primary_compute_unit: NPU precision: int8 layer_info: @@ -221,7 +233,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jo5mvd895 + job_id: jqpynem7g job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -230,13 +242,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-11T11:57:23Z' + timestamp: '2024-06-22T23:05:33Z' - torchscript_onnx_qnn: - inference_time: 764.0 - throughput: 1308.9005235602094 + inference_time: 712.0 + throughput: 1404.4943820224719 estimated_peak_memory_range: - min: 1196032 - max: 1196032 + min: 98304 + max: 98304 primary_compute_unit: NPU precision: int8 layer_info: @@ -244,22 +256,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jep23veqg - job_status: Passed - torchscript_onnx_ort: - inference_time: 781.0 - throughput: 1280.4097311139565 - estimated_peak_memory_range: - min: 7262208 - max: 7262208 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 14 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 14 - job_id: jogkr98n5 + job_id: jogkdz82p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -268,4 +265,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-11T11:57:30Z' + timestamp: '2024-06-22T23:05:36Z' diff --git a/qai_hub_models/models/quicksrnetsmall/export.py b/qai_hub_models/models/quicksrnetsmall/export.py index 7bf13e43..941bd800 100644 --- a/qai_hub_models/models/quicksrnetsmall/export.py +++ b/qai_hub_models/models/quicksrnetsmall/export.py @@ -38,7 +38,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,13 +117,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -169,7 +168,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -187,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif 
target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -209,7 +208,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/quicksrnetsmall/info.yaml b/qai_hub_models/models/quicksrnetsmall/info.yaml index 94d58142..2ccfe754 100644 --- a/qai_hub_models/models/quicksrnetsmall/info.yaml +++ b/qai_hub_models/models/quicksrnetsmall/info.yaml @@ -17,7 +17,7 @@ deploy_license: source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet technical_details: Model checkpoint: quicksrnet_small_3x_checkpoint - Input resolution: 640x360 + Input resolution: 128x128 Number of parameters: 27.2K Model size: 110 KB applicable_scenarios: diff --git a/qai_hub_models/models/quicksrnetsmall/model.py b/qai_hub_models/models/quicksrnetsmall/model.py index 9b21851e..83d53858 100644 --- a/qai_hub_models/models/quicksrnetsmall/model.py +++ b/qai_hub_models/models/quicksrnetsmall/model.py @@ -46,6 +46,5 @@ def from_pretrained( ) checkpoint = load_torch(checkpoint_asset) model.load_state_dict(checkpoint["state_dict"]) - model.eval() return cls(model, scale_factor) diff --git a/qai_hub_models/models/quicksrnetsmall/perf.yaml b/qai_hub_models/models/quicksrnetsmall/perf.yaml index 16c8950f..6364aa18 100644 --- a/qai_hub_models/models/quicksrnetsmall/perf.yaml +++ b/qai_hub_models/models/quicksrnetsmall/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: QuickSRNetSmall performance_metrics: - torchscript_onnx_tflite: - inference_time: 1334.0 - throughput: 749.6251874062968 + inference_time: 1328.0 + throughput: 753.0120481927711 estimated_peak_memory_range: - min: 28672 - max: 1646912 + min: 32768 + max: 4162488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: j1gle1xmp + job_id: jygzwenkg job_status: Passed torchscript_onnx_qnn: - inference_time: 1004.0 - throughput: 996.01593625498 + inference_time: 1017.0 + throughput: 983.284169124877 estimated_peak_memory_range: min: 221184 - max: 10711856 + max: 2807024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jwgoe4rkp + job_id: jvgd0w3ep job_status: Passed - torchscript_onnx_ort: - inference_time: 1405.0 - throughput: 711.7437722419929 + torchscript_onnx: + inference_time: 1440.0 + throughput: 694.4444444444445 estimated_peak_memory_range: - min: 212992 - max: 2559280 + min: 217088 + max: 3113208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 13 - job_id: jygzv4mxp + job_id: jegnx2nr5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-11T11:57:52Z' + timestamp: 
'2024-06-22T23:06:13Z' - torchscript_onnx_tflite: - inference_time: 936.0 - throughput: 1068.3760683760684 + inference_time: 915.0 + throughput: 1092.896174863388 estimated_peak_memory_range: min: 16384 - max: 19633600 + max: 19974256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jw56qd7yg + job_id: jz5wxo76p job_status: Passed torchscript_onnx_qnn: - inference_time: 624.0 - throughput: 1602.5641025641025 + inference_time: 627.0 + throughput: 1594.896331738437 estimated_peak_memory_range: min: 208896 - max: 13403568 + max: 12686432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: j1pvz9drg + job_id: jz576z4lg job_status: Passed - torchscript_onnx_ort: - inference_time: 949.0 - throughput: 1053.740779768177 + torchscript_onnx: + inference_time: 996.0 + throughput: 1004.0160642570281 estimated_peak_memory_range: - min: 212992 - max: 12509200 + min: 339968 + max: 12452704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 13 - job_id: jz5wm17mg + job_id: jopr9k09p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-11T11:57:53Z' + timestamp: '2024-06-22T23:06:14Z' - torchscript_onnx_tflite: - inference_time: 1319.0 - throughput: 758.1501137225171 + inference_time: 1379.0 + throughput: 725.1631617113851 estimated_peak_memory_range: - min: 20480 - max: 7876136 + min: 24576 + max: 1325592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: j1p3qw9n5 + job_id: jmg98vmlp job_status: Passed torchscript_onnx_qnn: - inference_time: 992.0 - throughput: 1008.0645161290323 + inference_time: 1020.0 + throughput: 980.3921568627451 estimated_peak_memory_range: min: 229376 - max: 12485448 + max: 2190712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jlpe4lzv5 + job_id: j0pxmv41g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,7 +180,45 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-11T11:57:51Z' + timestamp: '2024-06-22T23:06:10Z' + - torchscript_onnx_tflite: + inference_time: 1430.0 + throughput: 699.3006993006993 + estimated_peak_memory_range: + min: 6361088 + max: 8078024 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 8 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 11 + job_id: jnp130j25 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1003.0 + throughput: 997.0089730807578 + estimated_peak_memory_range: + min: 229376 + max: 9373504 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 11 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 11 + job_id: jo5m4rmw5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:06:12Z' - torchscript_onnx_qnn: inference_time: 1112.0 throughput: 899.2805755395683 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: j7gjkw7e5 + job_id: jqp48q1vg job_status: Passed - 
torchscript_onnx_ort: - inference_time: 1419.0 - throughput: 704.7216349541931 + torchscript_onnx: + inference_time: 1464.0 + throughput: 683.0601092896175 estimated_peak_memory_range: - min: 8966144 - max: 8966144 + min: 8876032 + max: 8876032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 13 - job_id: jmg99xm8g + job_id: jep2j8w45 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-11T11:57:53Z' + timestamp: '2024-06-22T23:06:15Z' diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/export.py b/qai_hub_models/models/quicksrnetsmall_quantized/export.py index 48cf6baf..5145913e 100644 --- a/qai_hub_models/models/quicksrnetsmall_quantized/export.py +++ b/qai_hub_models/models/quicksrnetsmall_quantized/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -127,7 +127,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -177,7 +177,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -195,7 +195,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -217,7 +217,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/info.yaml b/qai_hub_models/models/quicksrnetsmall_quantized/info.yaml index 37b32e7f..eefceadb 100644 --- a/qai_hub_models/models/quicksrnetsmall_quantized/info.yaml +++ b/qai_hub_models/models/quicksrnetsmall_quantized/info.yaml @@ -18,7 +18,7 @@ deploy_license: source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet technical_details: Model checkpoint: quicksrnet_small_3x_checkpoint - Input resolution: 640x360 + Input resolution: 128x128 Number of parameters: 27.2K Model size: 34.9 KB applicable_scenarios: diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/model.py b/qai_hub_models/models/quicksrnetsmall_quantized/model.py index 9b1c83a6..3491e3c0 100644 --- a/qai_hub_models/models/quicksrnetsmall_quantized/model.py +++ b/qai_hub_models/models/quicksrnetsmall_quantized/model.py @@ -76,6 +76,4 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() - return cls(sim, 
scale_factor) diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/perf.yaml b/qai_hub_models/models/quicksrnetsmall_quantized/perf.yaml index babfa73c..d9d93e62 100644 --- a/qai_hub_models/models/quicksrnetsmall_quantized/perf.yaml +++ b/qai_hub_models/models/quicksrnetsmall_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: QuickSRNetSmall-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 974.0 - throughput: 1026.694045174538 + inference_time: 958.0 + throughput: 1043.8413361169103 estimated_peak_memory_range: - min: 12288 - max: 2677152 + min: 24576 + max: 1721664 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jvgd7z3zg + job_id: j2p0kyj65 job_status: Passed torchscript_onnx_qnn: - inference_time: 671.0 - throughput: 1490.312965722802 + inference_time: 665.0 + throughput: 1503.7593984962407 estimated_peak_memory_range: - min: 65536 - max: 3287624 + min: 16384 + max: 56667584 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 8 - job_id: jegnr7nq5 - job_status: Passed - torchscript_onnx_ort: - inference_time: 691.0 - throughput: 1447.178002894356 - estimated_peak_memory_range: - min: 53248 - max: 3206304 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 12 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 12 - job_id: j2p0evjn5 + job_id: j1p38k9l5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-11T11:58:18Z' + timestamp: '2024-06-22T23:06:39Z' - torchscript_onnx_tflite: - inference_time: 793.0 - throughput: 1261.034047919294 + inference_time: 831.0 + throughput: 1203.3694344163657 estimated_peak_memory_range: - min: 16384 - max: 19014000 + min: 20480 + max: 19530352 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jz57v7495 + job_id: j1p88oxxp job_status: Passed torchscript_onnx_qnn: - inference_time: 458.0 - throughput: 2183.406113537118 + inference_time: 452.0 + throughput: 2212.3893805309735 estimated_peak_memory_range: - min: 81920 - max: 14588544 + min: 65536 + max: 14002832 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 8 - job_id: jopr1n07g - job_status: Passed - torchscript_onnx_ort: - inference_time: 553.0 - throughput: 1808.3182640144666 - estimated_peak_memory_range: - min: 65536 - max: 10727936 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 12 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 12 - job_id: j1p8w4xop + job_id: jwgomyrx5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +120,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-11T11:58:19Z' + timestamp: '2024-06-22T23:06:40Z' - torchscript_onnx_tflite: - inference_time: 960.0 - throughput: 1041.6666666666667 + 
inference_time: 964.0 + throughput: 1037.344398340249 estimated_peak_memory_range: - min: 20480 - max: 1503368 + min: 24576 + max: 3058040 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jqp4j911p + job_id: jogkdz42p job_status: Passed torchscript_onnx_qnn: - inference_time: 672.0 - throughput: 1488.095238095238 + inference_time: 661.0 + throughput: 1512.8593040847202 estimated_peak_memory_range: - min: 12288 - max: 47001808 + min: 16384 + max: 18934480 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 8 - job_id: jqpyv7xlp + job_id: j7gj1x7xg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +158,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-11T11:58:17Z' + timestamp: '2024-06-22T23:06:43Z' + - torchscript_onnx_tflite: + inference_time: 1136.0 + throughput: 880.2816901408451 + estimated_peak_memory_range: + min: 28672 + max: 1437824 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 8 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 11 + job_id: jn5qw8y45 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 665.0 + throughput: 1503.7593984962407 + estimated_peak_memory_range: + min: 12288 + max: 10731904 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 8 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 8 + job_id: jlpe29z1p + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:06:44Z' - torchscript_onnx_tflite: - inference_time: 1754.0 - throughput: 570.1254275940707 + inference_time: 2549.0 + throughput: 392.31071008238524 estimated_peak_memory_range: min: 12288 - max: 13580528 + max: 14671248 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,7 +210,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: j0pxed4l5 + job_id: j1gl7nx85 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -207,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-11T11:58:12Z' + timestamp: '2024-06-22T23:06:36Z' - torchscript_onnx_tflite: - inference_time: 5837.0 - throughput: 171.32088401576152 + inference_time: 5757.0 + throughput: 173.70158068438423 estimated_peak_memory_range: - min: 249856 - max: 7133040 + min: 172032 + max: 7165888 primary_compute_unit: NPU precision: int8 layer_info: @@ -221,7 +233,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jo5mvdm95 + job_id: jw56v670p job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -230,13 +242,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-11T11:58:13Z' + timestamp: '2024-06-22T23:06:38Z' - torchscript_onnx_qnn: - inference_time: 718.0 - throughput: 1392.757660167131 + inference_time: 689.0 + throughput: 1451.3788098693758 estimated_peak_memory_range: - min: 1077248 - max: 1077248 + min: 57344 + max: 57344 primary_compute_unit: NPU precision: int8 layer_info: @@ -244,22 +256,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 8 - job_id: jep23vwqg - job_status: Passed - torchscript_onnx_ort: - inference_time: 698.0 - throughput: 1432.6647564469913 - estimated_peak_memory_range: - min: 7000064 - max: 7000064 - 
primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 12 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 12 - job_id: jogkr94n5 + job_id: j1pv43djp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -268,4 +265,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-11T11:58:20Z' + timestamp: '2024-06-22T23:06:41Z' diff --git a/qai_hub_models/models/real_esrgan_general_x4v3/export.py b/qai_hub_models/models/real_esrgan_general_x4v3/export.py index 4aa9e1fd..0e7bf90f 100644 --- a/qai_hub_models/models/real_esrgan_general_x4v3/export.py +++ b/qai_hub_models/models/real_esrgan_general_x4v3/export.py @@ -38,7 +38,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,13 +117,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -169,7 +168,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -187,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -209,7 +208,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/real_esrgan_general_x4v3/perf.yaml b/qai_hub_models/models/real_esrgan_general_x4v3/perf.yaml index 6f9afa6c..726c3bb8 100644 --- a/qai_hub_models/models/real_esrgan_general_x4v3/perf.yaml +++ b/qai_hub_models/models/real_esrgan_general_x4v3/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: Real-ESRGAN-General-x4v3 performance_metrics: - torchscript_onnx_tflite: - inference_time: 7261.0 - throughput: 137.72207684891887 + inference_time: 7246.0 + throughput: 138.0071763731714 estimated_peak_memory_range: - min: 17604608 - max: 25105264 + min: 6352896 + max: 7871664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 72 - job_id: jmg994wwg + job_id: jqp48qyvg job_status: Passed torchscript_onnx_qnn: - inference_time: 6295.0 - 
throughput: 158.85623510722795 + inference_time: 6271.0 + throughput: 159.46420028703557 estimated_peak_memory_range: - min: 221184 - max: 4921640 + min: 16384 + max: 21425416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jz57vdxv5 + job_id: jopr9kl9p job_status: Passed - torchscript_onnx_ort: - inference_time: 6938.0 - throughput: 144.13375612568464 + torchscript_onnx: + inference_time: 6849.0 + throughput: 146.00671630895022 estimated_peak_memory_range: - min: 6332416 - max: 55155560 + min: 8425472 + max: 21248128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 74 - job_id: jegnre3k5 + job_id: jogkdz22p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:59:37Z' + timestamp: '2024-06-22T23:07:19Z' - torchscript_onnx_tflite: - inference_time: 5588.0 - throughput: 178.9549033643522 + inference_time: 5415.0 + throughput: 184.67220683287167 estimated_peak_memory_range: min: 20480 - max: 56093568 + max: 59196832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 72 - job_id: jnp1q8e8g + job_id: j0pxmvl1g job_status: Passed torchscript_onnx_qnn: - inference_time: 4604.0 - throughput: 217.2024326672459 + inference_time: 4603.0 + throughput: 217.24961981316534 estimated_peak_memory_range: - min: 208896 - max: 37726496 + min: 0 + max: 29466816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jqp4jwv8p + job_id: jep2j8r45 job_status: Passed - torchscript_onnx_ort: - inference_time: 5181.0 - throughput: 193.01293186643505 + torchscript_onnx: + inference_time: 5194.0 + throughput: 192.52984212552946 estimated_peak_memory_range: - min: 2310144 - max: 36289552 + min: 8531968 + max: 46212560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 74 - job_id: jopr1ye0g + job_id: jn5qw8l45 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:59:38Z' + timestamp: '2024-06-22T23:07:20Z' - torchscript_onnx_tflite: - inference_time: 7376.0 - throughput: 135.57483731019522 + inference_time: 7329.0 + throughput: 136.4442625187611 estimated_peak_memory_range: - min: 6377472 - max: 7904672 + min: 15773696 + max: 18993704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 72 - job_id: jvgd7vorg + job_id: jo5m4r0w5 job_status: Passed torchscript_onnx_qnn: - inference_time: 6271.0 - throughput: 159.46420028703557 + inference_time: 6252.0 + throughput: 159.9488163787588 estimated_peak_memory_range: - min: 131072 - max: 5213032 + min: 20480 + max: 9802976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jo5mvz3d5 + job_id: j2p0kym65 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:59:36Z' + timestamp: '2024-06-22T23:07:17Z' 
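A note on reading these perf.yaml entries: the throughput field is simply 1e6 / inference_time, i.e. inferences per second with inference_time reported in microseconds. A quick illustrative check (not part of the patch):

```python
# Sanity check of the perf.yaml arithmetic (illustrative only, not part of the patch):
# throughput == 1e6 / inference_time, i.e. inferences per second with
# inference_time expressed in microseconds.
inference_time_us = 7246.0               # Samsung Galaxy S23 TFLite entry above
throughput_per_sec = 1_000_000 / inference_time_us
print(throughput_per_sec)                # 138.0071763731714, matching the YAML
```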
+ - torchscript_onnx_tflite: + inference_time: 7428.0 + throughput: 134.62574044157242 + estimated_peak_memory_range: + min: 15761408 + max: 17170032 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 69 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 72 + job_id: jegnx2zr5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 6297.0 + throughput: 158.8057805304113 + estimated_peak_memory_range: + min: 16384 + max: 6527512 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 72 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 72 + job_id: j1p88oexp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:07:18Z' - torchscript_onnx_qnn: - inference_time: 8670.0 - throughput: 115.34025374855824 + inference_time: 6159.0 + throughput: 162.3640201331385 estimated_peak_memory_range: - min: 208896 - max: 208896 + min: 212992 + max: 212992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: j0pxe1y35 + job_id: jqpyneo7g job_status: Passed - torchscript_onnx_ort: - inference_time: 7041.0 - throughput: 142.02528049992898 + torchscript_onnx: + inference_time: 7065.0 + throughput: 141.54281670205236 estimated_peak_memory_range: - min: 8646656 - max: 8646656 + min: 8929280 + max: 8929280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 74 - job_id: jep23mlrg + job_id: j1gl7ny85 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:59:39Z' + timestamp: '2024-06-22T23:07:22Z' diff --git a/qai_hub_models/models/real_esrgan_x4plus/export.py b/qai_hub_models/models/real_esrgan_x4plus/export.py index ab0454e1..14243e7b 100644 --- a/qai_hub_models/models/real_esrgan_x4plus/export.py +++ b/qai_hub_models/models/real_esrgan_x4plus/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -115,7 +115,6 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. 
Compile the model to an on-device asset @@ -172,7 +171,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/real_esrgan_x4plus/model.py b/qai_hub_models/models/real_esrgan_x4plus/model.py index 965ce49b..758f6172 100644 --- a/qai_hub_models/models/real_esrgan_x4plus/model.py +++ b/qai_hub_models/models/real_esrgan_x4plus/model.py @@ -50,9 +50,7 @@ def from_pretrained( """Load RealESRGAN from a weightfile created by the source RealESRGAN repository.""" # Load PyTorch model from disk - realesrgan_model = _load_realesrgan_source_model_from_weights( - weight_path - ).eval() + realesrgan_model = _load_realesrgan_source_model_from_weights(weight_path) return cls(realesrgan_model) @@ -74,11 +72,10 @@ def forward(self, image: torch.Tensor) -> torch.Tensor: 3-channel Color Space: RGB """ - with torch.no_grad(): - # upscale - output = self.model(image) + # upscale + output = self.model(image) - output_img = output.squeeze().float().cpu().clamp_(0, 1) + output_img = output.squeeze().float().cpu().clamp_(0, 1) return output_img diff --git a/qai_hub_models/models/real_esrgan_x4plus/perf.yaml b/qai_hub_models/models/real_esrgan_x4plus/perf.yaml index c2ee5598..ceafb54d 100644 --- a/qai_hub_models/models/real_esrgan_x4plus/perf.yaml +++ b/qai_hub_models/models/real_esrgan_x4plus/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: Real-ESRGAN-x4plus performance_metrics: - torchscript_onnx_tflite: - inference_time: 71761.0 - throughput: 13.935145831301124 + inference_time: 69551.0 + throughput: 14.377938491179135 estimated_peak_memory_range: - min: 4210688 - max: 13102152 + min: 16384 + max: 5486376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: j2p0erl95 + job_id: j1p38kzl5 job_status: Passed torchscript_onnx_qnn: - inference_time: 70398.0 - throughput: 14.204949004233075 + inference_time: 67063.0 + throughput: 14.911352012286955 estimated_peak_memory_range: - min: 12288 - max: 106397920 + min: 94208 + max: 57757240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1031 - job_id: jn5q923np + job_id: jlpe2971p job_status: Passed - torchscript_onnx_ort: - inference_time: 65953.0 - throughput: 15.162312555911027 + torchscript_onnx: + inference_time: 68220.0 + throughput: 14.65845793022574 estimated_peak_memory_range: - min: 6344704 - max: 155593192 + min: 0 + max: 157156904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1030 - job_id: jwgoev3qp + job_id: jvgd0w6ep job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:00:20Z' + timestamp: 
'2024-06-22T23:08:06Z' - torchscript_onnx_tflite: - inference_time: 52163.0 - throughput: 19.170676533174856 + inference_time: 54479.0 + throughput: 18.355696690467887 estimated_peak_memory_range: - min: 77824 - max: 586842272 + min: 3272704 + max: 609931104 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: j1p8w7zkp + job_id: jwgomylx5 job_status: Passed torchscript_onnx_qnn: - inference_time: 50801.0 - throughput: 19.684651876931557 + inference_time: 50583.0 + throughput: 19.769487772571814 estimated_peak_memory_range: - min: 102400 - max: 264449376 + min: 73728 + max: 223617072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1031 - job_id: j1glek3jp + job_id: jygzwelkg job_status: Passed - torchscript_onnx_ort: - inference_time: 51691.0 - throughput: 19.34572749608249 + torchscript_onnx: + inference_time: 51083.0 + throughput: 19.57598418260478 estimated_peak_memory_range: - min: 6029312 - max: 190175536 + min: 6291456 + max: 172334736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1030 - job_id: j1pvzwwkg + job_id: jz576zolg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:00:21Z' + timestamp: '2024-06-22T23:08:07Z' - torchscript_onnx_tflite: - inference_time: 67995.0 - throughput: 14.706963747334363 + inference_time: 67790.0 + throughput: 14.75143826523086 estimated_peak_memory_range: - min: 1552384 - max: 4034000 + min: 20480 + max: 2510496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jogkry3w5 + job_id: j1pv43ljp job_status: Passed torchscript_onnx_qnn: - inference_time: 69414.0 - throughput: 14.406315728815512 + inference_time: 67113.0 + throughput: 14.900242873958845 estimated_peak_memory_range: - min: 0 - max: 56605216 + min: 49152 + max: 107219464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1031 - job_id: j1p3qme35 + job_id: jmg98volp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:00:19Z' + timestamp: '2024-06-22T23:08:04Z' + - torchscript_onnx_tflite: + inference_time: 71518.0 + throughput: 13.982493917615146 + estimated_peak_memory_range: + min: 3284992 + max: 7008096 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 1028 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 1028 + job_id: j7gj1xrxg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 66977.0 + throughput: 14.930498529345895 + estimated_peak_memory_range: + min: 118784 + max: 107384672 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 1031 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 1031 + job_id: jnp130o25 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:08:05Z' - torchscript_onnx_qnn: - inference_time: 73906.0 - throughput: 13.530701160934159 + inference_time: 65488.0 + throughput: 
15.2699731248473 estimated_peak_memory_range: - min: 212992 - max: 212992 + min: 204800 + max: 204800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1030 - job_id: jw56q1n6g + job_id: jz5wxoy6p job_status: Passed - torchscript_onnx_ort: - inference_time: 65787.0 - throughput: 15.20057154148996 + torchscript_onnx: + inference_time: 65810.0 + throughput: 15.1952590791673 estimated_peak_memory_range: - min: 233472 - max: 233472 + min: 270336 + max: 270336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1030 - job_id: j7gjkllv5 + job_id: jqp48qevg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:00:22Z' + timestamp: '2024-06-22T23:08:08Z' diff --git a/qai_hub_models/models/regnet/export.py b/qai_hub_models/models/regnet/export.py index 48731313..0954dcb2 100644 --- a/qai_hub_models/models/regnet/export.py +++ b/qai_hub_models/models/regnet/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/regnet/perf.yaml b/qai_hub_models/models/regnet/perf.yaml index 6dbdb072..627462fa 100644 --- a/qai_hub_models/models/regnet/perf.yaml +++ b/qai_hub_models/models/regnet/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: RegNet performance_metrics: - torchscript_onnx_tflite: - inference_time: 2344.0 - throughput: 426.6211604095563 + inference_time: 2041.0 + throughput: 489.9559039686428 estimated_peak_memory_range: - min: 40960 - max: 2564000 + min: 20480 + max: 2516576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 114 - job_id: jygzv77op + job_id: jo5m4r9w5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2105.0 - 
throughput: 475.05938242280286 + inference_time: 2111.0 + throughput: 473.70914258645195 estimated_peak_memory_range: - min: 16384 - max: 66214464 + min: 622592 + max: 11175952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - job_id: jnp1q888g + job_id: jqpyne87g job_status: Passed - torchscript_onnx_ort: - inference_time: 2313.0 - throughput: 432.33895373973195 + torchscript_onnx: + inference_time: 2362.0 + throughput: 423.3700254022015 estimated_peak_memory_range: - min: 16384 - max: 109504192 + min: 12288 + max: 110151488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 190 - job_id: jnp1q887g + job_id: j1gl7nw85 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:00:51Z' + timestamp: '2024-06-22T23:08:40Z' - torchscript_onnx_tflite: - inference_time: 1623.0 - throughput: 616.1429451632779 + inference_time: 1413.0 + throughput: 707.7140835102618 estimated_peak_memory_range: min: 16384 - max: 137911392 + max: 142891136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 114 - job_id: jz5wm993g + job_id: jegnx21r5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1494.0 - throughput: 669.3440428380187 + inference_time: 1485.0 + throughput: 673.4006734006734 estimated_peak_memory_range: min: 618496 - max: 75619760 + max: 68646048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - job_id: jvgd7vvrg + job_id: j2p0kyo65 job_status: Passed - torchscript_onnx_ort: - inference_time: 1637.0 - throughput: 610.8735491753207 + torchscript_onnx: + inference_time: 1562.0 + throughput: 640.2048655569782 estimated_peak_memory_range: - min: 0 - max: 37581584 + min: 618496 + max: 35955472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 190 - job_id: jvgd7vvzg + job_id: jw56v6o0p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:00:52Z' + timestamp: '2024-06-22T23:08:41Z' - torchscript_onnx_tflite: - inference_time: 2318.0 - throughput: 431.40638481449525 + inference_time: 2008.0 + throughput: 498.00796812749 estimated_peak_memory_range: - min: 16384 - max: 2479152 + min: 24576 + max: 1706424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 114 - job_id: jmg9944wg + job_id: jopr9kx9p job_status: Passed torchscript_onnx_qnn: - inference_time: 2102.0 - throughput: 475.7373929590866 + inference_time: 2109.0 + throughput: 474.158368895211 estimated_peak_memory_range: min: 12288 - max: 14056768 + max: 66527232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - job_id: jmg99448g + job_id: jogkdz62p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:00:50Z' + timestamp: '2024-06-22T23:08:38Z' + - torchscript_onnx_tflite: + 
inference_time: 2028.0 + throughput: 493.0966469428008 + estimated_peak_memory_range: + min: 16384 + max: 2181800 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 114 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 114 + job_id: jep2j8o45 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 2108.0 + throughput: 474.3833017077799 + estimated_peak_memory_range: + min: 24576 + max: 16473816 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 188 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 188 + job_id: jn5qw8445 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:08:39Z' - torchscript_onnx_qnn: - inference_time: 2475.0 - throughput: 404.04040404040404 + inference_time: 2204.0 + throughput: 453.7205081669691 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - job_id: jz5wm99mg + job_id: j1p88ojxp job_status: Passed - torchscript_onnx_ort: - inference_time: 2215.0 - throughput: 451.46726862302484 + torchscript_onnx: + inference_time: 2182.0 + throughput: 458.29514207149407 estimated_peak_memory_range: - min: 651264 - max: 651264 + min: 69373952 + max: 69373952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 190 - job_id: jz57vdd95 + job_id: j1p38kol5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:00:53Z' + timestamp: '2024-06-22T23:08:42Z' diff --git a/qai_hub_models/models/regnet_quantized/README.md b/qai_hub_models/models/regnet_quantized/README.md new file mode 100644 index 00000000..c20388d0 --- /dev/null +++ b/qai_hub_models/models/regnet_quantized/README.md @@ -0,0 +1,61 @@ +[![Qualcomm® AI Hub Models](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/quic-logo.jpg)](../../README.md) + + +# [RegNetQuantized: Imagenet classifier and general purpose backbone](https://aihub.qualcomm.com/models/regnet_quantized) + +RegNet is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. + +This is based on the implementation of RegNetQuantized found +[here](https://github.com/pytorch/vision/blob/main/torchvision/models/regnet.py). This repository contains scripts for optimized on-device +export suitable to run on Qualcomm® devices. More details on model performance +across various devices can be found [here](https://aihub.qualcomm.com/models/regnet_quantized). + +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on +a hosted Qualcomm® device. + + + + +## Example & Usage + +Install the package via pip: +```bash +pip install "qai_hub_models[regnet_quantized]" +``` + + +Once installed, run the following simple CLI demo: + +```bash +python -m qai_hub_models.models.regnet_quantized.demo +``` +More details on the CLI tool can be found with the `--help` option. See +[demo.py](demo.py) for sample usage of the model including pre/post processing +scripts. Please refer to our [general instructions on using +models](../../../#getting-started) for more usage instructions.
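For readers who would rather drive the model from Python than the CLI demo, a minimal sketch is below (illustrative only, not part of the new README; it assumes the default pretrained weights and the `Model` alias exported by this package's `__init__.py`):

```python
# Minimal sketch, assuming default pretrained weights/encodings.
# Model is the RegNetQuantizable alias exported by
# qai_hub_models.models.regnet_quantized.__init__ (shown below in this diff).
from qai_hub_models.models.regnet_quantized import Model

model = Model.from_pretrained()       # same entry point used by demo.py and export.py
input_spec = model.get_input_spec()   # the spec the export script compiles against
print(input_spec)                     # expected to describe an "image_tensor" input
```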
+ +## Export for on-device deployment + +This repository contains export scripts that produce a model optimized for +on-device deployment. This can be run as follows: + +```bash +python -m qai_hub_models.models.regnet_quantized.export +``` +Additional options are documented with the `--help` option. Note that the above +script requires access to Qualcomm® AI Hub. See the Qualcomm® AI Hub deployment instructions for details. + +## License +- The license for the original implementation of RegNetQuantized can be found + [here](https://github.com/pytorch/vision/blob/main/LICENSE). +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) + +## References +* [Designing Network Design Spaces](https://arxiv.org/abs/2003.13678) +* [Source Model Implementation](https://github.com/pytorch/vision/blob/main/torchvision/models/regnet.py) + +## Community +* Join [our AI Hub Slack community](https://qualcomm-ai-hub.slack.com/join/shared_invite/zt-2d5zsmas3-Sj0Q9TzslueCjS31eXG2UA#/shared-invite/email) to collaborate, post questions and learn more about on-device AI. +* For questions or feedback please [reach out to us](mailto:ai-hub-support@qti.qualcomm.com). + + diff --git a/qai_hub_models/models/regnet_quantized/__init__.py b/qai_hub_models/models/regnet_quantized/__init__.py new file mode 100644 index 00000000..5803cf7b --- /dev/null +++ b/qai_hub_models/models/regnet_quantized/__init__.py @@ -0,0 +1,10 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.imagenet_classifier.app import ( # noqa: F401 + ImagenetClassifierApp as App, +) + +from .model import MODEL_ID # noqa: F401 +from .model import RegNetQuantizable as Model # noqa: F401 diff --git a/qai_hub_models/models/regnet_quantized/conftest.py b/qai_hub_models/models/regnet_quantized/conftest.py new file mode 100644 index 00000000..692d0feb --- /dev/null +++ b/qai_hub_models/models/regnet_quantized/conftest.py @@ -0,0 +1,37 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +import inspect + +import pytest + +from qai_hub_models.models.regnet_quantized import Model + + +# Instantiate the model only once for all tests. +# Mock from_pretrained to always return the initialized model. +# This speeds up tests and limits memory leaks.
+@pytest.fixture(scope="module", autouse=True) +def cached_from_pretrained(): + with pytest.MonkeyPatch.context() as mp: + pretrained_cache = {} + from_pretrained = Model.from_pretrained + sig = inspect.signature(from_pretrained) + + def _cached_from_pretrained(*args, **kwargs): + cache_key = str(args) + str(kwargs) + model = pretrained_cache.get(cache_key, None) + if model: + return model + else: + model = from_pretrained(*args, **kwargs) + pretrained_cache[cache_key] = model + return model + + _cached_from_pretrained.__signature__ = sig + + mp.setattr(Model, "from_pretrained", _cached_from_pretrained) + yield mp diff --git a/qai_hub_models/models/regnet_quantized/demo.py b/qai_hub_models/models/regnet_quantized/demo.py new file mode 100644 index 00000000..d7513945 --- /dev/null +++ b/qai_hub_models/models/regnet_quantized/demo.py @@ -0,0 +1,14 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo +from qai_hub_models.models.regnet_quantized.model import MODEL_ID, RegNetQuantizable + + +def main(is_test: bool = False): + imagenet_demo(RegNetQuantizable, MODEL_ID, is_test) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/regnet_quantized/evaluate.py b/qai_hub_models/models/regnet_quantized/evaluate.py new file mode 100644 index 00000000..4eb83eec --- /dev/null +++ b/qai_hub_models/models/regnet_quantized/evaluate.py @@ -0,0 +1,62 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.regnet_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args +from qai_hub_models.utils.quantization_aimet import AIMETQuantizableMixin + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + + # Use Fp16 model for torch inference + for cls in Model.__mro__: + if issubclass(cls, BaseModel) and not issubclass(cls, AIMETQuantizableMixin): + torch_cls = cls + break + torch_model = torch_cls.from_pretrained(**get_model_kwargs(torch_cls, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/regnet_quantized/export.py b/qai_hub_models/models/regnet_quantized/export.py new file mode 100644 index 00000000..9a3d5b70 --- /dev/null +++ b/qai_hub_models/models/regnet_quantized/export.py @@ -0,0 +1,236 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import os +import warnings +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple, cast + +import qai_hub as hub + +from qai_hub_models.models.regnet_quantized import Model +from qai_hub_models.utils.args import ( + export_parser, + get_input_spec_kwargs, + get_model_kwargs, +) +from qai_hub_models.utils.base_model import TargetRuntime +from qai_hub_models.utils.compare import torch_inference +from qai_hub_models.utils.printing import ( + print_inference_metrics, + print_on_target_demo_cmd, + print_profile_metrics_from_job, +) +from qai_hub_models.utils.qai_hub_helpers import ( + can_access_qualcomm_ai_hub, + export_without_hub_access, + transpose_channel_first_to_last, +) +from qai_hub_models.utils.qnn_helpers import get_qnn_inputs + + +def export_model( + device: str = "Samsung Galaxy S23 (Family)", + chipset: Optional[str] = None, + skip_profiling: bool = False, + skip_inferencing: bool = False, + skip_downloading: bool = False, + skip_summary: bool = False, + output_dir: Optional[str] = None, + target_runtime: TargetRuntime = TargetRuntime.TFLITE, + compile_options: str = "", + profile_options: str = "", + **additional_model_kwargs, +) -> Tuple[hub.CompileJob, Optional[hub.ProfileJob], Optional[hub.InferenceJob]] | List[ + str +]: + """ + This function accomplishes 6 main tasks: + + 1. Instantiates a PyTorch model and converts it to a traced TorchScript format. + 2. 
Compiles the model to an asset that can be run on device. + 3. Profiles the model performance on real devices. + 4. Inferences the model on sample inputs. + 5. Downloads the model asset to the local directory. + 6. Summarizes the results from profiling and inference. + + Each of the last four steps can be optionally skipped using the input options. + + Parameters: + device: Device for which to export the model. + Full list of available devices can be found by running `hub.get_devices()`. + Defaults to DEFAULT_DEVICE if not specified. + chipset: If set, will choose a random device with this chipset. + Overrides the `device` argument. + skip_profiling: If set, skips profiling of compiled model on real devices. + skip_inferencing: If set, skips computing on-device outputs from sample data. + skip_downloading: If set, skips downloading of compiled model. + skip_summary: If set, skips waiting for and summarizing results + from profiling and inference. + output_dir: Directory to store generated assets (e.g. compiled model). + Defaults to `/build/`. + target_runtime: Which on-device runtime to target. Default is TFLite. + compile_options: Additional options to pass when submitting the compile job. + profile_options: Additional options to pass when submitting the profile job. + **additional_model_kwargs: Additional optional kwargs used to customize + `model_cls.from_pretrained` and `model.get_input_spec` + + Returns: + A 3-tuple of: + * A CompileJob object containing metadata about the compile job submitted to hub. + * A ProfileJob containing metadata about the profile job (None if profiling skipped). + * An InferenceJob containing metadata about the inference job (None if inferencing skipped). + """ + model_name = "regnet_quantized" + output_path = Path(output_dir or Path.cwd() / "build" / model_name) + if chipset: + hub_device = hub.Device(attributes=f"chipset:{chipset}") + else: + hub_device = hub.Device(name=device) + if not can_access_qualcomm_ai_hub(): + return export_without_hub_access( + "regnet_quantized", + "RegNetQuantized", + device, + skip_profiling, + skip_inferencing, + skip_downloading, + skip_summary, + output_path, + target_runtime, + compile_options, + profile_options, + ) + + # 1. Initialize PyTorch model + model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) + input_spec = model.get_input_spec( + **get_input_spec_kwargs(model, additional_model_kwargs) + ) + + # Trace the model + source_model = model.convert_to_hub_source_model( + target_runtime, output_path, input_spec + ) + if target_runtime == TargetRuntime.TFLITE: + quant_calibration_data = None + else: + quant_calibration_data = model.get_calibration_data(target_runtime, input_spec) + + # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) + channel_last_flags = ( + " --force_channel_last_input image_tensor" + if target_runtime != TargetRuntime.ONNX + else "" + ) + + # 2. Compile the model to an on-device asset + model_compile_options = model.get_hub_compile_options( + target_runtime, compile_options + channel_last_flags, hub_device + ) + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( + model=source_model, + input_specs=input_spec, + device=hub_device, + name=model_name, + calibration_data=quant_calibration_data, + options=model_compile_options, + ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) + + # 3. 
Profile the model asset on real devices + profile_job: Optional[hub.client.ProfileJob] = None + if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print(f"Profiling model {model_name} on a hosted device.") + submitted_profile_job = hub.submit_profile_job( + model=compile_job.get_target_model(), + device=hub_device, + name=model_name, + options=profile_options_all, + ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) + + # 4. Run inference on-device with sample inputs + inference_job: Optional[hub.client.InferenceJob] = None + if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print( + f"Running inference for {model_name} on a hosted device with example inputs." + ) + sample_inputs = model.sample_inputs(input_spec) + hub_inputs = sample_inputs + if target_runtime == TargetRuntime.QNN: + hub_inputs = get_qnn_inputs(compile_job, sample_inputs) + # Convert inputs from channel first to channel last + hub_inputs = ( + sample_inputs + if target_runtime == TargetRuntime.ONNX + else transpose_channel_first_to_last( + "image_tensor", sample_inputs, target_runtime + ) + ) + submitted_inference_job = hub.submit_inference_job( + model=compile_job.get_target_model(), + inputs=hub_inputs, + device=hub_device, + name=model_name, + options=profile_options_all, + ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) + + # 5. Download the model asset to a local file + if not skip_downloading: + if target_runtime == TargetRuntime.QNN: + target_runtime_extension = "so" + elif target_runtime == TargetRuntime.TFLITE: + target_runtime_extension = "tflite" + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: + target_runtime_extension = "onnx" + + os.makedirs(output_path, exist_ok=True) + target_model: hub.Model = compile_job.get_target_model() # type: ignore + target_model.download( + str(output_path / f"{model_name}.{target_runtime_extension}") + ) + + # 6. Summarize the results from profiling and inference + if not skip_summary and not skip_profiling: + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore + print_profile_metrics_from_job(profile_job, profile_data) + + if not skip_summary and not skip_inferencing: + torch_out = torch_inference(model, sample_inputs) + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore + print_inference_metrics( + inference_job, inference_result, torch_out, metrics="psnr,top1,top5" + ) + + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + + return (compile_job, profile_job, inference_job) + + +def main(): + warnings.filterwarnings("ignore") + parser = export_parser(model_cls=Model) + args = parser.parse_args() + export_model(**vars(args)) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/regnet_quantized/info.yaml b/qai_hub_models/models/regnet_quantized/info.yaml new file mode 100644 index 00000000..170239e5 --- /dev/null +++ b/qai_hub_models/models/regnet_quantized/info.yaml @@ -0,0 +1,44 @@ +name: RegNetQuantized +# id must match with the model dir name in qai_hub_models +id: regnet_quantized +status: public +headline: Imagenet classifier and general purpose backbone. 
+domain: Computer Vision +description: RegNet is a machine learning model that can classify images from the + Imagenet dataset. It can also be used as a backbone in building more complex models + for specific use cases. +use_case: Image Classification +tags: + - backbone + - quantized +research_paper: https://arxiv.org/abs/2003.13678 +research_paper_title: Designing Network Design Spaces +license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/regnet.py +technical_details: + Model checkpoint: Imagenet + Input resolution: 224x224 + Number of parameters: 15.3M + Model size: 15.4 MB +applicable_scenarios: + - Medical Imaging + - Anomaly Detection + - Inventory Management +related_models: + - mobilenet_v2 + - densenet121 + - googlenet +form_factors: + - Phone + - Tablet + - IoT + - XR +has_static_banner: yes +has_animated_banner: yes +license_type: bsd-3-clause +deploy_license_type: AI Model Hub License +dataset: + - imagenet-1k + - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/regnet_quantized/model.py b/qai_hub_models/models/regnet_quantized/model.py new file mode 100644 index 00000000..47e79fed --- /dev/null +++ b/qai_hub_models/models/regnet_quantized/model.py @@ -0,0 +1,86 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from __future__ import annotations + +# isort: off +# This verifies aimet is installed, and this must be included first. +from qai_hub_models.utils.quantization_aimet import ( + AIMETQuantizableMixin, + constrain_quantized_inputs_to_image_range, +) + +# isort: on + +import torch +from aimet_torch.cross_layer_equalization import ( + equalize_bn_folded_model, + fold_all_batch_norms, +) +from aimet_torch.model_preparer import prepare_model +from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim + +from qai_hub_models.models.regnet.model import RegNet +from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset + +MODEL_ID = __name__.split(".")[-2] +MODEL_ASSET_VERSION = 4 +DEFAULT_ENCODINGS = "regnet_quantized_encodings.json" + + +class RegNetQuantizable(AIMETQuantizableMixin, RegNet): + """RegNet with post train quantization support. + + Supports only 8 bit weights and activations, and only loads pre-quantized checkpoints. + Support for quantizing using your own weights & data will come at a later date.""" + + def __init__( + self, + sim_model: QuantizationSimModel, + ) -> None: + # Input is already normalized by sim_model. Disable it in the wrapper model. + RegNet.__init__(self, sim_model.model, normalize_input=False) + AIMETQuantizableMixin.__init__( + self, + sim_model, + ) + + @classmethod + def from_pretrained( + cls, + aimet_encodings: str | None = "DEFAULT", + ) -> "RegNetQuantizable": + """ + Parameters: + aimet_encodings: + if "DEFAULT": Loads the model with aimet encodings calibrated on imagenette. + elif None: Doesn't load any encodings. Used when computing encodings. + else: Interprets as a filepath and loads the encodings stored there. 
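+
+            For example, RegNetQuantizable.from_pretrained() fetches the default
+            encodings JSON from the asset store, while
+            from_pretrained(aimet_encodings="/path/to/my_encodings.json") loads
+            a locally produced AIMET encodings file instead.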
+ """ + model = RegNet.from_pretrained() + input_shape = cls.get_input_spec()["image_tensor"][0] + model = prepare_model(model) + dummy_input = torch.rand(input_shape) + + pairs = fold_all_batch_norms(model, input_shape, dummy_input) + equalize_bn_folded_model(model, input_shape, pairs, dummy_input) + sim = QuantizationSimModel( + model, + quant_scheme="tf_enhanced", + default_param_bw=8, + default_output_bw=8, + config_file=get_default_aimet_config(), + dummy_input=dummy_input, + ) + constrain_quantized_inputs_to_image_range(sim) + + if aimet_encodings: + if aimet_encodings == "DEFAULT": + aimet_encodings = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, DEFAULT_ENCODINGS + ).fetch() + load_encodings_to_sim(sim, aimet_encodings) + + return cls(sim) diff --git a/qai_hub_models/models/regnet_quantized/perf.yaml b/qai_hub_models/models/regnet_quantized/perf.yaml new file mode 100644 index 00000000..fa000bc3 --- /dev/null +++ b/qai_hub_models/models/regnet_quantized/perf.yaml @@ -0,0 +1,265 @@ +aggregated: + supported_oses: + - Android + supported_devices: + - Google Pixel 3 + - Google Pixel 3a + - Google Pixel 3a XL + - Google Pixel 4 + - Google Pixel 4a + - Google Pixel 5a 5G + - QCS8250 (Proxy) + - QCS8550 (Proxy) + - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) + - Samsung Galaxy S21 + - Samsung Galaxy S21 Ultra + - Samsung Galaxy S21+ + - Samsung Galaxy S22 5G + - Samsung Galaxy S22 Ultra 5G + - Samsung Galaxy S22+ 5G + - Samsung Galaxy S23 + - Samsung Galaxy S23 Ultra + - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ + - Samsung Galaxy Tab S8 + - Snapdragon X Elite CRD + - Xiaomi 12 + - Xiaomi 12 Pro + supported_chipsets: + - Qcs8250 + - Qcs8550 + - Sa8540p + - Sa8775p + - Snapdragon® 8 Gen 1 + - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 + - Snapdragon® 888 + - Snapdragon® X Elite +models: +- name: RegNetQuantized + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 898.0 + throughput: 1113.5857461024498 + estimated_peak_memory_range: + min: 28672 + max: 1533712 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 114 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 114 + job_id: j1pv432jp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1009.0 + throughput: 991.0802775024777 + estimated_peak_memory_range: + min: 16384 + max: 63537136 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 113 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 113 + job_id: jnp130125 + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-06-22T23:09:40Z' + - torchscript_onnx_tflite: + inference_time: 639.0 + throughput: 1564.9452269170579 + estimated_peak_memory_range: + min: 16384 + max: 131360192 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 114 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 114 + job_id: j7gj1x3xg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 735.0 + throughput: 1360.544217687075 + estimated_peak_memory_range: + min: 163840 + max: 64946208 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 113 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 113 + job_id: jvgd0w4ep + job_status: Passed + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + 
manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-06-22T23:09:41Z' + - torchscript_onnx_tflite: + inference_time: 887.0 + throughput: 1127.3957158962796 + estimated_peak_memory_range: + min: 12288 + max: 1452496 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 114 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 114 + job_id: jlpe2961p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1006.0 + throughput: 994.0357852882704 + estimated_peak_memory_range: + min: 24576 + max: 52560944 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 113 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 113 + job_id: jmg98v2wp + job_status: Passed + reference_device_info: + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: Qcs8550 + timestamp: '2024-06-22T23:09:44Z' + - torchscript_onnx_tflite: + inference_time: 892.0 + throughput: 1121.0762331838564 + estimated_peak_memory_range: + min: 12288 + max: 1588400 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 114 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 114 + job_id: jygzwezkg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1011.0 + throughput: 989.1196834817013 + estimated_peak_memory_range: + min: 180224 + max: 11914200 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 113 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 113 + job_id: jnp130185 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:09:45Z' + - torchscript_onnx_tflite: + inference_time: 29271.0 + throughput: 34.1635065423115 + estimated_peak_memory_range: + min: 114688 + max: 75067024 + primary_compute_unit: GPU + precision: int8 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 114 + layers_on_cpu: 0 + total_layers: 114 + job_id: jz5wxoz6p + job_status: Passed + reference_device_info: + name: RB3 Gen 2 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: Qcs6490 + timestamp: '2024-06-22T23:09:37Z' + - torchscript_onnx_tflite: + inference_time: 42180.0 + throughput: 23.70791844476055 + estimated_peak_memory_range: + min: 299008 + max: 67927560 + primary_compute_unit: GPU + precision: int8 + layer_info: + layers_on_npu: 10 + layers_on_gpu: 91 + layers_on_cpu: 13 + total_layers: 114 + job_id: jmg98v2lp + job_status: Passed + reference_device_info: + name: RB5 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: Qcs8250 + timestamp: '2024-06-22T23:09:38Z' + - torchscript_onnx_qnn: + inference_time: 1104.0 + throughput: 905.7971014492754 + estimated_peak_memory_range: + min: 495616 + max: 495616 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 113 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 113 + job_id: jz5wxoz3p + job_status: Passed + reference_device_info: + name: Snapdragon X Elite CRD + os: '11' + form_factor: Compute + os_name: Windows + manufacturer: Qualcomm + chipset: Snapdragon® X Elite + timestamp: '2024-06-22T23:09:42Z' diff --git a/qai_hub_models/models/regnet_quantized/requirements.txt b/qai_hub_models/models/regnet_quantized/requirements.txt new file mode 100644 index 00000000..e3567f29 --- /dev/null +++ b/qai_hub_models/models/regnet_quantized/requirements.txt @@ -0,0 +1 @@ 
+aimet-torch==1.31.2; sys_platform == "linux" diff --git a/qai_hub_models/models/regnet_quantized/test.py b/qai_hub_models/models/regnet_quantized/test.py new file mode 100644 index 00000000..6018cb2a --- /dev/null +++ b/qai_hub_models/models/regnet_quantized/test.py @@ -0,0 +1,30 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( + run_imagenet_classifier_test, +) +from qai_hub_models.models.regnet_quantized.demo import main as demo_main +from qai_hub_models.models.regnet_quantized.model import ( + MODEL_ASSET_VERSION, + MODEL_ID, + RegNetQuantizable, +) + + +def test_task(): + run_imagenet_classifier_test( + RegNetQuantizable.from_pretrained(), + MODEL_ID, + probability_threshold=0.45, + diff_tol=0.005, + atol=0.2, + rtol=0.02, + asset_version=MODEL_ASSET_VERSION, + ) + + +def test_demo(): + # Verify demo does not crash + demo_main(is_test=True) diff --git a/qai_hub_models/models/resnet101/export.py b/qai_hub_models/models/resnet101/export.py index 337029e4..6bc47ee7 100644 --- a/qai_hub_models/models/resnet101/export.py +++ b/qai_hub_models/models/resnet101/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/resnet101/perf.yaml b/qai_hub_models/models/resnet101/perf.yaml index c30b049f..61225b6f 100644 --- a/qai_hub_models/models/resnet101/perf.yaml +++ b/qai_hub_models/models/resnet101/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: ResNet101 performance_metrics: - torchscript_onnx_tflite: - inference_time: 3383.0 - throughput: 295.5956251847473 + inference_time: 3404.0 + throughput: 293.7720329024677 estimated_peak_memory_range: - min: 16384 - max: 2493664 + min: 204800 + max: 2170488 primary_compute_unit: 
NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: j1pvzw1rg + job_id: jopr9kd0p job_status: Passed torchscript_onnx_qnn: - inference_time: 3448.0 - throughput: 290.0232018561485 + inference_time: 3471.0 + throughput: 288.1014116969173 estimated_peak_memory_range: - min: 16384 - max: 173843416 + min: 622592 + max: 174493464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jygzv7xxp + job_id: j1p88orkp job_status: Passed - torchscript_onnx_ort: - inference_time: 3614.0 - throughput: 276.70171555063644 + torchscript_onnx: + inference_time: 3563.0 + throughput: 280.662363177098 estimated_peak_memory_range: - min: 45056 - max: 355647168 + min: 12288 + max: 322812304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: jvgd7vrzg + job_id: j1p38k735 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:02:27Z' + timestamp: '2024-06-22T23:10:25Z' - torchscript_onnx_tflite: - inference_time: 2440.0 - throughput: 409.8360655737705 + inference_time: 2446.0 + throughput: 408.8307440719542 estimated_peak_memory_range: min: 16384 - max: 109471344 + max: 116075088 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: j7gjkl0e5 + job_id: jep2j8dr5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2514.0 - throughput: 397.77247414478916 + inference_time: 2511.0 + throughput: 398.24771007566704 estimated_peak_memory_range: min: 618496 - max: 81083536 + max: 73670352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jz5wm9dmg + job_id: jogkdz0wp job_status: Passed - torchscript_onnx_ort: - inference_time: 2575.0 - throughput: 388.3495145631068 + torchscript_onnx: + inference_time: 2592.0 + throughput: 385.8024691358025 estimated_peak_memory_range: min: 618496 - max: 46866960 + max: 41407440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: jz57vdj95 + job_id: jwgomywq5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:02:28Z' + timestamp: '2024-06-22T23:10:26Z' - torchscript_onnx_tflite: - inference_time: 3388.0 - throughput: 295.159386068477 + inference_time: 3411.0 + throughput: 293.1691586045148 estimated_peak_memory_range: - min: 28672 - max: 1888064 + min: 24576 + max: 2249240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jlpe4vrv5 + job_id: jqpyne28g job_status: Passed torchscript_onnx_qnn: - inference_time: 3458.0 - throughput: 289.1844997108155 + inference_time: 3460.0 + throughput: 289.01734104046244 estimated_peak_memory_range: - min: 626688 - max: 163514888 + min: 618496 + max: 152968824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jnp1q8d7g + job_id: j1gl7n8j5 job_status: Passed 
reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:02:26Z' + timestamp: '2024-06-22T23:10:23Z' + - torchscript_onnx_tflite: + inference_time: 3389.0 + throughput: 295.0722927117144 + estimated_peak_memory_range: + min: 53248 + max: 2091288 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 147 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 147 + job_id: j2p0ky995 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 3449.0 + throughput: 289.93911278631487 + estimated_peak_memory_range: + min: 618496 + max: 163506848 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 245 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 245 + job_id: jw56v6m6p + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:10:24Z' - torchscript_onnx_qnn: - inference_time: 4024.0 - throughput: 248.5089463220676 + inference_time: 3490.0 + throughput: 286.5329512893983 estimated_peak_memory_range: - min: 1011712 - max: 1011712 + min: 602112 + max: 602112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jmg99438g + job_id: jn5qw81n5 job_status: Passed - torchscript_onnx_ort: - inference_time: 3536.0 - throughput: 282.80542986425337 + torchscript_onnx: + inference_time: 3502.0 + throughput: 285.5511136493432 estimated_peak_memory_range: - min: 43122688 - max: 43122688 + min: 48291840 + max: 48291840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: jqp4jwx1p + job_id: j1pv43nkp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:02:29Z' + timestamp: '2024-06-22T23:10:27Z' diff --git a/qai_hub_models/models/resnet101_quantized/export.py b/qai_hub_models/models/resnet101_quantized/export.py index 109a3790..88067ea0 100644 --- a/qai_hub_models/models/resnet101_quantized/export.py +++ b/qai_hub_models/models/resnet101_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" 
os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/resnet101_quantized/model.py b/qai_hub_models/models/resnet101_quantized/model.py index f8475316..c4cfa229 100644 --- a/qai_hub_models/models/resnet101_quantized/model.py +++ b/qai_hub_models/models/resnet101_quantized/model.py @@ -86,5 +86,4 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() return cls(sim) diff --git a/qai_hub_models/models/resnet101_quantized/perf.yaml b/qai_hub_models/models/resnet101_quantized/perf.yaml index 9bf5bafe..2c0ca710 100644 --- a/qai_hub_models/models/resnet101_quantized/perf.yaml +++ b/qai_hub_models/models/resnet101_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -45,8 +49,8 @@ models: inference_time: 1188.0 throughput: 841.7508417508418 estimated_peak_memory_range: - min: 0 - max: 1614400 + min: 12288 + max: 3233240 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jo5mvzw95 + job_id: jlpe29nop job_status: Passed torchscript_onnx_qnn: - inference_time: 1377.0 - throughput: 726.2164124909223 + inference_time: 1371.0 + throughput: 729.3946024799417 estimated_peak_memory_range: min: 12288 - max: 58349752 + max: 58942072 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: j2p0er1n5 - job_status: Passed - torchscript_onnx_ort: - inference_time: 1486.0 - throughput: 672.9475100942127 - estimated_peak_memory_range: - min: 12288 - max: 87121872 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 151 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 151 - job_id: j1glekemp + job_id: jz576z8vg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:04:23Z' + timestamp: '2024-06-22T23:12:17Z' - torchscript_onnx_tflite: - inference_time: 927.0 - throughput: 1078.7486515641856 + inference_time: 912.0 + throughput: 1096.4912280701753 estimated_peak_memory_range: min: 12288 - max: 93411600 + max: 98535088 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jegnre9q5 + job_id: jygzwe0og job_status: Passed torchscript_onnx_qnn: - inference_time: 1078.0 - throughput: 927.643784786642 + inference_time: 1051.0 + throughput: 951.4747859181732 estimated_peak_memory_range: min: 163840 - max: 66249856 + max: 58236944 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: j1p8w73op - job_status: Passed - torchscript_onnx_ort: - inference_time: 1162.0 - throughput: 860.5851979345955 - estimated_peak_memory_range: - min: 0 - max: 47460512 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 151 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 151 - job_id: jw56q1qyg + job_id: jqp48q28g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +120,13 @@ models: os_name: Android 
manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:04:24Z' + timestamp: '2024-06-22T23:12:18Z' - torchscript_onnx_tflite: - inference_time: 1171.0 - throughput: 853.9709649871904 + inference_time: 1183.0 + throughput: 845.30853761623 estimated_peak_memory_range: min: 12288 - max: 1692848 + max: 1566288 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jopr1y47g + job_id: jz5wxor3p job_status: Passed torchscript_onnx_qnn: - inference_time: 1379.0 - throughput: 725.1631617113851 + inference_time: 1377.0 + throughput: 726.2164124909223 estimated_peak_memory_range: - min: 12288 - max: 47947408 + min: 16384 + max: 398643256 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jn5q927op + job_id: jo5m4rld5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +158,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:04:22Z' + timestamp: '2024-06-22T23:12:21Z' - torchscript_onnx_tflite: - inference_time: 4690.0 - throughput: 213.21961620469082 + inference_time: 1187.0 + throughput: 842.4599831508003 + estimated_peak_memory_range: + min: 20480 + max: 54362504 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 148 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 148 + job_id: jmg98vqwp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1376.0 + throughput: 726.7441860465116 estimated_peak_memory_range: min: 12288 - max: 30183472 + max: 49478392 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 146 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 146 + job_id: jegnx2wk5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:12:22Z' + - torchscript_onnx_tflite: + inference_time: 4848.0 + throughput: 206.27062706270627 + estimated_peak_memory_range: + min: 28672 + max: 33426016 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,7 +210,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jep23m7qg + job_id: jnp130m85 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -207,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-08T23:04:17Z' + timestamp: '2024-06-22T23:12:15Z' - torchscript_onnx_tflite: - inference_time: 17058.0 - throughput: 58.62351975612616 + inference_time: 17263.0 + throughput: 57.92735909169901 estimated_peak_memory_range: - min: 40960 - max: 1956688 + min: 32768 + max: 1987040 primary_compute_unit: NPU precision: int8 layer_info: @@ -221,7 +233,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jqpyvd4lp + job_id: jvgd0wmrp job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -230,13 +242,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-08T23:04:18Z' + timestamp: '2024-06-22T23:12:16Z' - torchscript_onnx_qnn: - inference_time: 1381.0 - throughput: 724.112961622013 + inference_time: 1308.0 + throughput: 764.525993883792 estimated_peak_memory_range: - min: 270336 - max: 270336 + min: 491520 + max: 491520 primary_compute_unit: NPU precision: int8 layer_info: @@ -244,22 +256,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 146 - job_id: jogkryln5 - job_status: Passed - torchscript_onnx_ort: - inference_time: 1313.0 - throughput: 761.6146230007616 - estimated_peak_memory_range: - min: 24576 - max: 24576 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 151 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 151 - job_id: j1p3qmqn5 + job_id: j0pxmvz3g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -268,4 +265,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:04:25Z' + timestamp: '2024-06-22T23:12:20Z' diff --git a/qai_hub_models/models/resnet18/export.py b/qai_hub_models/models/resnet18/export.py index b4ca9324..7a8b8692 100644 --- a/qai_hub_models/models/resnet18/export.py +++ b/qai_hub_models/models/resnet18/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/resnet18/perf.yaml b/qai_hub_models/models/resnet18/perf.yaml index ff6f698a..522c5a5c 100644 --- a/qai_hub_models/models/resnet18/perf.yaml +++ b/qai_hub_models/models/resnet18/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: ResNet18 performance_metrics: - torchscript_onnx_tflite: - inference_time: 1416.0 - throughput: 706.2146892655368 + inference_time: 1414.0 + throughput: 707.2135785007072 estimated_peak_memory_range: - min: 61440 - max: 1999640 + min: 32768 + max: 1539816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: j1pvzwzrg + job_id: jn5qw8dn5 job_status: Passed torchscript_onnx_qnn: inference_time: 1446.0 throughput: 691.5629322268327 estimated_peak_memory_range: - min: 86016 - max: 83516488 + min: 16384 + max: 72842240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jygzv7vxp + job_id: jwgomy9q5 job_status: Passed - 
torchscript_onnx_ort: - inference_time: 1350.0 - throughput: 740.7407407407408 + torchscript_onnx: + inference_time: 1330.0 + throughput: 751.8796992481203 estimated_peak_memory_range: - min: 24576 - max: 88328320 + min: 12288 + max: 98529504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 55 - job_id: jvgd7v7zg + job_id: jz5wxo03p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:04:50Z' + timestamp: '2024-06-22T23:12:56Z' - torchscript_onnx_tflite: - inference_time: 989.0 - throughput: 1011.1223458038422 + inference_time: 986.0 + throughput: 1014.1987829614604 estimated_peak_memory_range: - min: 12288 - max: 25458704 + min: 16384 + max: 26232304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: j7gjklke5 + job_id: j1gl7nqj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1017.0 - throughput: 983.284169124877 + inference_time: 1021.0 + throughput: 979.4319294809011 estimated_peak_memory_range: - min: 618496 - max: 29899792 + min: 0 + max: 28437120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jz5wm9mmg + job_id: j1pv43ykp job_status: Passed - torchscript_onnx_ort: - inference_time: 978.0 - throughput: 1022.4948875255624 + torchscript_onnx: + inference_time: 946.0 + throughput: 1057.0824524312895 estimated_peak_memory_range: min: 0 - max: 16899936 + max: 18534672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 55 - job_id: jz57vdv95 + job_id: jmg98v7wp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:04:51Z' + timestamp: '2024-06-22T23:12:58Z' - torchscript_onnx_tflite: - inference_time: 1412.0 - throughput: 708.2152974504249 + inference_time: 1414.0 + throughput: 707.2135785007072 estimated_peak_memory_range: - min: 28672 - max: 154269408 + min: 45056 + max: 1517840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: jlpe4v4v5 + job_id: jw56v606p job_status: Passed torchscript_onnx_qnn: - inference_time: 1473.0 - throughput: 678.8866259334691 + inference_time: 1467.0 + throughput: 681.6632583503749 estimated_peak_memory_range: min: 16384 - max: 72911032 + max: 83495696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jnp1q8q7g + job_id: jlpe290op job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:04:49Z' + timestamp: '2024-06-22T23:12:54Z' + - torchscript_onnx_tflite: + inference_time: 1413.0 + throughput: 707.7140835102618 + estimated_peak_memory_range: + min: 28672 + max: 27219144 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 38 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 38 + job_id: j1p38kr35 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1461.0 + throughput: 684.4626967830253 + 
estimated_peak_memory_range: + min: 12288 + max: 73021240 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 53 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 53 + job_id: jygzweqog + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:12:55Z' - torchscript_onnx_qnn: - inference_time: 1575.0 - throughput: 634.9206349206349 + inference_time: 1561.0 + throughput: 640.6149903907751 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jmg99498g + job_id: j7gj1x6vg job_status: Passed - torchscript_onnx_ort: - inference_time: 1324.0 - throughput: 755.2870090634441 + torchscript_onnx: + inference_time: 1308.0 + throughput: 764.525993883792 estimated_peak_memory_range: - min: 28278784 - max: 28278784 + min: 28839936 + max: 28839936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 55 - job_id: jqp4jwj1p + job_id: jnp130k85 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:04:52Z' + timestamp: '2024-06-22T23:12:59Z' diff --git a/qai_hub_models/models/resnet18_quantized/export.py b/qai_hub_models/models/resnet18_quantized/export.py index 4ed971cc..9cb2dcd2 100644 --- a/qai_hub_models/models/resnet18_quantized/export.py +++ b/qai_hub_models/models/resnet18_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/resnet18_quantized/model.py b/qai_hub_models/models/resnet18_quantized/model.py index a6c87335..c0c56598 100644 --- a/qai_hub_models/models/resnet18_quantized/model.py +++ b/qai_hub_models/models/resnet18_quantized/model.py @@ -78,5 +78,4 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() return cls(sim) diff --git a/qai_hub_models/models/resnet18_quantized/perf.yaml b/qai_hub_models/models/resnet18_quantized/perf.yaml index a76fba6f..85546c98 100644 --- a/qai_hub_models/models/resnet18_quantized/perf.yaml +++ b/qai_hub_models/models/resnet18_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 
(Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: ResNet18Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 420.0 - throughput: 2380.9523809523807 + inference_time: 419.0 + throughput: 2386.634844868735 estimated_peak_memory_range: min: 12288 - max: 1492608 + max: 1704432 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 39 - job_id: jo5mvzv95 + job_id: jz576z1vg job_status: Passed torchscript_onnx_qnn: - inference_time: 639.0 - throughput: 1564.9452269170579 + inference_time: 637.0 + throughput: 1569.8587127158555 estimated_peak_memory_range: - min: 24576 - max: 9441728 + min: 16384 + max: 132534952 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 37 - job_id: j2p0eren5 - job_status: Passed - torchscript_onnx_ort: - inference_time: 641.0 - throughput: 1560.0624024960998 - estimated_peak_memory_range: - min: 12288 - max: 25595784 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 42 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 42 - job_id: j1glek7mp + job_id: jep2j8qr5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:05:34Z' + timestamp: '2024-06-22T23:13:39Z' - torchscript_onnx_tflite: - inference_time: 352.0 - throughput: 2840.909090909091 + inference_time: 347.0 + throughput: 2881.844380403458 estimated_peak_memory_range: min: 16384 - max: 24707232 + max: 26805328 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 39 - job_id: jegnrerq5 + job_id: jqp48q68g job_status: Passed torchscript_onnx_qnn: - inference_time: 475.0 - throughput: 2105.2631578947367 + inference_time: 480.0 + throughput: 2083.3333333333335 estimated_peak_memory_range: - min: 163840 - max: 28038704 + min: 0 + max: 24359200 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 37 - job_id: j1p8w7wop - job_status: Passed - torchscript_onnx_ort: - inference_time: 479.0 - throughput: 2087.6826722338205 - estimated_peak_memory_range: - min: 12288 - max: 20801936 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 42 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 42 - job_id: jw56q1vyg + job_id: jqpynek8g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +120,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:05:35Z' + timestamp: '2024-06-22T23:13:40Z' - torchscript_onnx_tflite: - inference_time: 420.0 - throughput: 2380.9523809523807 + inference_time: 424.0 + throughput: 2358.490566037736 estimated_peak_memory_range: min: 12288 - max: 1230392 + max: 14983304 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 39 - job_id: jopr1y17g + job_id: j0pxmv83g job_status: Passed torchscript_onnx_qnn: - inference_time: 632.0 - throughput: 
1582.2784810126582 + inference_time: 626.0 + throughput: 1597.444089456869 estimated_peak_memory_range: - min: 16384 - max: 8848856 + min: 12288 + max: 9090424 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 37 - job_id: jn5q929op + job_id: j1p88odkp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +158,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:05:33Z' + timestamp: '2024-06-22T23:13:42Z' - torchscript_onnx_tflite: - inference_time: 1426.0 - throughput: 701.2622720897616 + inference_time: 418.0 + throughput: 2392.3444976076553 estimated_peak_memory_range: - min: 12288 - max: 15923968 + min: 24576 + max: 127281976 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 39 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 39 + job_id: jo5m4r1d5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 638.0 + throughput: 1567.398119122257 + estimated_peak_memory_range: + min: 16384 + max: 147996392 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 37 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 37 + job_id: jogkdzwwp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:13:44Z' + - torchscript_onnx_tflite: + inference_time: 1444.0 + throughput: 692.5207756232687 + estimated_peak_memory_range: + min: 16384 + max: 16990032 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,7 +210,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 39 - job_id: jep23m3qg + job_id: jegnx2dk5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -207,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-08T23:05:28Z' + timestamp: '2024-06-22T23:13:36Z' - torchscript_onnx_tflite: - inference_time: 7066.0 - throughput: 141.52278516841213 + inference_time: 7065.0 + throughput: 141.54281670205236 estimated_peak_memory_range: - min: 40960 - max: 6406016 + min: 12288 + max: 2019328 primary_compute_unit: NPU precision: int8 layer_info: @@ -221,7 +233,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 39 - job_id: jqpyvdvlp + job_id: jopr9km0p job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -230,13 +242,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-08T23:05:29Z' + timestamp: '2024-06-22T23:13:37Z' - torchscript_onnx_qnn: - inference_time: 742.0 - throughput: 1347.7088948787061 + inference_time: 710.0 + throughput: 1408.4507042253522 estimated_peak_memory_range: - min: 1617920 - max: 1617920 + min: 593920 + max: 593920 primary_compute_unit: NPU precision: int8 layer_info: @@ -244,22 +256,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 37 - job_id: jogkryrn5 - job_status: Passed - torchscript_onnx_ort: - inference_time: 836.0 - throughput: 1196.1722488038276 - estimated_peak_memory_range: - min: 3690496 - max: 3690496 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 42 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 42 - job_id: j1p3qm8n5 + job_id: j2p0ky895 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -268,4 +265,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:05:36Z' + 
timestamp: '2024-06-22T23:13:41Z' diff --git a/qai_hub_models/models/resnet50/export.py b/qai_hub_models/models/resnet50/export.py index 688099b7..5daa5d27 100644 --- a/qai_hub_models/models/resnet50/export.py +++ b/qai_hub_models/models/resnet50/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/resnet50/perf.yaml b/qai_hub_models/models/resnet50/perf.yaml index be4d4c11..c8c0873a 100644 --- a/qai_hub_models/models/resnet50/perf.yaml +++ b/qai_hub_models/models/resnet50/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: ResNet50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 2269.0 - throughput: 440.72278536800354 + inference_time: 2286.0 + throughput: 437.4453193350831 estimated_peak_memory_range: - min: 16384 - max: 2153680 + min: 20480 + max: 2727384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: j1pvzw4rg + job_id: j7gj1x9vg job_status: Passed torchscript_onnx_qnn: - inference_time: 2387.0 - throughput: 418.93590280687056 + inference_time: 2400.0 + throughput: 416.6666666666667 estimated_peak_memory_range: min: 622592 - max: 175232184 + max: 186116792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jygzv7wxp + job_id: jmg98vrwp job_status: Passed - torchscript_onnx_ort: - inference_time: 2366.0 - throughput: 422.654268808115 + torchscript_onnx: + inference_time: 2345.0 + throughput: 426.43923240938165 estimated_peak_memory_range: - min: 12288 - max: 261165672 + min: 16384 + max: 214360704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jvgd7v0zg + job_id: j0pxmvq3g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung 
chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:06:03Z' + timestamp: '2024-06-22T23:14:20Z' - torchscript_onnx_tflite: - inference_time: 1615.0 - throughput: 619.1950464396285 + inference_time: 1635.0 + throughput: 611.6207951070336 estimated_peak_memory_range: min: 12288 - max: 72992224 + max: 77847184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: j7gjkl1e5 + job_id: jlpe29qop job_status: Passed torchscript_onnx_qnn: - inference_time: 1705.0 - throughput: 586.5102639296188 + inference_time: 1711.0 + throughput: 584.4535359438925 estimated_peak_memory_range: min: 0 - max: 51115584 + max: 48436560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jz5wm9xmg + job_id: jnp130985 job_status: Passed - torchscript_onnx_ort: - inference_time: 1750.0 - throughput: 571.4285714285714 + torchscript_onnx: + inference_time: 1728.0 + throughput: 578.7037037037037 estimated_peak_memory_range: - min: 618496 - max: 34613760 + min: 0 + max: 32669776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jz57vd695 + job_id: jo5m4r7d5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:06:04Z' + timestamp: '2024-06-22T23:14:21Z' - torchscript_onnx_tflite: - inference_time: 2272.0 - throughput: 440.14084507042253 + inference_time: 2275.0 + throughput: 439.56043956043953 estimated_peak_memory_range: - min: 24576 - max: 1714808 + min: 28672 + max: 2508480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jlpe4v2v5 + job_id: jygzwe6og job_status: Passed torchscript_onnx_qnn: - inference_time: 2385.0 - throughput: 419.2872117400419 + inference_time: 2381.0 + throughput: 419.99160016799664 estimated_peak_memory_range: - min: 622592 - max: 175433648 + min: 626688 + max: 185987504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jnp1q837g + job_id: jz576zmvg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:06:02Z' + timestamp: '2024-06-22T23:14:18Z' + - torchscript_onnx_tflite: + inference_time: 2278.0 + throughput: 438.98156277436345 + estimated_peak_memory_range: + min: 24576 + max: 2160032 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 79 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 79 + job_id: jz5wxok3p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 2393.0 + throughput: 417.88549937317174 + estimated_peak_memory_range: + min: 618496 + max: 186249720 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 126 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 126 + job_id: jqp48q78g + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:14:19Z' - torchscript_onnx_qnn: - inference_time: 2608.0 - throughput: 383.4355828220859 + inference_time: 2274.0 + throughput: 
439.7537379067722 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jmg99488g + job_id: jvgd0wkrp job_status: Passed - torchscript_onnx_ort: - inference_time: 2295.0 - throughput: 435.7298474945534 + torchscript_onnx: + inference_time: 2307.0 + throughput: 433.4633723450368 estimated_peak_memory_range: - min: 54059008 - max: 54059008 + min: 55226368 + max: 55226368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jqp4jw81p + job_id: jegnx24k5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:06:05Z' + timestamp: '2024-06-22T23:14:22Z' diff --git a/qai_hub_models/models/resnet50_quantized/README.md b/qai_hub_models/models/resnet50_quantized/README.md new file mode 100644 index 00000000..1e962511 --- /dev/null +++ b/qai_hub_models/models/resnet50_quantized/README.md @@ -0,0 +1,61 @@ +[![Qualcomm® AI Hub Models](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/quic-logo.jpg)](../../README.md) + + +# [ResNet50Quantized: Imagenet classifier and general purpose backbone](https://aihub.qualcomm.com/models/resnet50_quantized) + +ResNet50 is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. + +This is based on the implementation of ResNet50Quantized found +[here](https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py). This repository contains scripts for optimized on-device +export suitable to run on Qualcomm® devices. More details on model performance +across various devices can be found [here](https://aihub.qualcomm.com/models/resnet50_quantized). + +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on +a hosted Qualcomm® device. + + + + +## Example & Usage + +Install the package via pip: +```bash +pip install "qai_hub_models[resnet50_quantized]" +``` + + +Once installed, run the following simple CLI demo: + +```bash +python -m qai_hub_models.models.resnet50_quantized.demo +``` +More details on the CLI tool can be found with the `--help` option. See +[demo.py](demo.py) for sample usage of the model including pre/post processing +scripts. Please refer to our [general instructions on using +models](../../../#getting-started) for more usage instructions. + +## Export for on-device deployment + +This repository contains export scripts that produce a model optimized for +on-device deployment. This can be run as follows: + +```bash +python -m qai_hub_models.models.resnet50_quantized.export +``` +Additional options are documented with the `--help` option. Note that the above +script requires access to Deployment instructions for Qualcomm® AI Hub. + +## License +- The license for the original implementation of ResNet50Quantized can be found + [here](https://github.com/pytorch/vision/blob/main/LICENSE).
+- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) + +## References +* [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) +* [Source Model Implementation](https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py) + +## Community +* Join [our AI Hub Slack community](https://qualcomm-ai-hub.slack.com/join/shared_invite/zt-2d5zsmas3-Sj0Q9TzslueCjS31eXG2UA#/shared-invite/email) to collaborate, post questions and learn more about on-device AI. +* For questions or feedback please [reach out to us](mailto:ai-hub-support@qti.qualcomm.com). + + diff --git a/qai_hub_models/models/resnet50_quantized/__init__.py b/qai_hub_models/models/resnet50_quantized/__init__.py new file mode 100644 index 00000000..f416a9c4 --- /dev/null +++ b/qai_hub_models/models/resnet50_quantized/__init__.py @@ -0,0 +1,10 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.imagenet_classifier.app import ( # noqa: F401 + ImagenetClassifierApp as App, +) + +from .model import MODEL_ID # noqa: F401 +from .model import ResNet50Quantizable as Model # noqa: F401 diff --git a/qai_hub_models/models/resnet50_quantized/conftest.py b/qai_hub_models/models/resnet50_quantized/conftest.py new file mode 100644 index 00000000..717d124e --- /dev/null +++ b/qai_hub_models/models/resnet50_quantized/conftest.py @@ -0,0 +1,37 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +import inspect + +import pytest + +from qai_hub_models.models.resnet50_quantized import Model + + +# Instantiate the model only once for all tests. +# Mock from_pretrained to always return the initialized model. +# This speeds up tests and limits memory leaks. +@pytest.fixture(scope="module", autouse=True) +def cached_from_pretrained(): + with pytest.MonkeyPatch.context() as mp: + pretrained_cache = {} + from_pretrained = Model.from_pretrained + sig = inspect.signature(from_pretrained) + + def _cached_from_pretrained(*args, **kwargs): + cache_key = str(args) + str(kwargs) + model = pretrained_cache.get(cache_key, None) + if model: + return model + else: + model = from_pretrained(*args, **kwargs) + pretrained_cache[cache_key] = model + return model + + _cached_from_pretrained.__signature__ = sig + + mp.setattr(Model, "from_pretrained", _cached_from_pretrained) + yield mp diff --git a/qai_hub_models/models/resnet50_quantized/demo.py b/qai_hub_models/models/resnet50_quantized/demo.py new file mode 100644 index 00000000..02dc3292 --- /dev/null +++ b/qai_hub_models/models/resnet50_quantized/demo.py @@ -0,0 +1,14 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo +from qai_hub_models.models.resnet50_quantized.model import MODEL_ID, ResNet50Quantizable + + +def main(is_test: bool = False): + imagenet_demo(ResNet50Quantizable, MODEL_ID, is_test) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/resnet50_quantized/evaluate.py b/qai_hub_models/models/resnet50_quantized/evaluate.py new file mode 100644 index 00000000..8fdc840e --- /dev/null +++ b/qai_hub_models/models/resnet50_quantized/evaluate.py @@ -0,0 +1,62 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.resnet50_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args +from qai_hub_models.utils.quantization_aimet import AIMETQuantizableMixin + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + + # Use Fp16 model for torch inference + for cls in Model.__mro__: + if issubclass(cls, BaseModel) and not issubclass(cls, AIMETQuantizableMixin): + torch_cls = cls + break + torch_model = torch_cls.from_pretrained(**get_model_kwargs(torch_cls, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/resnet50_quantized/export.py b/qai_hub_models/models/resnet50_quantized/export.py new file mode 100644 index 00000000..167292ed --- /dev/null +++ b/qai_hub_models/models/resnet50_quantized/export.py @@ -0,0 +1,236 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import os +import warnings +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple, cast + +import qai_hub as hub + +from qai_hub_models.models.resnet50_quantized import Model +from qai_hub_models.utils.args import ( + export_parser, + get_input_spec_kwargs, + get_model_kwargs, +) +from qai_hub_models.utils.base_model import TargetRuntime +from qai_hub_models.utils.compare import torch_inference +from qai_hub_models.utils.printing import ( + print_inference_metrics, + print_on_target_demo_cmd, + print_profile_metrics_from_job, +) +from qai_hub_models.utils.qai_hub_helpers import ( + can_access_qualcomm_ai_hub, + export_without_hub_access, + transpose_channel_first_to_last, +) +from qai_hub_models.utils.qnn_helpers import get_qnn_inputs + + +def export_model( + device: str = "Samsung Galaxy S23 (Family)", + chipset: Optional[str] = None, + skip_profiling: bool = False, + skip_inferencing: bool = False, + skip_downloading: bool = False, + skip_summary: bool = False, + output_dir: Optional[str] = None, + target_runtime: TargetRuntime = TargetRuntime.TFLITE, + compile_options: str = "", + profile_options: str = "", + **additional_model_kwargs, +) -> Tuple[hub.CompileJob, Optional[hub.ProfileJob], Optional[hub.InferenceJob]] | List[ + str +]: + """ + This function accomplishes 6 main tasks: + + 1. Instantiates a PyTorch model and converts it to a traced TorchScript format. + 2. Compiles the model to an asset that can be run on device. + 3. Profiles the model performance on real devices. + 4. Inferences the model on sample inputs. + 5. Downloads the model asset to the local directory. + 6. Summarizes the results from profiling and inference. + + Each of the last four steps can be optionally skipped using the input options. + + Parameters: + device: Device for which to export the model. + Full list of available devices can be found by running `hub.get_devices()`. + Defaults to DEFAULT_DEVICE if not specified. + chipset: If set, will choose a random device with this chipset. + Overrides the `device` argument. + skip_profiling: If set, skips profiling of compiled model on real devices. + skip_inferencing: If set, skips computing on-device outputs from sample data. + skip_downloading: If set, skips downloading of compiled model. + skip_summary: If set, skips waiting for and summarizing results + from profiling and inference. + output_dir: Directory to store generated assets (e.g. compiled model). + Defaults to `/build/`. + target_runtime: Which on-device runtime to target. Default is TFLite. + compile_options: Additional options to pass when submitting the compile job. + profile_options: Additional options to pass when submitting the profile job. + **additional_model_kwargs: Additional optional kwargs used to customize + `model_cls.from_pretrained` and `model.get_input_spec` + + Returns: + A 3-tuple of: + * A CompileJob object containing metadata about the compile job submitted to hub. + * A ProfileJob containing metadata about the profile job (None if profiling skipped). + * An InferenceJob containing metadata about the inference job (None if inferencing skipped). 
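A minimal sketch of calling the generated entry point documented above from Python, rather than via `python -m qai_hub_models.models.resnet50_quantized.export`. It assumes Qualcomm® AI Hub access is configured; the keyword values shown are illustrative and simply mirror the documented defaults:

```python
from qai_hub_models.models.resnet50_quantized.export import export_model
from qai_hub_models.utils.base_model import TargetRuntime

# Compile and profile on the default device family, but skip the on-device
# inference step; inference_job is then returned as None.
compile_job, profile_job, inference_job = export_model(
    device="Samsung Galaxy S23 (Family)",
    target_runtime=TargetRuntime.TFLITE,
    skip_inferencing=True,
)
```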
+ """ + model_name = "resnet50_quantized" + output_path = Path(output_dir or Path.cwd() / "build" / model_name) + if chipset: + hub_device = hub.Device(attributes=f"chipset:{chipset}") + else: + hub_device = hub.Device(name=device) + if not can_access_qualcomm_ai_hub(): + return export_without_hub_access( + "resnet50_quantized", + "ResNet50Quantized", + device, + skip_profiling, + skip_inferencing, + skip_downloading, + skip_summary, + output_path, + target_runtime, + compile_options, + profile_options, + ) + + # 1. Initialize PyTorch model + model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) + input_spec = model.get_input_spec( + **get_input_spec_kwargs(model, additional_model_kwargs) + ) + + # Trace the model + source_model = model.convert_to_hub_source_model( + target_runtime, output_path, input_spec + ) + if target_runtime == TargetRuntime.TFLITE: + quant_calibration_data = None + else: + quant_calibration_data = model.get_calibration_data(target_runtime, input_spec) + + # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) + channel_last_flags = ( + " --force_channel_last_input image_tensor" + if target_runtime != TargetRuntime.ONNX + else "" + ) + + # 2. Compile the model to an on-device asset + model_compile_options = model.get_hub_compile_options( + target_runtime, compile_options + channel_last_flags, hub_device + ) + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( + model=source_model, + input_specs=input_spec, + device=hub_device, + name=model_name, + calibration_data=quant_calibration_data, + options=model_compile_options, + ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) + + # 3. Profile the model asset on real devices + profile_job: Optional[hub.client.ProfileJob] = None + if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print(f"Profiling model {model_name} on a hosted device.") + submitted_profile_job = hub.submit_profile_job( + model=compile_job.get_target_model(), + device=hub_device, + name=model_name, + options=profile_options_all, + ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) + + # 4. Run inference on-device with sample inputs + inference_job: Optional[hub.client.InferenceJob] = None + if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print( + f"Running inference for {model_name} on a hosted device with example inputs." + ) + sample_inputs = model.sample_inputs(input_spec) + hub_inputs = sample_inputs + if target_runtime == TargetRuntime.QNN: + hub_inputs = get_qnn_inputs(compile_job, sample_inputs) + # Convert inputs from channel first to channel last + hub_inputs = ( + sample_inputs + if target_runtime == TargetRuntime.ONNX + else transpose_channel_first_to_last( + "image_tensor", sample_inputs, target_runtime + ) + ) + submitted_inference_job = hub.submit_inference_job( + model=compile_job.get_target_model(), + inputs=hub_inputs, + device=hub_device, + name=model_name, + options=profile_options_all, + ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) + + # 5. 
Download the model asset to a local file + if not skip_downloading: + if target_runtime == TargetRuntime.QNN: + target_runtime_extension = "so" + elif target_runtime == TargetRuntime.TFLITE: + target_runtime_extension = "tflite" + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: + target_runtime_extension = "onnx" + + os.makedirs(output_path, exist_ok=True) + target_model: hub.Model = compile_job.get_target_model() # type: ignore + target_model.download( + str(output_path / f"{model_name}.{target_runtime_extension}") + ) + + # 6. Summarize the results from profiling and inference + if not skip_summary and not skip_profiling: + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore + print_profile_metrics_from_job(profile_job, profile_data) + + if not skip_summary and not skip_inferencing: + torch_out = torch_inference(model, sample_inputs) + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore + print_inference_metrics( + inference_job, inference_result, torch_out, metrics="psnr,top1,top5" + ) + + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + + return (compile_job, profile_job, inference_job) + + +def main(): + warnings.filterwarnings("ignore") + parser = export_parser(model_cls=Model) + args = parser.parse_args() + export_model(**vars(args)) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/resnet50_quantized/info.yaml b/qai_hub_models/models/resnet50_quantized/info.yaml new file mode 100644 index 00000000..dd32d352 --- /dev/null +++ b/qai_hub_models/models/resnet50_quantized/info.yaml @@ -0,0 +1,44 @@ +name: ResNet50Quantized +# id must match with the model dir name in qai_hub_models +id: resnet50_quantized +status: public +headline: Imagenet classifier and general purpose backbone. +domain: Computer Vision +use_case: Image Classification +description: ResNet50 is a machine learning model that can classify images from the + Imagenet dataset. It can also be used as a backbone in building more complex models + for specific use cases. 
+tags: + - backbone + - quantized +research_paper: https://arxiv.org/abs/1512.03385 +research_paper_title: Deep Residual Learning for Image Recognition +license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py +technical_details: + Model checkpoint: Imagenet + Input resolution: 224x224 + Number of parameters: 25.5M + Model size: 25.1 MB +applicable_scenarios: + - Medical Imaging + - Anomaly Detection + - Inventory Management +related_models: + - mobilenet_v2 + - densenet121 + - googlenet +form_factors: + - Phone + - Tablet + - IoT + - XR +has_static_banner: yes +has_animated_banner: yes +license_type: bsd-3-clause +deploy_license_type: AI Model Hub License +dataset: + - imagenet-1k + - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/resnet50_quantized/model.py b/qai_hub_models/models/resnet50_quantized/model.py new file mode 100644 index 00000000..54f44eb1 --- /dev/null +++ b/qai_hub_models/models/resnet50_quantized/model.py @@ -0,0 +1,81 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from __future__ import annotations + +# isort: off +# This verifies aimet is installed, and this must be included first. +from qai_hub_models.utils.quantization_aimet import ( + AIMETQuantizableMixin, + constrain_quantized_inputs_to_image_range, +) + +# isort: on + +import torch +from aimet_torch.cross_layer_equalization import equalize_model +from aimet_torch.model_preparer import prepare_model +from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim + +from qai_hub_models.models.resnet50.model import ResNet50 +from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset + +MODEL_ID = __name__.split(".")[-2] +MODEL_ASSET_VERSION = 6 +DEFAULT_ENCODINGS = "resnet50_quantized_encodings.json" + + +class ResNet50Quantizable(AIMETQuantizableMixin, ResNet50): + """ResNet with post train quantization support. + + Supports only 8 bit weights and activations, and only loads pre-quantized checkpoints. + Support for quantizing using your own weights & data will come at a later date.""" + + def __init__( + self, + resnet50_model: QuantizationSimModel, + ) -> None: + # Input is already normalized by sim_model. Disable it in the wrapper model. + ResNet50.__init__(self, resnet50_model.model, normalize_input=False) + AIMETQuantizableMixin.__init__( + self, + resnet50_model, + ) + + @classmethod + def from_pretrained( + cls, + aimet_encodings: str | None = "DEFAULT", + ) -> "ResNet50Quantizable": + """ + Parameters: + aimet_encodings: + if "DEFAULT": Loads the model with aimet encodings calibrated on imagenette. + elif None: Doesn't load any encodings. Used when computing encodings. + else: Interprets as a filepath and loads the encodings stored there. 
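The three loading modes described in the `from_pretrained` docstring above can be exercised as in this minimal sketch (assumes the aimet-torch dependency from requirements.txt is installed, i.e. a Linux host; the local encodings path is hypothetical):

```python
from qai_hub_models.models.resnet50_quantized.model import ResNet50Quantizable

# "DEFAULT": fetch and load the pre-computed encodings from the asset store.
model = ResNet50Quantizable.from_pretrained()

# None: build the quantization sim without encodings (e.g. before computing your own).
uncalibrated = ResNet50Quantizable.from_pretrained(aimet_encodings=None)

# Any other string is treated as a local encodings file (hypothetical path).
custom = ResNet50Quantizable.from_pretrained(aimet_encodings="my_resnet50_encodings.json")
```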
+ """ + model = ResNet50.from_pretrained() + input_shape = cls.get_input_spec()["image_tensor"][0] + + model = prepare_model(model) + equalize_model(model, input_shape) + sim = QuantizationSimModel( + model, + quant_scheme="tf_enhanced", + default_param_bw=8, + default_output_bw=8, + config_file=get_default_aimet_config(), + dummy_input=torch.rand(input_shape), + ) + constrain_quantized_inputs_to_image_range(sim) + + if aimet_encodings: + if aimet_encodings == "DEFAULT": + aimet_encodings = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, DEFAULT_ENCODINGS + ).fetch() + load_encodings_to_sim(sim, aimet_encodings) + + return cls(sim) diff --git a/qai_hub_models/models/resnet50_quantized/perf.yaml b/qai_hub_models/models/resnet50_quantized/perf.yaml new file mode 100644 index 00000000..f8dcd34d --- /dev/null +++ b/qai_hub_models/models/resnet50_quantized/perf.yaml @@ -0,0 +1,265 @@ +aggregated: + supported_oses: + - Android + supported_devices: + - Google Pixel 3 + - Google Pixel 3a + - Google Pixel 3a XL + - Google Pixel 4 + - Google Pixel 4a + - Google Pixel 5a 5G + - QCS8250 (Proxy) + - QCS8550 (Proxy) + - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) + - Samsung Galaxy S21 + - Samsung Galaxy S21 Ultra + - Samsung Galaxy S21+ + - Samsung Galaxy S22 5G + - Samsung Galaxy S22 Ultra 5G + - Samsung Galaxy S22+ 5G + - Samsung Galaxy S23 + - Samsung Galaxy S23 Ultra + - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ + - Samsung Galaxy Tab S8 + - Snapdragon X Elite CRD + - Xiaomi 12 + - Xiaomi 12 Pro + supported_chipsets: + - Qcs8250 + - Qcs8550 + - Sa8540p + - Sa8775p + - Snapdragon® 8 Gen 1 + - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 + - Snapdragon® 888 + - Snapdragon® X Elite +models: +- name: ResNet50Quantized + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 814.0 + throughput: 1228.5012285012285 + estimated_peak_memory_range: + min: 28672 + max: 1573280 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 80 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 80 + job_id: jep2j81r5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 997.0 + throughput: 1003.0090270812437 + estimated_peak_memory_range: + min: 12288 + max: 7720656 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 78 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 78 + job_id: j1gl7ndj5 + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-06-22T23:15:30Z' + - torchscript_onnx_tflite: + inference_time: 616.0 + throughput: 1623.3766233766235 + estimated_peak_memory_range: + min: 16384 + max: 63028352 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 80 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 80 + job_id: jqpynel8g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 769.0 + throughput: 1300.3901170351105 + estimated_peak_memory_range: + min: 163840 + max: 38764864 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 78 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 78 + job_id: jw56v6x6p + job_status: Passed + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-06-22T23:15:31Z' + - torchscript_onnx_tflite: + 
inference_time: 788.0 + throughput: 1269.0355329949239 + estimated_peak_memory_range: + min: 40960 + max: 1460544 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 80 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 80 + job_id: j2p0kyw95 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 994.0 + throughput: 1006.0362173038229 + estimated_peak_memory_range: + min: 16384 + max: 262208304 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 78 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 78 + job_id: jwgomyxq5 + job_status: Passed + reference_device_info: + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: Qcs8550 + timestamp: '2024-06-22T23:15:34Z' + - torchscript_onnx_tflite: + inference_time: 816.0 + throughput: 1225.4901960784314 + estimated_peak_memory_range: + min: 12288 + max: 1740040 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 80 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 80 + job_id: j1p88onkp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1007.0 + throughput: 993.0486593843099 + estimated_peak_memory_range: + min: 16384 + max: 273027080 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 78 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 78 + job_id: j1pv43jkp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:15:35Z' + - torchscript_onnx_tflite: + inference_time: 2798.0 + throughput: 357.39814152966403 + estimated_peak_memory_range: + min: 12288 + max: 24437488 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 80 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 80 + job_id: jogkdz1wp + job_status: Passed + reference_device_info: + name: RB3 Gen 2 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: Qcs6490 + timestamp: '2024-06-22T23:15:27Z' + - torchscript_onnx_tflite: + inference_time: 11354.0 + throughput: 88.07468733485996 + estimated_peak_memory_range: + min: 12288 + max: 3005536 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 80 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 80 + job_id: jn5qw8nn5 + job_status: Passed + reference_device_info: + name: RB5 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: Qcs8250 + timestamp: '2024-06-22T23:15:29Z' + - torchscript_onnx_qnn: + inference_time: 986.0 + throughput: 1014.1987829614604 + estimated_peak_memory_range: + min: 557056 + max: 557056 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 78 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 78 + job_id: j1p38kd35 + job_status: Passed + reference_device_info: + name: Snapdragon X Elite CRD + os: '11' + form_factor: Compute + os_name: Windows + manufacturer: Qualcomm + chipset: Snapdragon® X Elite + timestamp: '2024-06-22T23:15:32Z' diff --git a/qai_hub_models/models/resnet50_quantized/requirements.txt b/qai_hub_models/models/resnet50_quantized/requirements.txt new file mode 100644 index 00000000..e3567f29 --- /dev/null +++ b/qai_hub_models/models/resnet50_quantized/requirements.txt @@ -0,0 +1 @@ +aimet-torch==1.31.2; sys_platform == "linux" diff --git a/qai_hub_models/models/resnet50_quantized/test.py 
b/qai_hub_models/models/resnet50_quantized/test.py new file mode 100644 index 00000000..55efb858 --- /dev/null +++ b/qai_hub_models/models/resnet50_quantized/test.py @@ -0,0 +1,30 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( + run_imagenet_classifier_test, +) +from qai_hub_models.models.resnet50_quantized.demo import main as demo_main +from qai_hub_models.models.resnet50_quantized.model import ( + MODEL_ASSET_VERSION, + MODEL_ID, + ResNet50Quantizable, +) + + +def test_task(): + run_imagenet_classifier_test( + ResNet50Quantizable.from_pretrained(), + MODEL_ID, + probability_threshold=0.45, + diff_tol=0.005, + rtol=0.02, + atol=0.2, + asset_version=MODEL_ASSET_VERSION, + ) + + +def test_demo(): + # Verify demo does not crash + demo_main(is_test=True) diff --git a/qai_hub_models/models/resnext101/export.py b/qai_hub_models/models/resnext101/export.py index e4b05a65..8bf63f21 100644 --- a/qai_hub_models/models/resnext101/export.py +++ b/qai_hub_models/models/resnext101/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/resnext101/perf.yaml b/qai_hub_models/models/resnext101/perf.yaml index 499cef3a..bcacfdf7 100644 --- a/qai_hub_models/models/resnext101/perf.yaml +++ b/qai_hub_models/models/resnext101/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: ResNeXt101 performance_metrics: - torchscript_onnx_tflite: - inference_time: 6774.0 - throughput: 147.62326542663124 + inference_time: 6589.0 + throughput: 151.76809834572774 estimated_peak_memory_range: - min: 24576 - max: 2449424 + min: 20480 + max: 2663064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 147 - job_id: j7gjkloe5 + job_id: jvgd0wjrp job_status: Passed torchscript_onnx_qnn: - inference_time: 6930.0 - throughput: 144.3001443001443 + inference_time: 6611.0 + throughput: 151.26304643775526 estimated_peak_memory_range: - min: 16384 - max: 36101088 + min: 12288 + max: 38091512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jz5wm98mg + job_id: jvgd0wjzp job_status: Passed - torchscript_onnx_ort: - inference_time: 6834.0 - throughput: 146.3271875914545 + torchscript_onnx: + inference_time: 7046.0 + throughput: 141.92449616803862 estimated_peak_memory_range: - min: 159744 - max: 453366256 + min: 16384 + max: 442476272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: jz5wm984g + job_id: jegnx2jq5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:07:53Z' + timestamp: '2024-06-22T23:16:18Z' - torchscript_onnx_tflite: - inference_time: 4859.0 - throughput: 205.80366330520684 + inference_time: 4688.0 + throughput: 213.31058020477815 estimated_peak_memory_range: min: 20480 - max: 364879056 + max: 365498784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jlpe4v8v5 + job_id: jz5wxojmp job_status: Passed torchscript_onnx_qnn: - inference_time: 4800.0 - throughput: 208.33333333333334 + inference_time: 4818.0 + throughput: 207.55500207555002 estimated_peak_memory_range: - min: 0 - max: 126702208 + min: 618496 + max: 120923264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jmg994k8g + job_id: jz576zq9g job_status: Passed - torchscript_onnx_ort: - inference_time: 5102.0 - throughput: 196.0015680125441 + torchscript_onnx: + inference_time: 4887.0 + throughput: 204.62451401677922 estimated_peak_memory_range: - min: 0 - max: 91577616 + min: 622592 + max: 86448960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: jmg994kmg + job_id: jopr9kz7p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:07:54Z' + timestamp: '2024-06-22T23:16:19Z' - torchscript_onnx_tflite: - inference_time: 6604.0 - throughput: 151.42337976983646 + inference_time: 6452.0 + throughput: 154.99070055796653 estimated_peak_memory_range: - min: 20480 - max: 3255112 + min: 32768 + max: 2593208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jygzv78xp + job_id: jmg98v68p job_status: Passed torchscript_onnx_qnn: - inference_time: 6571.0 - throughput: 152.18383807639628 + inference_time: 6644.0 + throughput: 150.51173991571343 estimated_peak_memory_range: - min: 0 - max: 35912680 + min: 12288 + max: 35889312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jvgd7v8zg + job_id: j0pxmvwlg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 
@@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:07:52Z' + timestamp: '2024-06-22T23:16:16Z' + - torchscript_onnx_tflite: + inference_time: 6481.0 + throughput: 154.29717636167257 + estimated_peak_memory_range: + min: 36864 + max: 204925088 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 147 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 147 + job_id: jnp130r75 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 6714.0 + throughput: 148.94250819183796 + estimated_peak_memory_range: + min: 12288 + max: 35760104 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 245 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 245 + job_id: jo5m4rj95 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:16:17Z' - torchscript_onnx_qnn: - inference_time: 9160.0 - throughput: 109.17030567685589 + inference_time: 6823.0 + throughput: 146.5630954125751 estimated_peak_memory_range: - min: 913408 - max: 913408 + min: 602112 + max: 602112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jnp1q877g + job_id: jqp48qz1g job_status: Passed - torchscript_onnx_ort: - inference_time: 6731.0 - throughput: 148.5663348685188 + torchscript_onnx: + inference_time: 6711.0 + throughput: 149.00908955446283 estimated_peak_memory_range: - min: 117399552 - max: 117399552 + min: 128811008 + max: 128811008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: jnp1q87ng + job_id: jep2j82q5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:07:55Z' + timestamp: '2024-06-22T23:16:20Z' diff --git a/qai_hub_models/models/resnext101_quantized/export.py b/qai_hub_models/models/resnext101_quantized/export.py index 007bc290..16037094 100644 --- a/qai_hub_models/models/resnext101_quantized/export.py +++ b/qai_hub_models/models/resnext101_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git 
a/qai_hub_models/models/resnext101_quantized/model.py b/qai_hub_models/models/resnext101_quantized/model.py index 521a1463..82c48e43 100644 --- a/qai_hub_models/models/resnext101_quantized/model.py +++ b/qai_hub_models/models/resnext101_quantized/model.py @@ -78,5 +78,4 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() return cls(sim) diff --git a/qai_hub_models/models/resnext101_quantized/perf.yaml b/qai_hub_models/models/resnext101_quantized/perf.yaml index 1537f3e3..becd66c5 100644 --- a/qai_hub_models/models/resnext101_quantized/perf.yaml +++ b/qai_hub_models/models/resnext101_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: ResNeXt101Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 2846.0 - throughput: 351.37034434293747 + inference_time: 2769.0 + throughput: 361.14120621162874 estimated_peak_memory_range: - min: 28672 - max: 2113784 + min: 20480 + max: 2649408 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jqp4jwm2p + job_id: j2p0kynn5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3084.0 - throughput: 324.25421530479895 + inference_time: 3056.0 + throughput: 327.22513089005236 estimated_peak_memory_range: - min: 16384 - max: 35906456 + min: 12288 + max: 34308616 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jep23m46g - job_status: Passed - torchscript_onnx_ort: - inference_time: 3364.0 - throughput: 297.2651605231867 - estimated_peak_memory_range: - min: 12288 - max: 140467400 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 151 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 151 - job_id: jogkry9v5 + job_id: j1p38kkn5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:09:57Z' + timestamp: '2024-06-22T23:18:19Z' - torchscript_onnx_tflite: - inference_time: 2072.0 - throughput: 482.6254826254826 + inference_time: 2080.0 + throughput: 480.7692307692308 estimated_peak_memory_range: min: 12288 - max: 258677904 + max: 263165600 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: j0pxe1385 + job_id: j1p88olop job_status: Passed torchscript_onnx_qnn: - inference_time: 2331.0 - throughput: 429.000429000429 + inference_time: 2282.0 + throughput: 438.21209465381247 estimated_peak_memory_range: - min: 12288 - max: 119524448 + min: 24576 + max: 115774864 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jqpyvdq0p - job_status: Passed - torchscript_onnx_ort: - inference_time: 2469.0 - throughput: 405.0222762251924 - estimated_peak_memory_range: - min: 12288 - max: 93879712 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 151 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 151 - job_id: 
jn5q92mep + job_id: jwgomyyk5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +120,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:09:58Z' + timestamp: '2024-06-22T23:18:20Z' - torchscript_onnx_tflite: - inference_time: 2846.0 - throughput: 351.37034434293747 + inference_time: 2808.0 + throughput: 356.1253561253561 estimated_peak_memory_range: - min: 16384 - max: 2438744 + min: 28672 + max: 2175304 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jo5mvzo75 + job_id: jogkdzjnp job_status: Passed torchscript_onnx_qnn: - inference_time: 3060.0 - throughput: 326.797385620915 + inference_time: 2990.0 + throughput: 334.44816053511704 estimated_peak_memory_range: - min: 16384 - max: 35555384 + min: 20480 + max: 33832856 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: j1p8w74qp + job_id: j7gj1xxeg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +158,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:09:56Z' + timestamp: '2024-06-22T23:18:23Z' - torchscript_onnx_tflite: - inference_time: 10195.0 - throughput: 98.0872976949485 + inference_time: 2754.0 + throughput: 363.10820624546113 + estimated_peak_memory_range: + min: 57344 + max: 2549704 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 148 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 148 + job_id: jn5qw8jo5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 3026.0 + throughput: 330.4692663582287 + estimated_peak_memory_range: + min: 16384 + max: 34367352 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 146 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 146 + job_id: jlpe299vp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:18:24Z' + - torchscript_onnx_tflite: + inference_time: 10200.0 + throughput: 98.03921568627452 estimated_peak_memory_range: min: 53248 - max: 195935712 + max: 200216400 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,7 +210,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jegnreoj5 + job_id: j1gl7nnm5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -207,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-08T23:09:51Z' + timestamp: '2024-06-22T23:18:17Z' - torchscript_onnx_tflite: - inference_time: 131262.0 - throughput: 7.618351084091359 + inference_time: 133033.0 + throughput: 7.516931889080153 estimated_peak_memory_range: min: 12288 - max: 356618752 + max: 353652680 primary_compute_unit: GPU precision: int8 layer_info: @@ -221,7 +233,7 @@ models: layers_on_gpu: 125 layers_on_cpu: 11 total_layers: 148 - job_id: jopr1yokg + job_id: jw56v66yp job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -230,13 +242,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-08T23:09:52Z' + timestamp: '2024-06-22T23:18:18Z' - torchscript_onnx_qnn: - inference_time: 3311.0 - throughput: 302.0235578375113 + inference_time: 3044.0 + throughput: 328.515111695138 estimated_peak_memory_range: - min: 262144 - max: 
262144 + min: 290816 + max: 290816 primary_compute_unit: NPU precision: int8 layer_info: @@ -244,22 +256,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: j2p0erv05 - job_status: Passed - torchscript_onnx_ort: - inference_time: 3294.0 - throughput: 303.58227079538557 - estimated_peak_memory_range: - min: 12066816 - max: 12066816 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 151 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 151 - job_id: j1glek12p + job_id: j1pv433rp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -268,4 +265,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:09:59Z' + timestamp: '2024-06-22T23:18:21Z' diff --git a/qai_hub_models/models/resnext50/export.py b/qai_hub_models/models/resnext50/export.py index ecbf5998..b7d7e673 100644 --- a/qai_hub_models/models/resnext50/export.py +++ b/qai_hub_models/models/resnext50/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/resnext50/perf.yaml b/qai_hub_models/models/resnext50/perf.yaml index 6ce25b12..44c68714 100644 --- a/qai_hub_models/models/resnext50/perf.yaml +++ b/qai_hub_models/models/resnext50/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: ResNeXt50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 2511.0 - throughput: 398.24771007566704 + inference_time: 2467.0 + throughput: 405.35062829347385 estimated_peak_memory_range: - min: 12288 - max: 2265792 + min: 16384 + max: 2377904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: j1p3qmwm5 + job_id: jqp48qq1g job_status: Passed torchscript_onnx_qnn: - inference_time: 2553.0 - throughput: 391.6960438699569 + inference_time: 2558.0 + throughput: 390.93041438623925 estimated_peak_memory_range: - min: 57344 - max: 21403728 + min: 
16384 + max: 98356192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j7gjklw15 + job_id: jopr9kk7p job_status: Passed - torchscript_onnx_ort: - inference_time: 2768.0 - throughput: 361.271676300578 + torchscript_onnx: + inference_time: 2763.0 + throughput: 361.92544335866813 estimated_peak_memory_range: - min: 16384 - max: 171552072 + min: 12288 + max: 174229864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jmg994xmg + job_id: jogkdzznp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:10:26Z' + timestamp: '2024-06-22T23:19:00Z' - torchscript_onnx_tflite: - inference_time: 1800.0 - throughput: 555.5555555555555 + inference_time: 1772.0 + throughput: 564.3340857787811 estimated_peak_memory_range: - min: 16384 - max: 163995360 + min: 12288 + max: 178219440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jwgoev41p + job_id: j0pxmvvlg job_status: Passed torchscript_onnx_qnn: - inference_time: 1878.0 - throughput: 532.4813631522896 + inference_time: 1847.0 + throughput: 541.4185165132648 estimated_peak_memory_range: - min: 0 - max: 60231440 + min: 618496 + max: 58117920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jlpe4vl85 + job_id: jep2j88q5 job_status: Passed - torchscript_onnx_ort: - inference_time: 1929.0 - throughput: 518.4033177812338 + torchscript_onnx: + inference_time: 1955.0 + throughput: 511.5089514066496 estimated_peak_memory_range: min: 618496 - max: 41928304 + max: 37332368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jnp1q8vng + job_id: jn5qw88o5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:10:27Z' + timestamp: '2024-06-22T23:19:01Z' - torchscript_onnx_tflite: - inference_time: 2498.0 - throughput: 400.320256204964 + inference_time: 2479.0 + throughput: 403.3884630899556 estimated_peak_memory_range: - min: 20480 - max: 2219560 + min: 16384 + max: 2012488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: j1pvzw9zg + job_id: jo5m4rr95 job_status: Passed torchscript_onnx_qnn: - inference_time: 2553.0 - throughput: 391.6960438699569 + inference_time: 2546.0 + throughput: 392.77297721916733 estimated_peak_memory_range: - min: 20480 - max: 88251120 + min: 446464 + max: 88449504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jz5wm914g + job_id: j2p0kyyn5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:10:25Z' + timestamp: '2024-06-22T23:18:58Z' + - torchscript_onnx_tflite: + inference_time: 2464.0 + throughput: 405.84415584415586 + estimated_peak_memory_range: + min: 20480 + max: 2390560 + 
primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 79 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 79 + job_id: jegnx22q5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 2557.0 + throughput: 391.08330074305826 + estimated_peak_memory_range: + min: 622592 + max: 88352592 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 126 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 126 + job_id: j1p88ooop + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:18:59Z' - torchscript_onnx_qnn: - inference_time: 2941.0 - throughput: 340.02040122407345 + inference_time: 2635.0 + throughput: 379.5066413662239 estimated_peak_memory_range: - min: 1044480 - max: 1044480 + min: 602112 + max: 602112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jygzv744p + job_id: jqpyneelg job_status: Passed - torchscript_onnx_ort: - inference_time: 2624.0 - throughput: 381.0975609756098 + torchscript_onnx: + inference_time: 2628.0 + throughput: 380.517503805175 estimated_peak_memory_range: - min: 46874624 - max: 46874624 + min: 39387136 + max: 39387136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jvgd7vz6g + job_id: j1gl7nzm5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:10:28Z' + timestamp: '2024-06-22T23:19:02Z' diff --git a/qai_hub_models/models/resnext50_quantized/export.py b/qai_hub_models/models/resnext50_quantized/export.py index 73b7aa57..484ba217 100644 --- a/qai_hub_models/models/resnext50_quantized/export.py +++ b/qai_hub_models/models/resnext50_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/resnext50_quantized/model.py b/qai_hub_models/models/resnext50_quantized/model.py index 0bbd1d35..101378c3 100644 --- a/qai_hub_models/models/resnext50_quantized/model.py +++ b/qai_hub_models/models/resnext50_quantized/model.py @@ -78,5 +78,4 @@ def from_pretrained( ).fetch() 
load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() return cls(sim) diff --git a/qai_hub_models/models/resnext50_quantized/perf.yaml b/qai_hub_models/models/resnext50_quantized/perf.yaml index b5dae635..76846bce 100644 --- a/qai_hub_models/models/resnext50_quantized/perf.yaml +++ b/qai_hub_models/models/resnext50_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: ResNeXt50Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 943.0 - throughput: 1060.4453870625662 + inference_time: 944.0 + throughput: 1059.322033898305 estimated_peak_memory_range: - min: 32768 - max: 1732496 + min: 24576 + max: 1806704 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: jqp4jw92p + job_id: j1p38k3n5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1179.0 - throughput: 848.1764206955047 + inference_time: 1167.0 + throughput: 856.898029134533 estimated_peak_memory_range: - min: 20480 - max: 66746984 + min: 16384 + max: 67744776 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: jep23mv6g - job_status: Passed - torchscript_onnx_ort: - inference_time: 1353.0 - throughput: 739.0983000739099 - estimated_peak_memory_range: - min: 28672 - max: 79646016 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 83 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 83 - job_id: jn5q92vep + job_id: jz5wxovmp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:11:25Z' + timestamp: '2024-06-22T23:19:56Z' - torchscript_onnx_tflite: - inference_time: 710.0 - throughput: 1408.4507042253522 + inference_time: 706.0 + throughput: 1416.4305949008499 estimated_peak_memory_range: - min: 1523712 - max: 101683104 + min: 12288 + max: 102621408 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: j0pxe1d85 + job_id: jwgomy0k5 job_status: Passed torchscript_onnx_qnn: - inference_time: 873.0 - throughput: 1145.475372279496 + inference_time: 864.0 + throughput: 1157.4074074074074 estimated_peak_memory_range: min: 163840 - max: 57724624 + max: 52528384 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: jqpyvd70p - job_status: Passed - torchscript_onnx_ort: - inference_time: 991.0 - throughput: 1009.0817356205853 - estimated_peak_memory_range: - min: 28672 - max: 41643216 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 83 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 83 - job_id: j1glekl2p + job_id: jmg98v18p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +120,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:11:26Z' + timestamp: '2024-06-22T23:19:57Z' - torchscript_onnx_tflite: - inference_time: 944.0 - throughput: 
1059.322033898305 + inference_time: 917.0 + throughput: 1090.5125408942204 estimated_peak_memory_range: min: 12288 - max: 2151184 + max: 32429552 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: jo5mvzd75 + job_id: j1pv43orp job_status: Passed torchscript_onnx_qnn: - inference_time: 1183.0 - throughput: 845.30853761623 + inference_time: 1166.0 + throughput: 857.6329331046312 estimated_peak_memory_range: - min: 16384 - max: 66707936 + min: 20480 + max: 11826992 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: jogkry8v5 + job_id: jvgd0w9zp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +158,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:11:24Z' + timestamp: '2024-06-22T23:20:01Z' - torchscript_onnx_tflite: - inference_time: 3287.0 - throughput: 304.228780042592 + inference_time: 936.0 + throughput: 1068.3760683760684 estimated_peak_memory_range: - min: 12288 - max: 55813072 + min: 16384 + max: 1454864 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,7 +172,45 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: jegnre7j5 + job_id: j7gj1xmeg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1174.0 + throughput: 851.7887563884157 + estimated_peak_memory_range: + min: 16384 + max: 65522632 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 78 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 78 + job_id: jz576zw9g + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:20:02Z' + - torchscript_onnx_tflite: + inference_time: 3071.0 + throughput: 325.626831650928 + estimated_peak_memory_range: + min: 24576 + max: 56368512 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 80 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 80 + job_id: jlpe291vp job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -207,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-08T23:11:19Z' + timestamp: '2024-06-22T23:19:54Z' - torchscript_onnx_tflite: - inference_time: 64039.0 - throughput: 15.615484314246006 + inference_time: 65697.0 + throughput: 15.221395193083398 estimated_peak_memory_range: - min: 868352 - max: 98172464 + min: 57344 + max: 97840808 primary_compute_unit: GPU precision: int8 layer_info: @@ -221,7 +233,7 @@ models: layers_on_gpu: 57 layers_on_cpu: 11 total_layers: 80 - job_id: jopr1ynkg + job_id: jygzwe9xg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -230,13 +242,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-08T23:11:20Z' + timestamp: '2024-06-22T23:19:55Z' - torchscript_onnx_qnn: - inference_time: 1350.0 - throughput: 740.7407407407408 + inference_time: 1203.0 + throughput: 831.255195344971 estimated_peak_memory_range: - min: 1429504 - max: 1429504 + min: 458752 + max: 458752 primary_compute_unit: NPU precision: int8 layer_info: @@ -244,22 +256,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: j2p0er605 - job_status: Passed - torchscript_onnx_ort: - inference_time: 1264.0 - throughput: 791.1392405063291 - estimated_peak_memory_range: - min: 
24887296 - max: 24887296 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 83 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 83 - job_id: jw56q1wng + job_id: jnp130l75 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -268,4 +265,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:11:27Z' + timestamp: '2024-06-22T23:19:59Z' diff --git a/qai_hub_models/models/riffusion_quantized/export.py b/qai_hub_models/models/riffusion_quantized/export.py index 428d3b0f..9ad9d12c 100644 --- a/qai_hub_models/models/riffusion_quantized/export.py +++ b/qai_hub_models/models/riffusion_quantized/export.py @@ -27,7 +27,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, components: Optional[List[str]] = None, skip_profiling: bool = False, diff --git a/qai_hub_models/models/sam/export.py b/qai_hub_models/models/sam/export.py index 962a8058..3602e58c 100644 --- a/qai_hub_models/models/sam/export.py +++ b/qai_hub_models/models/sam/export.py @@ -34,7 +34,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, components: Optional[List[str]] = None, skip_profiling: bool = False, @@ -128,7 +128,6 @@ def export_model( for component_name, component in components_dict.items(): # Trace the model input_spec = component.get_input_spec() - component.eval() source_model = torch.jit.trace( component.to("cpu"), make_torch_inputs(input_spec) ) @@ -204,7 +203,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -250,8 +249,8 @@ def main(): model_cls=Model, components=ALL_COMPONENTS, supports_qnn=False, - supports_ort=False, - supports_precompiled_ort=False, + supports_onnx=False, + supports_precompiled_qnn_onnx=False, ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/sam/perf.yaml b/qai_hub_models/models/sam/perf.yaml index 370ec409..d2f22527 100644 --- a/qai_hub_models/models/sam/perf.yaml +++ b/qai_hub_models/models/sam/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,19 +38,19 @@ models: - name: SAMDecoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 48230.0 - throughput: 20.733982998133943 + inference_time: 48549.0 + throughput: 20.59774660652125 estimated_peak_memory_range: - min: 4026368 - max: 7727688 + min: 4288512 + max: 13324816 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 340 + layers_on_npu: 341 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 340 - job_id: j7gjklq15 + total_layers: 341 + job_id: jqpynewlg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -57,21 +59,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:12:47Z' + timestamp: '2024-06-22T23:21:23Z' - 
torchscript_onnx_tflite: - inference_time: 34548.0 - throughput: 28.9452356142179 + inference_time: 35232.0 + throughput: 28.38328792007266 estimated_peak_memory_range: - min: 12288 - max: 245149360 + min: 2129920 + max: 257126480 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 340 + layers_on_npu: 341 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 340 - job_id: jygzv7n4p + total_layers: 341 + job_id: j1p88o9op job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -80,21 +82,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:12:49Z' + timestamp: '2024-06-22T23:21:26Z' - torchscript_onnx_tflite: - inference_time: 48060.0 - throughput: 20.807324178110694 + inference_time: 47742.0 + throughput: 20.945917640651835 estimated_peak_memory_range: - min: 4009984 - max: 12530416 + min: 4038656 + max: 13371224 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 340 + layers_on_npu: 341 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 340 - job_id: jmg994dmg + total_layers: 341 + job_id: jn5qw8ko5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -103,23 +105,46 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:12:50Z' + timestamp: '2024-06-22T23:21:28Z' + - torchscript_onnx_tflite: + inference_time: 47985.0 + throughput: 20.83984578514119 + estimated_peak_memory_range: + min: 4038656 + max: 7051640 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 341 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 341 + job_id: jw56v6lyp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:21:30Z' - name: SAMEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 12009970.0 - throughput: 0.08326415469813829 + inference_time: 11689749.0 + throughput: 0.0855450360824685 estimated_peak_memory_range: - min: 2723000320 - max: 2727292856 + min: 2661216256 + max: 2670815904 primary_compute_unit: CPU precision: fp32 layer_info: layers_on_npu: 0 layers_on_gpu: 37 - layers_on_cpu: 771 - total_layers: 808 - job_id: jlpe4vy85 + layers_on_cpu: 783 + total_layers: 820 + job_id: j2p0kyqn5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -128,21 +153,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:12:47Z' + timestamp: '2024-06-22T23:21:24Z' - torchscript_onnx_tflite: - inference_time: 9639117.0 - throughput: 0.10374394252087614 + inference_time: 10416533.0 + throughput: 0.09600123188780758 estimated_peak_memory_range: - min: 2582843392 - max: 2946188672 + min: 2565206016 + max: 2927444288 primary_compute_unit: CPU precision: fp32 layer_info: layers_on_npu: 0 layers_on_gpu: 37 - layers_on_cpu: 771 - total_layers: 808 - job_id: jz5wm944g + layers_on_cpu: 783 + total_layers: 820 + job_id: jogkdznnp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -151,21 +176,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:12:49Z' + timestamp: '2024-06-22T23:21:26Z' - torchscript_onnx_tflite: - inference_time: 11285658.0 - throughput: 0.08860803685527242 + inference_time: 11668449.0 + throughput: 0.08570119302059769 estimated_peak_memory_range: - min: 2642145280 - max: 2645812336 + min: 2719719424 + max: 2724482808 
primary_compute_unit: CPU precision: fp32 layer_info: layers_on_npu: 0 layers_on_gpu: 37 - layers_on_cpu: 771 - total_layers: 808 - job_id: jnp1q86ng + layers_on_cpu: 783 + total_layers: 820 + job_id: j1gl7nrm5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -174,4 +199,4 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:12:51Z' + timestamp: '2024-06-22T23:21:28Z' diff --git a/qai_hub_models/models/sesr_m5/export.py b/qai_hub_models/models/sesr_m5/export.py index de36e9d6..873520ad 100644 --- a/qai_hub_models/models/sesr_m5/export.py +++ b/qai_hub_models/models/sesr_m5/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -186,7 +185,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -208,7 +207,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/sesr_m5/info.yaml b/qai_hub_models/models/sesr_m5/info.yaml index f6715c77..cfe28c66 100644 --- a/qai_hub_models/models/sesr_m5/info.yaml +++ b/qai_hub_models/models/sesr_m5/info.yaml @@ -15,7 +15,7 @@ deploy_license: source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/sesr technical_details: Model checkpoint: sesr_m5_3x_checkpoint - Input resolution: 640x360 + Input resolution: 128x128 Number of parameters: 338K Model size: 1.30 MB applicable_scenarios: diff --git a/qai_hub_models/models/sesr_m5/model.py b/qai_hub_models/models/sesr_m5/model.py index b8aa863e..43627434 100644 --- a/qai_hub_models/models/sesr_m5/model.py +++ b/qai_hub_models/models/sesr_m5/model.py @@ -41,6 +41,5 @@ def from_pretrained(cls, scale_factor: int = DEFAULT_SCALE_FACTOR) -> SESR_M5: ) checkpoint = load_torch(checkpoint_asset) model.load_state_dict(checkpoint["state_dict"]) - model.eval() return cls(model, scale_factor) diff --git a/qai_hub_models/models/sesr_m5/perf.yaml b/qai_hub_models/models/sesr_m5/perf.yaml index 04e56cbe..196c4b2d 100644 --- a/qai_hub_models/models/sesr_m5/perf.yaml +++ b/qai_hub_models/models/sesr_m5/perf.yaml @@ -9,6 +9,7 @@ aggregated: 
- Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: SESR-M5 performance_metrics: - torchscript_onnx_tflite: - inference_time: 2201.0 - throughput: 454.3389368468878 + inference_time: 2231.0 + throughput: 448.22949350067233 estimated_peak_memory_range: - min: 16384 - max: 2206696 + min: 24576 + max: 1518464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: j1gle1ymp + job_id: jvgd0wlzp job_status: Passed torchscript_onnx_qnn: - inference_time: 2133.0 - throughput: 468.8232536333802 + inference_time: 2138.0 + throughput: 467.7268475210477 estimated_peak_memory_range: - min: 2113536 - max: 6868544 + min: 12288 + max: 5006592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jwgoe4lkp + job_id: jo5m4ry95 job_status: Passed - torchscript_onnx_ort: - inference_time: 2875.0 - throughput: 347.82608695652175 + torchscript_onnx: + inference_time: 2890.0 + throughput: 346.02076124567475 estimated_peak_memory_range: - min: 12288 - max: 6151368 + min: 16384 + max: 6069072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: jygzv4lxp + job_id: j2p0ky7n5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-11T11:58:43Z' + timestamp: '2024-06-22T23:22:11Z' - torchscript_onnx_tflite: - inference_time: 1621.0 - throughput: 616.9031462060457 + inference_time: 1678.0 + throughput: 595.9475566150179 estimated_peak_memory_range: min: 16384 - max: 25573456 + max: 26285840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: jw56qd8yg + job_id: jz576z39g job_status: Passed torchscript_onnx_qnn: - inference_time: 1460.0 - throughput: 684.931506849315 + inference_time: 1461.0 + throughput: 684.4626967830253 estimated_peak_memory_range: min: 204800 - max: 26892880 + max: 21246608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: j1pvz9lrg + job_id: jegnx28q5 job_status: Passed - torchscript_onnx_ort: - inference_time: 1954.0 - throughput: 511.77072671443193 + torchscript_onnx: + inference_time: 1939.0 + throughput: 515.7297576070139 estimated_peak_memory_range: min: 212992 - max: 20764320 + max: 21682432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: jz5wm1lmg + job_id: j1p88ovop job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-11T11:58:43Z' + timestamp: '2024-06-22T23:22:12Z' - torchscript_onnx_tflite: - inference_time: 2290.0 - throughput: 436.68122270742356 + inference_time: 2241.0 + throughput: 446.2293618920125 estimated_peak_memory_range: min: 28672 - max: 8571536 + max: 1296088 primary_compute_unit: NPU precision: fp16 
layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: j1p3qwzn5 + job_id: jqp48q01g job_status: Passed torchscript_onnx_qnn: - inference_time: 2135.0 - throughput: 468.384074941452 + inference_time: 2148.0 + throughput: 465.54934823091247 estimated_peak_memory_range: - min: 16384 - max: 9688296 + min: 57344 + max: 4305864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jlpe4l7v5 + job_id: jep2j8nq5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-11T11:58:42Z' + timestamp: '2024-06-22T23:22:08Z' + - torchscript_onnx_tflite: + inference_time: 2247.0 + throughput: 445.0378282153983 + estimated_peak_memory_range: + min: 24576 + max: 1504000 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 22 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 25 + job_id: j0pxmv2lg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 2129.0 + throughput: 469.7040864255519 + estimated_peak_memory_range: + min: 16384 + max: 79333368 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 31 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 31 + job_id: jqpyne0lg + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:22:10Z' - torchscript_onnx_qnn: - inference_time: 2971.0 - throughput: 336.58700774150117 + inference_time: 2424.0 + throughput: 412.54125412541254 estimated_peak_memory_range: - min: 221184 - max: 221184 + min: 204800 + max: 204800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: j7gjkwre5 + job_id: jopr9kj7p job_status: Passed - torchscript_onnx_ort: - inference_time: 2911.0 - throughput: 343.52456200618343 + torchscript_onnx: + inference_time: 2941.0 + throughput: 340.02040122407345 estimated_peak_memory_range: - min: 12976128 - max: 12976128 + min: 12984320 + max: 12984320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: jmg99xz8g + job_id: jogkdzmnp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-11T11:58:44Z' + timestamp: '2024-06-22T23:22:13Z' diff --git a/qai_hub_models/models/sesr_m5_quantized/export.py b/qai_hub_models/models/sesr_m5_quantized/export.py index 770f96a9..bcdedef7 100644 --- a/qai_hub_models/models/sesr_m5_quantized/export.py +++ b/qai_hub_models/models/sesr_m5_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel 
first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -216,7 +216,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/sesr_m5_quantized/info.yaml b/qai_hub_models/models/sesr_m5_quantized/info.yaml index 1852e48b..608c2f05 100644 --- a/qai_hub_models/models/sesr_m5_quantized/info.yaml +++ b/qai_hub_models/models/sesr_m5_quantized/info.yaml @@ -15,7 +15,7 @@ deploy_license: source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/sesr technical_details: Model checkpoint: sesr_m5_4x_checkpoint - Input resolution: 640x360 + Input resolution: 128x128 Number of parameters: 338K Model size: 389 KB applicable_scenarios: diff --git a/qai_hub_models/models/sesr_m5_quantized/model.py b/qai_hub_models/models/sesr_m5_quantized/model.py index 2d6a806d..c6638c3d 100644 --- a/qai_hub_models/models/sesr_m5_quantized/model.py +++ b/qai_hub_models/models/sesr_m5_quantized/model.py @@ -73,6 +73,4 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() - return cls(sim, scale_factor) diff --git a/qai_hub_models/models/sesr_m5_quantized/perf.yaml b/qai_hub_models/models/sesr_m5_quantized/perf.yaml index 617858a7..c02b6215 100644 --- a/qai_hub_models/models/sesr_m5_quantized/perf.yaml +++ b/qai_hub_models/models/sesr_m5_quantized/perf.yaml @@ -8,11 +8,11 @@ aggregated: - Google Pixel 4 - Google Pixel 4a - Google Pixel 5a 5G - - QCS6490 (Proxy) - QCS8250 (Proxy) - QCS8550 (Proxy) - - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -30,9 +30,10 @@ aggregated: - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +43,11 @@ models: - name: SESR-M5-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1220.0 - throughput: 819.672131147541 + inference_time: 1222.0 + throughput: 818.3306055646481 estimated_peak_memory_range: - min: 24576 - max: 1557800 + min: 36864 + max: 1336288 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: jvgd7zdzg + job_id: j1gl7zmm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1050.0 - throughput: 952.3809523809524 + inference_time: 1070.0 + throughput: 934.5794392523364 estimated_peak_memory_range: - min: 65536 - max: 4040712 + min: 12288 + max: 9384952 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +70,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 26 - job_id: jegnr7zq5 - job_status: Passed - 
torchscript_onnx_ort: - inference_time: 1055.0 - throughput: 947.8672985781991 - estimated_peak_memory_range: - min: 12288 - max: 4410832 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 29 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 29 - job_id: j2p0evmn5 + job_id: jlpe21mvp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +79,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-11T11:59:10Z' + timestamp: '2024-06-22T23:22:39Z' - torchscript_onnx_tflite: - inference_time: 1043.0 - throughput: 958.7727708533077 + inference_time: 1015.0 + throughput: 985.2216748768473 estimated_peak_memory_range: - min: 16384 - max: 23270336 + min: 12288 + max: 23781808 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +93,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: jz57v7e95 + job_id: jw56vj4yp job_status: Passed torchscript_onnx_qnn: - inference_time: 754.0 - throughput: 1326.2599469496022 + inference_time: 755.0 + throughput: 1324.5033112582782 estimated_peak_memory_range: - min: 0 - max: 21775952 + min: 61440 + max: 22369056 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +108,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 26 - job_id: jopr1nl7g - job_status: Passed - torchscript_onnx_ort: - inference_time: 808.0 - throughput: 1237.6237623762377 - estimated_peak_memory_range: - min: 24576 - max: 16135216 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 29 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 29 - job_id: j1p8w4eop + job_id: jygzw9dxg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +117,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-11T11:59:11Z' + timestamp: '2024-06-22T23:22:40Z' - torchscript_onnx_tflite: - inference_time: 1214.0 - throughput: 823.7232289950576 + inference_time: 1241.0 + throughput: 805.8017727639001 estimated_peak_memory_range: min: 12288 - max: 1428272 + max: 2099752 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +131,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: jqp4j9y1p + job_id: j1p3830n5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1049.0 - throughput: 953.2888465204957 + inference_time: 1041.0 + throughput: 960.6147934678194 estimated_peak_memory_range: min: 12288 - max: 80506384 + max: 15321680 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +146,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 26 - job_id: jqpyv7olp + job_id: jmg981n8p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +155,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-11T11:59:09Z' + timestamp: '2024-06-22T23:22:43Z' - torchscript_onnx_tflite: - inference_time: 3090.0 - throughput: 323.62459546925567 + inference_time: 1227.0 + throughput: 814.9959250203749 estimated_peak_memory_range: min: 12288 - max: 16873840 + max: 1397720 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 22 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 25 + job_id: jwgom06k5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1041.0 + throughput: 960.6147934678194 + estimated_peak_memory_range: + min: 16384 + max: 55896296 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 26 + 
layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 26 + job_id: jnp13lz75 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:22:44Z' + - torchscript_onnx_tflite: + inference_time: 3105.0 + throughput: 322.061191626409 + estimated_peak_memory_range: + min: 49152 + max: 17239376 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: j0pxedll5 + job_id: j1pv4okrp job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -207,13 +216,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-11T11:59:04Z' + timestamp: '2024-06-22T23:22:37Z' - torchscript_onnx_tflite: - inference_time: 16778.0 - throughput: 59.60185957801883 + inference_time: 15911.0 + throughput: 62.84960090503425 estimated_peak_memory_range: - min: 249856 - max: 7422256 + min: 3657728 + max: 6562240 primary_compute_unit: NPU precision: int8 layer_info: @@ -221,7 +230,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: jo5mvd095 + job_id: j7gj1mneg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -230,10 +239,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-11T11:59:05Z' + timestamp: '2024-06-22T23:22:38Z' - torchscript_onnx_qnn: - inference_time: 1198.0 - throughput: 834.7245409015025 + inference_time: 1053.0 + throughput: 949.667616334283 estimated_peak_memory_range: min: 57344 max: 57344 @@ -244,22 +253,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 26 - job_id: jep23vrqg - job_status: Passed - torchscript_onnx_ort: - inference_time: 1092.0 - throughput: 915.7509157509157 - estimated_peak_memory_range: - min: 5398528 - max: 5398528 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 29 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 29 - job_id: jogkr92n5 + job_id: jz5wxv6mp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -268,4 +262,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-11T11:59:12Z' + timestamp: '2024-06-22T23:22:42Z' diff --git a/qai_hub_models/models/shufflenet_v2/export.py b/qai_hub_models/models/shufflenet_v2/export.py index a3bfc586..7a11fba9 100644 --- a/qai_hub_models/models/shufflenet_v2/export.py +++ b/qai_hub_models/models/shufflenet_v2/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif 
target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/shufflenet_v2/perf.yaml b/qai_hub_models/models/shufflenet_v2/perf.yaml index 0d002af4..b3ddccf5 100644 --- a/qai_hub_models/models/shufflenet_v2/perf.yaml +++ b/qai_hub_models/models/shufflenet_v2/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: Shufflenet-v2 performance_metrics: - torchscript_onnx_tflite: - inference_time: 1229.0 - throughput: 813.6696501220505 + inference_time: 1230.0 + throughput: 813.0081300813008 estimated_peak_memory_range: - min: 49152 - max: 1892400 + min: 24576 + max: 1946184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: jqpyvdx0p + job_id: jqp48or2g job_status: Passed torchscript_onnx_qnn: - inference_time: 767.0 - throughput: 1303.7809647979138 + inference_time: 779.0 + throughput: 1283.6970474967907 estimated_peak_memory_range: - min: 12288 - max: 127973560 + min: 622592 + max: 4960848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jogkry2v5 + job_id: jopr9q3kp job_status: Passed - torchscript_onnx_ort: - inference_time: 1093.0 - throughput: 914.9130832570905 + torchscript_onnx: + inference_time: 1070.0 + throughput: 934.5794392523364 estimated_peak_memory_range: - min: 0 - max: 4739736 + min: 12288 + max: 11408736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 223 - job_id: j1p3qmzm5 + job_id: jogkdnevp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:14:43Z' + timestamp: '2024-06-22T23:23:19Z' - torchscript_onnx_tflite: - inference_time: 816.0 - throughput: 1225.4901960784314 + inference_time: 815.0 + throughput: 1226.993865030675 estimated_peak_memory_range: - min: 12288 - max: 34358736 + min: 16384 + max: 37553248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: j2p0erm05 + job_id: j0pxmjo8g job_status: Passed torchscript_onnx_qnn: inference_time: 519.0 throughput: 1926.7822736030828 estimated_peak_memory_range: - min: 12288 - max: 59916624 + min: 618496 + max: 53660880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jn5q92lep + job_id: jep2j6y65 job_status: Passed - torchscript_onnx_ort: - inference_time: 762.0 - throughput: 1312.3359580052493 + torchscript_onnx: + inference_time: 741.0 + throughput: 1349.527665317139 estimated_peak_memory_range: - min: 12288 - max: 24863536 + min: 618496 + max: 19576080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 223 - job_id: jwgoevl1p + job_id: jn5qwk6e5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:14:44Z' + timestamp: '2024-06-22T23:23:20Z' - torchscript_onnx_tflite: - inference_time: 1223.0 - throughput: 817.6614881439084 + inference_time: 1221.0 + throughput: 819.000819000819 estimated_peak_memory_range: - min: 28672 - max: 1440712 + min: 12288 + max: 1486952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: j1p8w7eqp + job_id: jo5m42x75 job_status: Passed torchscript_onnx_qnn: - inference_time: 769.0 - throughput: 1300.3901170351105 + inference_time: 780.0 + throughput: 1282.051282051282 estimated_peak_memory_range: - min: 622592 - max: 138856072 + min: 77824 + max: 4781416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jw56q18ng + job_id: j2p0kqz05 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:14:42Z' + timestamp: '2024-06-22T23:23:17Z' + - torchscript_onnx_tflite: + inference_time: 1231.0 + throughput: 812.3476848090983 + estimated_peak_memory_range: + min: 12288 + max: 1741560 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 204 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 204 + job_id: jegnxyvj5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 785.0 + throughput: 1273.8853503184714 + estimated_peak_memory_range: + min: 36864 + max: 17255528 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 158 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 158 + job_id: j1p889qqp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:23:18Z' - torchscript_onnx_qnn: - inference_time: 1095.0 - throughput: 913.2420091324201 + inference_time: 890.0 + throughput: 1123.5955056179776 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: j1gleky2p + job_id: jqpynw30g job_status: Passed - torchscript_onnx_ort: - inference_time: 1103.0 - throughput: 906.6183136899365 + torchscript_onnx: + inference_time: 1113.0 + throughput: 898.4725965858041 estimated_peak_memory_range: - min: 5971968 - max: 5971968 + min: 7417856 + max: 7417856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 223 - job_id: j1pvzwlzg + job_id: j1gl7zv25 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:14:45Z' + timestamp: '2024-06-22T23:23:21Z' diff --git a/qai_hub_models/models/shufflenet_v2_quantized/export.py b/qai_hub_models/models/shufflenet_v2_quantized/export.py index 437ca9e5..a66bc769 100644 --- a/qai_hub_models/models/shufflenet_v2_quantized/export.py +++ b/qai_hub_models/models/shufflenet_v2_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: 
str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -227,7 +227,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_ort=False) + parser = export_parser(model_cls=Model, supports_onnx=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/shufflenet_v2_quantized/model.py b/qai_hub_models/models/shufflenet_v2_quantized/model.py index a3d7540d..e77c1af3 100644 --- a/qai_hub_models/models/shufflenet_v2_quantized/model.py +++ b/qai_hub_models/models/shufflenet_v2_quantized/model.py @@ -92,5 +92,4 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() return cls(sim) diff --git a/qai_hub_models/models/shufflenet_v2_quantized/perf.yaml b/qai_hub_models/models/shufflenet_v2_quantized/perf.yaml index 342366ce..ec0a0951 100644 --- a/qai_hub_models/models/shufflenet_v2_quantized/perf.yaml +++ b/qai_hub_models/models/shufflenet_v2_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: Shufflenet-v2Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 629.0 - throughput: 1589.825119236884 + inference_time: 642.0 + throughput: 1557.632398753894 estimated_peak_memory_range: - min: 16384 - max: 1932240 + min: 12288 + max: 2212664 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 205 - job_id: jlpe4v785 + job_id: j1p383jm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 582.0 - throughput: 1718.213058419244 + inference_time: 583.0 + throughput: 1715.2658662092624 estimated_peak_memory_range: min: 16384 - max: 102592048 + max: 64010128 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,7 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: jvgd7vd6g + job_id: jz5wxve4p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -78,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:15:22Z' + timestamp: '2024-06-22T23:23:59Z' - torchscript_onnx_tflite: - inference_time: 
459.0 - throughput: 2178.649237472767 + inference_time: 460.0 + throughput: 2173.913043478261 estimated_peak_memory_range: - min: 12288 - max: 23307232 + min: 16384 + max: 26620064 primary_compute_unit: NPU precision: int8 layer_info: @@ -92,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 205 - job_id: jygzv7l4p + job_id: jwgom0215 job_status: Passed torchscript_onnx_qnn: - inference_time: 418.0 - throughput: 2392.3444976076553 + inference_time: 420.0 + throughput: 2380.9523809523807 estimated_peak_memory_range: min: 163840 - max: 50012432 + max: 45420992 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,7 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: jz5wm9lzg + job_id: jmg981lmp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -116,13 +120,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:15:23Z' + timestamp: '2024-06-22T23:24:01Z' - torchscript_onnx_tflite: - inference_time: 649.0 - throughput: 1540.8320493066255 + inference_time: 622.0 + throughput: 1607.717041800643 estimated_peak_memory_range: min: 12288 - max: 1631760 + max: 1609232 primary_compute_unit: NPU precision: int8 layer_info: @@ -130,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 205 - job_id: jz5wm9l4g + job_id: j1pv4o6zp job_status: Passed torchscript_onnx_qnn: - inference_time: 585.0 - throughput: 1709.4017094017095 + inference_time: 587.0 + throughput: 1703.5775127768313 estimated_peak_memory_range: - min: 163840 - max: 77147648 + min: 16384 + max: 88702944 primary_compute_unit: NPU precision: int8 layer_info: @@ -145,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: jnp1q8nkg + job_id: jvgd09x6p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -154,13 +158,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:15:26Z' + timestamp: '2024-06-22T23:24:03Z' + - torchscript_onnx_tflite: + inference_time: 625.0 + throughput: 1600.0 + estimated_peak_memory_range: + min: 16384 + max: 1789400 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 205 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 205 + job_id: j7gj1mv1g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 598.0 + throughput: 1672.2408026755852 + estimated_peak_memory_range: + min: 172032 + max: 98562128 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 122 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 122 + job_id: jz576wyng + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:24:05Z' - torchscript_onnx_tflite: - inference_time: 944.0 - throughput: 1059.322033898305 + inference_time: 976.0 + throughput: 1024.5901639344263 estimated_peak_memory_range: min: 12288 - max: 17202032 + max: 19164000 primary_compute_unit: NPU precision: int8 layer_info: @@ -168,7 +210,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 205 - job_id: jmg994zmg + job_id: jlpe21d8p job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -177,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-08T23:15:20Z' + timestamp: '2024-06-22T23:23:57Z' - torchscript_onnx_tflite: - inference_time: 8510.0 - throughput: 117.50881316098707 + inference_time: 9453.0 
+ throughput: 105.78652279699567 estimated_peak_memory_range: - min: 65536 - max: 5500048 + min: 176128 + max: 9298744 primary_compute_unit: CPU precision: fp32 layer_info: @@ -191,7 +233,7 @@ models: layers_on_gpu: 9 layers_on_cpu: 153 total_layers: 205 - job_id: jnp1q8nng + job_id: jygzw934g job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -200,13 +242,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-08T23:15:21Z' + timestamp: '2024-06-22T23:23:58Z' - torchscript_onnx_qnn: - inference_time: 694.0 - throughput: 1440.922190201729 + inference_time: 658.0 + throughput: 1519.756838905775 estimated_peak_memory_range: - min: 618496 - max: 618496 + min: 540672 + max: 540672 primary_compute_unit: NPU precision: int8 layer_info: @@ -214,7 +256,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: jmg994zqg + job_id: jnp13l4n5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -223,4 +265,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:15:24Z' + timestamp: '2024-06-22T23:24:02Z' diff --git a/qai_hub_models/models/sinet/app.py b/qai_hub_models/models/sinet/app.py index 00a29a2f..3d3ba287 100644 --- a/qai_hub_models/models/sinet/app.py +++ b/qai_hub_models/models/sinet/app.py @@ -78,8 +78,7 @@ def predict( """ input_tensor = preprocess_image(image) - with torch.no_grad(): - output = self.model(input_tensor) + output = self.model(input_tensor) face_map = (output[0].data.cpu() > 0).numpy()[0] bg_map = output[0].max(0)[1].byte().data.cpu().numpy() diff --git a/qai_hub_models/models/sinet/export.py b/qai_hub_models/models/sinet/export.py index 495dde9f..2dfd9f5c 100644 --- a/qai_hub_models/models/sinet/export.py +++ b/qai_hub_models/models/sinet/export.py @@ -38,7 +38,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,13 +117,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -169,7 +168,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -187,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -209,7 +208,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git 
a/qai_hub_models/models/sinet/model.py b/qai_hub_models/models/sinet/model.py index 76625bd3..1a593d16 100644 --- a/qai_hub_models/models/sinet/model.py +++ b/qai_hub_models/models/sinet/model.py @@ -40,7 +40,7 @@ def __init__( def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> SINet: sinet_model = _load_sinet_source_model_from_weights(weights) - return cls(sinet_model.eval()) + return cls(sinet_model) def forward(self, image: torch.Tensor) -> torch.Tensor: """ diff --git a/qai_hub_models/models/sinet/perf.yaml b/qai_hub_models/models/sinet/perf.yaml index 79d10a22..504e9f64 100644 --- a/qai_hub_models/models/sinet/perf.yaml +++ b/qai_hub_models/models/sinet/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: SINet performance_metrics: - torchscript_onnx_tflite: - inference_time: 1808.0 - throughput: 553.0973451327434 + inference_time: 1758.0 + throughput: 568.8282138794084 estimated_peak_memory_range: - min: 16384 - max: 1874832 + min: 20480 + max: 2455968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 240 - job_id: jo5mvz0y5 + job_id: jqpynw10g job_status: Passed torchscript_onnx_qnn: - inference_time: 1170.0 - throughput: 854.7008547008547 + inference_time: 1168.0 + throughput: 856.1643835616438 estimated_peak_memory_range: - min: 16384 - max: 4437520 + min: 622592 + max: 28052072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jep23moxg + job_id: jn5qwk0e5 job_status: Passed - torchscript_onnx_ort: - inference_time: 2242.0 - throughput: 446.03033006244425 + torchscript_onnx: + inference_time: 2243.0 + throughput: 445.83147570218455 estimated_peak_memory_range: - min: 233472 - max: 61135024 + min: 16384 + max: 5425456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 229 - job_id: jogkry6y5 + job_id: j1pv4oqzp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:15:55Z' + timestamp: '2024-06-22T23:24:40Z' - torchscript_onnx_tflite: - inference_time: 1188.0 - throughput: 841.7508417508418 + inference_time: 1160.0 + throughput: 862.0689655172414 estimated_peak_memory_range: - min: 12288 - max: 27213536 + min: 16384 + max: 29922544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 240 - job_id: jegnre1v5 + job_id: j2p0kq405 job_status: Passed torchscript_onnx_qnn: inference_time: 784.0 throughput: 1275.5102040816328 estimated_peak_memory_range: - min: 0 - max: 67399104 + min: 12288 + max: 61060032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jqpyvd8rp + job_id: j1gl7z425 job_status: Passed - torchscript_onnx_ort: - inference_time: 1564.0 - throughput: 639.386189258312 + torchscript_onnx: + inference_time: 1545.0 + throughput: 647.2491909385113 estimated_peak_memory_range: - min: 12288 - max: 
25637744 + min: 0 + max: 19680448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 229 - job_id: jn5q9247p + job_id: j7gj1md1g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:15:55Z' + timestamp: '2024-06-22T23:24:41Z' - torchscript_onnx_tflite: - inference_time: 1809.0 - throughput: 552.791597567717 + inference_time: 1748.0 + throughput: 572.0823798627002 estimated_peak_memory_range: min: 12288 - max: 1931632 + max: 2065992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 240 - job_id: jopr1yxvg + job_id: j1p8892qp job_status: Passed torchscript_onnx_qnn: - inference_time: 1183.0 - throughput: 845.30853761623 + inference_time: 1185.0 + throughput: 843.8818565400844 estimated_peak_memory_range: - min: 622592 - max: 10230296 + min: 618496 + max: 60975648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: j1p8w7jzp + job_id: j1p383nm5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:15:54Z' + timestamp: '2024-06-22T23:24:37Z' + - torchscript_onnx_tflite: + inference_time: 1749.0 + throughput: 571.7552887364208 + estimated_peak_memory_range: + min: 20480 + max: 2530872 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 240 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 240 + job_id: jogkdnvvp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1171.0 + throughput: 853.9709649871904 + estimated_peak_memory_range: + min: 16384 + max: 13834928 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 186 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 186 + job_id: jwgom0z15 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:24:38Z' - torchscript_onnx_qnn: - inference_time: 1353.0 - throughput: 739.0983000739099 + inference_time: 1319.0 + throughput: 758.1501137225171 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: j2p0ero25 + job_id: jw56vj2np job_status: Passed - torchscript_onnx_ort: - inference_time: 2343.0 - throughput: 426.8032437046522 + torchscript_onnx: + inference_time: 2306.0 + throughput: 433.6513443191674 estimated_peak_memory_range: - min: 6090752 - max: 6090752 + min: 5414912 + max: 5414912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 229 - job_id: j1glekwep + job_id: jlpe21o8p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:15:56Z' + timestamp: '2024-06-22T23:24:42Z' diff --git a/qai_hub_models/models/squeezenet1_1/export.py b/qai_hub_models/models/squeezenet1_1/export.py index 361dd2cc..247c50b2 100644 --- a/qai_hub_models/models/squeezenet1_1/export.py +++ b/qai_hub_models/models/squeezenet1_1/export.py @@ -37,7 +37,7 @@ def 
export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/squeezenet1_1/perf.yaml b/qai_hub_models/models/squeezenet1_1/perf.yaml index e1645dcb..8a5f5078 100644 --- a/qai_hub_models/models/squeezenet1_1/perf.yaml +++ b/qai_hub_models/models/squeezenet1_1/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: SqueezeNet-1_1 performance_metrics: - torchscript_onnx_tflite: - inference_time: 646.0 - throughput: 1547.9876160990711 + inference_time: 661.0 + throughput: 1512.8593040847202 estimated_peak_memory_range: - min: 20480 - max: 1582896 + min: 24576 + max: 1440368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: j1p3qmox5 + job_id: jz5wxvw4p job_status: Passed torchscript_onnx_qnn: - inference_time: 702.0 - throughput: 1424.5014245014245 + inference_time: 719.0 + throughput: 1390.8205841446454 estimated_peak_memory_range: - min: 16384 - max: 7170920 + min: 622592 + max: 4290184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: j7gjkl375 + job_id: jz576w2ng job_status: Passed - torchscript_onnx_ort: - inference_time: 671.0 - throughput: 1490.312965722802 + torchscript_onnx: + inference_time: 680.0 + throughput: 1470.5882352941176 estimated_peak_memory_range: - min: 12288 - max: 11919448 + min: 16384 + max: 7145704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jmg994oqg + job_id: jopr9q6kp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:16:21Z' + timestamp: '2024-06-22T23:25:10Z' - torchscript_onnx_tflite: - inference_time: 452.0 - throughput: 2212.3893805309735 + inference_time: 476.0 + throughput: 2100.840336134454 
estimated_peak_memory_range: - min: 18399232 - max: 41710416 + min: 12288 + max: 23926224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jwgoevd4p + job_id: jmg9810mp job_status: Passed torchscript_onnx_qnn: - inference_time: 492.0 - throughput: 2032.520325203252 + inference_time: 491.0 + throughput: 2036.6598778004072 estimated_peak_memory_range: min: 0 - max: 32687824 + max: 27339600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jlpe4v675 + job_id: jqp48on2g job_status: Passed - torchscript_onnx_ort: - inference_time: 477.0 - throughput: 2096.4360587002097 + torchscript_onnx: + inference_time: 510.0 + throughput: 1960.7843137254902 estimated_peak_memory_range: - min: 12288 - max: 19637184 + min: 24576 + max: 16423504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jnp1q8okg + job_id: jep2j6x65 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:16:22Z' + timestamp: '2024-06-22T23:25:11Z' - torchscript_onnx_tflite: - inference_time: 664.0 - throughput: 1506.0240963855422 + inference_time: 653.0 + throughput: 1531.3935681470139 estimated_peak_memory_range: - min: 20480 - max: 1398696 + min: 12288 + max: 1448408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: j1pvzw27g + job_id: jnp13l2n5 job_status: Passed torchscript_onnx_qnn: - inference_time: 698.0 - throughput: 1432.6647564469913 + inference_time: 717.0 + throughput: 1394.700139470014 estimated_peak_memory_range: - min: 634880 - max: 7259784 + min: 622592 + max: 90389384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jz5wm9yzg + job_id: jo5m42e75 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:16:20Z' + timestamp: '2024-06-22T23:25:07Z' + - torchscript_onnx_tflite: + inference_time: 670.0 + throughput: 1492.5373134328358 + estimated_peak_memory_range: + min: 12288 + max: 3072888 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 41 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 41 + job_id: jvgd09n6p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 707.0 + throughput: 1414.4271570014143 + estimated_peak_memory_range: + min: 20480 + max: 5774040 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 70 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 70 + job_id: jegnxy0j5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:25:09Z' - torchscript_onnx_qnn: - inference_time: 801.0 - throughput: 1248.4394506866417 + inference_time: 817.0 + throughput: 1223.9902080783354 estimated_peak_memory_range: - min: 606208 - max: 606208 + min: 602112 + max: 602112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - 
job_id: jygzv7zzp + job_id: j0pxmj98g job_status: Passed - torchscript_onnx_ort: - inference_time: 681.0 - throughput: 1468.4287812041116 + torchscript_onnx: + inference_time: 662.0 + throughput: 1510.5740181268882 estimated_peak_memory_range: - min: 3670016 - max: 3670016 + min: 3444736 + max: 3444736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jvgd7v6kg + job_id: jqpynwz0g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:16:23Z' + timestamp: '2024-06-22T23:25:12Z' diff --git a/qai_hub_models/models/squeezenet1_1_quantized/export.py b/qai_hub_models/models/squeezenet1_1_quantized/export.py index f68f076e..ab2bba60 100644 --- a/qai_hub_models/models/squeezenet1_1_quantized/export.py +++ b/qai_hub_models/models/squeezenet1_1_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/squeezenet1_1_quantized/model.py b/qai_hub_models/models/squeezenet1_1_quantized/model.py index 63e2a276..097b1cd9 100644 --- a/qai_hub_models/models/squeezenet1_1_quantized/model.py +++ b/qai_hub_models/models/squeezenet1_1_quantized/model.py @@ -78,5 +78,4 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() return cls(sim) diff --git a/qai_hub_models/models/squeezenet1_1_quantized/perf.yaml b/qai_hub_models/models/squeezenet1_1_quantized/perf.yaml index 1d3b45b2..8ef50f58 100644 --- a/qai_hub_models/models/squeezenet1_1_quantized/perf.yaml +++ b/qai_hub_models/models/squeezenet1_1_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: SqueezeNet-1_1Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 220.0 - throughput: 4545.454545454545 + inference_time: 229.0 + throughput: 4366.812227074236 estimated_peak_memory_range: - min: 16384 - max: 1715824 + min: 24576 + max: 1612824 primary_compute_unit: NPU 
precision: int8 layer_info: @@ -54,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jqp4jweqp + job_id: j1p8890qp job_status: Passed torchscript_onnx_qnn: - inference_time: 467.0 - throughput: 2141.3276231263385 + inference_time: 468.0 + throughput: 2136.7521367521367 estimated_peak_memory_range: min: 167936 - max: 10118072 + max: 3876080 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 45 - job_id: jep23mdxg - job_status: Passed - torchscript_onnx_ort: - inference_time: 450.0 - throughput: 2222.222222222222 - estimated_peak_memory_range: - min: 12288 - max: 5507096 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 47 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 47 - job_id: jogkry0y5 + job_id: jwgom0k15 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:16:52Z' + timestamp: '2024-06-22T23:25:39Z' - torchscript_onnx_tflite: - inference_time: 179.0 - throughput: 5586.592178770949 + inference_time: 184.0 + throughput: 5434.782608695652 estimated_peak_memory_range: min: 12288 - max: 22450960 + max: 23959632 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: j0pxe10j5 + job_id: jogkdn7vp job_status: Passed torchscript_onnx_qnn: inference_time: 342.0 throughput: 2923.9766081871344 estimated_peak_memory_range: - min: 12288 - max: 27530432 + min: 163840 + max: 28116064 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 45 - job_id: jqpyvd2rp - job_status: Passed - torchscript_onnx_ort: - inference_time: 372.0 - throughput: 2688.1720430107525 - estimated_peak_memory_range: - min: 12288 - max: 15334176 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 47 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 47 - job_id: jn5q9217p + job_id: j1pv4o0zp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +120,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:16:53Z' + timestamp: '2024-06-22T23:25:40Z' - torchscript_onnx_tflite: - inference_time: 223.0 - throughput: 4484.304932735426 + inference_time: 218.0 + throughput: 4587.155963302752 estimated_peak_memory_range: min: 20480 - max: 1471296 + max: 1350800 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jo5mvz9y5 + job_id: jn5qwkee5 job_status: Passed torchscript_onnx_qnn: - inference_time: 464.0 - throughput: 2155.1724137931033 + inference_time: 467.0 + throughput: 2141.3276231263385 estimated_peak_memory_range: - min: 28672 - max: 17992504 + min: 172032 + max: 10450952 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 45 - job_id: j1p8w7rzp + job_id: jlpe21e8p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +158,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:16:51Z' + timestamp: '2024-06-22T23:25:43Z' - torchscript_onnx_tflite: - inference_time: 526.0 - throughput: 1901.1406844106464 + inference_time: 219.0 + throughput: 
4566.2100456621 estimated_peak_memory_range: min: 12288 - max: 14752288 + max: 1516424 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,7 +172,45 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jegnreqv5 + job_id: j1gl7z625 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 465.0 + throughput: 2150.537634408602 + estimated_peak_memory_range: + min: 12288 + max: 17011160 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 45 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 45 + job_id: jygzw9o4g + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:25:44Z' + - torchscript_onnx_tflite: + inference_time: 533.0 + throughput: 1876.172607879925 + estimated_peak_memory_range: + min: 12288 + max: 16053664 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 41 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 41 + job_id: jw56vjenp job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -207,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-08T23:16:46Z' + timestamp: '2024-06-22T23:25:37Z' - torchscript_onnx_tflite: - inference_time: 4092.0 - throughput: 244.37927663734115 + inference_time: 4108.0 + throughput: 243.42745861733204 estimated_peak_memory_range: - min: 20480 - max: 7234128 + min: 163840 + max: 1845464 primary_compute_unit: NPU precision: int8 layer_info: @@ -221,7 +233,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jopr1ydvg + job_id: j1p383vm5 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -230,13 +242,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-08T23:16:47Z' + timestamp: '2024-06-22T23:25:38Z' - torchscript_onnx_qnn: - inference_time: 536.0 - throughput: 1865.6716417910447 + inference_time: 541.0 + throughput: 1848.4288354898335 estimated_peak_memory_range: - min: 1843200 - max: 1843200 + min: 679936 + max: 679936 primary_compute_unit: NPU precision: int8 layer_info: @@ -244,22 +256,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 45 - job_id: j2p0er925 - job_status: Passed - torchscript_onnx_ort: - inference_time: 472.0 - throughput: 2118.64406779661 - estimated_peak_memory_range: - min: 2641920 - max: 2641920 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 47 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 47 - job_id: j1glek8ep + job_id: j7gj1mz1g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -268,4 +265,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:16:54Z' + timestamp: '2024-06-22T23:25:42Z' diff --git a/qai_hub_models/models/stable_diffusion_v1_5_quantized/export.py b/qai_hub_models/models/stable_diffusion_v1_5_quantized/export.py index c6394ccb..6b5d0103 100644 --- a/qai_hub_models/models/stable_diffusion_v1_5_quantized/export.py +++ b/qai_hub_models/models/stable_diffusion_v1_5_quantized/export.py @@ -27,7 +27,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, components: Optional[List[str]] = None, skip_profiling: bool = False, diff --git a/qai_hub_models/models/stable_diffusion_v2_1_quantized/export.py 
b/qai_hub_models/models/stable_diffusion_v2_1_quantized/export.py index d2b0dffd..9fdaa7eb 100644 --- a/qai_hub_models/models/stable_diffusion_v2_1_quantized/export.py +++ b/qai_hub_models/models/stable_diffusion_v2_1_quantized/export.py @@ -27,7 +27,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, components: Optional[List[str]] = None, skip_profiling: bool = False, diff --git a/qai_hub_models/models/stylegan2/app.py b/qai_hub_models/models/stylegan2/app.py index f0c4c304..2db6ace2 100644 --- a/qai_hub_models/models/stylegan2/app.py +++ b/qai_hub_models/models/stylegan2/app.py @@ -76,34 +76,31 @@ def generate_images( Returns: See raw_output parameter description. """ - with torch.no_grad(): - if image_noise is None: - image_noise = self.generate_random_vec( - batch_size=class_idx.shape[0] if class_idx is not None else 1 - ) - - if self.num_classes != 0: - if isinstance(class_idx, int): - class_idx = torch.Tensor([class_idx] * image_noise.shape[0]) - - if isinstance(class_idx, torch.Tensor) and len(class_idx.shape) == 1: - # Convert from [N] class index to one-hot [N, # of classes] - assert class_idx.dtype == torch.int - model_classes = torch.nn.functional.one_hot( - class_idx, self.num_classes - ) - else: - model_classes = class_idx - - image_tensor = self.model(image_noise, model_classes) + if image_noise is None: + image_noise = self.generate_random_vec( + batch_size=class_idx.shape[0] if class_idx is not None else 1 + ) + + if self.num_classes != 0: + if isinstance(class_idx, int): + class_idx = torch.Tensor([class_idx] * image_noise.shape[0]) + + if isinstance(class_idx, torch.Tensor) and len(class_idx.shape) == 1: + # Convert from [N] class index to one-hot [N, # of classes] + assert class_idx.dtype == torch.int + model_classes = torch.nn.functional.one_hot(class_idx, self.num_classes) else: - image_tensor = self.model(image_noise) + model_classes = class_idx - image_tensor = ( - (image_tensor.permute(0, 2, 3, 1) * 127.5 + 128) - .clamp(0, 255) - .to(torch.uint8) - ) + image_tensor = self.model(image_noise, model_classes) + else: + image_tensor = self.model(image_noise) + + image_tensor = ( + (image_tensor.permute(0, 2, 3, 1) * 127.5 + 128) + .clamp(0, 255) + .to(torch.uint8) + ) if raw_output: return image_tensor diff --git a/qai_hub_models/models/stylegan2/export.py b/qai_hub_models/models/stylegan2/export.py index 5a712592..354e2082 100644 --- a/qai_hub_models/models/stylegan2/export.py +++ b/qai_hub_models/models/stylegan2/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -115,13 +115,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -179,7 +178,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" 
os.makedirs(output_path, exist_ok=True) @@ -201,7 +200,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) @@ -216,8 +215,8 @@ def main(): parser = export_parser( model_cls=Model, supports_qnn=False, - supports_ort=False, - supports_precompiled_ort=False, + supports_onnx=False, + supports_precompiled_qnn_onnx=False, ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/stylegan2/perf.yaml b/qai_hub_models/models/stylegan2/perf.yaml index 938965f9..f9924141 100644 --- a/qai_hub_models/models/stylegan2/perf.yaml +++ b/qai_hub_models/models/stylegan2/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: StyleGAN2 performance_metrics: - torchscript_onnx_tflite: - inference_time: 1649413.0 - throughput: 0.6062762934450013 + inference_time: 1555617.0 + throughput: 0.6428317510029783 estimated_peak_memory_range: - min: 1397805056 - max: 2230233016 + min: 1394122752 + max: 2215496248 primary_compute_unit: CPU precision: fp32 layer_info: @@ -48,7 +50,7 @@ models: layers_on_gpu: 78 layers_on_cpu: 402 total_layers: 480 - job_id: j1p3qm7x5 + job_id: j0pxmjn8g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -57,13 +59,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:17:52Z' + timestamp: '2024-06-22T23:26:47Z' - torchscript_onnx_tflite: - inference_time: 1311471.0 - throughput: 0.7625025639148711 + inference_time: 1307557.0 + throughput: 0.764785015108328 estimated_peak_memory_range: - min: 1184645120 - max: 1218773040 + min: 1076854784 + max: 1106994896 primary_compute_unit: CPU precision: fp32 layer_info: @@ -71,7 +73,7 @@ models: layers_on_gpu: 78 layers_on_cpu: 402 total_layers: 480 - job_id: jwgoevw4p + job_id: jo5m42q75 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -80,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:17:53Z' + timestamp: '2024-06-22T23:26:48Z' - torchscript_onnx_tflite: - inference_time: 1578379.0 - throughput: 0.6335613943165742 + inference_time: 1673478.0 + throughput: 0.5975579003727566 estimated_peak_memory_range: - min: 1049174016 - max: 1057203192 + min: 842801152 + max: 3302164848 primary_compute_unit: CPU precision: fp32 layer_info: @@ -94,7 +96,7 @@ models: layers_on_gpu: 78 layers_on_cpu: 402 total_layers: 480 - job_id: j1pvzwm7g + job_id: jegnxylj5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -103,4 +105,27 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:17:54Z' + timestamp: '2024-06-22T23:26:50Z' + - torchscript_onnx_tflite: + inference_time: 1573212.0 + throughput: 0.63564224020666 + estimated_peak_memory_range: + min: 1351241728 + max: 1354927960 + primary_compute_unit: CPU + precision: fp32 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 78 + layers_on_cpu: 402 + total_layers: 480 + job_id: jopr9q8kp + job_status: Passed + reference_device_info: + 
name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:26:51Z' diff --git a/qai_hub_models/models/swin_base/export.py b/qai_hub_models/models/swin_base/export.py index 79d16b96..5c7ba3ae 100644 --- a/qai_hub_models/models/swin_base/export.py +++ b/qai_hub_models/models/swin_base/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/swin_base/model.py b/qai_hub_models/models/swin_base/model.py index b4ba8831..34e855dd 100644 --- a/qai_hub_models/models/swin_base/model.py +++ b/qai_hub_models/models/swin_base/model.py @@ -29,4 +29,4 @@ def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> ImagenetClassifier: replace_module_recursively( net, torch.nn.Linear, AutoSplitLinear, parent_module=PatchMerging ) - return cls(net.eval()) + return cls(net) diff --git a/qai_hub_models/models/swin_base/perf.yaml b/qai_hub_models/models/swin_base/perf.yaml index 37ad14fa..0a8975b8 100644 --- a/qai_hub_models/models/swin_base/perf.yaml +++ b/qai_hub_models/models/swin_base/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: Swin-Base performance_metrics: - torchscript_onnx_tflite: - inference_time: 38045.0 - throughput: 26.284662899198317 + inference_time: 37584.0 + throughput: 26.607066836951894 estimated_peak_memory_range: - min: 307200 - max: 3648376 + min: 126976 + max: 3572000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1568 - job_id: jz57vdnq5 + job_id: j1p3831m5 job_status: Passed torchscript_onnx_qnn: - inference_time: 31404.0 - throughput: 31.84307731499172 + inference_time: 31554.0 + throughput: 31.691703112125246 estimated_peak_memory_range: - min: 57344 - max: 46336408 + min: 16384 + max: 51870480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1255 - 
job_id: jo5mvzky5 + job_id: jlpe21w8p job_status: Passed - torchscript_onnx_ort: - inference_time: 63106.0 - throughput: 15.846353754001205 + torchscript_onnx: + inference_time: 63881.0 + throughput: 15.654106854933392 estimated_peak_memory_range: - min: 278528 - max: 457269496 + min: 86016 + max: 471415568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1163 - job_id: jqpyvdyrp + job_id: jvgd09q6p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:18:38Z' + timestamp: '2024-06-22T23:27:48Z' - torchscript_onnx_tflite: - inference_time: 26266.0 - throughput: 38.07203228508338 + inference_time: 26582.0 + throughput: 37.61944172748476 estimated_peak_memory_range: - min: 49152 - max: 501753168 + min: 45056 + max: 524204544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1568 - job_id: jqp4jw4qp + job_id: jwgom0n15 job_status: Passed torchscript_onnx_qnn: - inference_time: 22072.0 - throughput: 45.30627038782168 + inference_time: 21873.0 + throughput: 45.7184656882915 estimated_peak_memory_range: min: 0 - max: 409890496 + max: 368391424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1255 - job_id: jegnrewv5 + job_id: jygzw9j4g job_status: Passed - torchscript_onnx_ort: - inference_time: 44119.0 - throughput: 22.66597157687164 + torchscript_onnx: + inference_time: 44504.0 + throughput: 22.469890346935106 estimated_peak_memory_range: - min: 643072 - max: 204011072 + min: 667648 + max: 185198512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1163 - job_id: j2p0erx25 + job_id: jz5wxv3zp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:18:39Z' + timestamp: '2024-06-22T23:27:49Z' - torchscript_onnx_tflite: - inference_time: 38074.0 - throughput: 26.264642538215053 + inference_time: 37732.0 + throughput: 26.502703275734124 estimated_peak_memory_range: - min: 61440 - max: 4041520 + min: 86016 + max: 4380992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1568 - job_id: j0pxe1rj5 + job_id: j1pv4orzp job_status: Passed torchscript_onnx_qnn: - inference_time: 31252.0 - throughput: 31.997952131063613 + inference_time: 31187.0 + throughput: 32.064642318914935 estimated_peak_memory_range: - min: 61440 - max: 51901248 + min: 12288 + max: 49081336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1255 - job_id: jep23mzxg + job_id: jmg981ymp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:18:37Z' + timestamp: '2024-06-22T23:27:46Z' + - torchscript_onnx_tflite: + inference_time: 37791.0 + throughput: 26.461326770924295 + estimated_peak_memory_range: + min: 90112 + max: 3029064 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 1568 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 1568 + 
job_id: j7gj1m21g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 31145.0 + throughput: 32.10788248515011 + estimated_peak_memory_range: + min: 16384 + max: 45743360 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 1255 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 1255 + job_id: jnp13lwn5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:27:47Z' - torchscript_onnx_qnn: - inference_time: 38623.0 - throughput: 25.89130828780778 + inference_time: 29765.0 + throughput: 33.59650596337981 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1255 - job_id: jopr1y7vg + job_id: jz5wxv34p job_status: Passed - torchscript_onnx_ort: - inference_time: 65447.0 - throughput: 15.27953916909866 + torchscript_onnx: + inference_time: 65561.0 + throughput: 15.252970516007993 estimated_peak_memory_range: - min: 552267776 - max: 552267776 + min: 561917952 + max: 561917952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1163 - job_id: j1p8w7kzp + job_id: jmg981yqp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:18:40Z' + timestamp: '2024-06-22T23:27:51Z' diff --git a/qai_hub_models/models/swin_small/export.py b/qai_hub_models/models/swin_small/export.py index 67677bbe..ef02503b 100644 --- a/qai_hub_models/models/swin_small/export.py +++ b/qai_hub_models/models/swin_small/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/swin_small/model.py b/qai_hub_models/models/swin_small/model.py index 081d731f..dd1ee1e2 100644 --- a/qai_hub_models/models/swin_small/model.py +++ b/qai_hub_models/models/swin_small/model.py @@ -29,4 +29,4 @@ def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> ImagenetClassifier: replace_module_recursively( net, torch.nn.Linear, AutoSplitLinear, parent_module=PatchMerging ) - return cls(net.eval()) + return cls(net) diff 
--git a/qai_hub_models/models/swin_small/perf.yaml b/qai_hub_models/models/swin_small/perf.yaml index 4bd928fb..16f4e5f6 100644 --- a/qai_hub_models/models/swin_small/perf.yaml +++ b/qai_hub_models/models/swin_small/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: Swin-Small performance_metrics: - torchscript_onnx_tflite: - inference_time: 29054.0 - throughput: 34.41866868589523 + inference_time: 28880.0 + throughput: 34.62603878116344 estimated_peak_memory_range: - min: 24576 - max: 7976680 + min: 53248 + max: 3319424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1563 - job_id: jn5q92d7p + job_id: jvgd09qkp job_status: Passed torchscript_onnx_qnn: - inference_time: 23697.0 - throughput: 42.19943452757733 + inference_time: 23486.0 + throughput: 42.578557438473986 estimated_peak_memory_range: - min: 0 - max: 40982576 + min: 40960 + max: 42627616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1246 - job_id: j1p3qmrx5 + job_id: jo5m426y5 job_status: Passed - torchscript_onnx_ort: - inference_time: 56535.0 - throughput: 17.688157778367383 + torchscript_onnx: + inference_time: 54900.0 + throughput: 18.214936247723134 estimated_peak_memory_range: - min: 57344 - max: 250098192 + min: 16384 + max: 249192608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1158 - job_id: jlpe4vn75 + job_id: j2p0kql25 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:19:17Z' + timestamp: '2024-06-22T23:28:28Z' - torchscript_onnx_tflite: inference_time: 19652.0 throughput: 50.8854060655404 estimated_peak_memory_range: - min: 45056 - max: 468730016 + min: 49152 + max: 491763984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1563 - job_id: j1glekqep + job_id: jz576wlqg job_status: Passed torchscript_onnx_qnn: - inference_time: 16097.0 - throughput: 62.123377026775174 + inference_time: 16133.0 + throughput: 61.98475175106924 estimated_peak_memory_range: min: 0 - max: 371590576 + max: 338877936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1246 - job_id: jwgoev94p + job_id: jegnxy3v5 job_status: Passed - torchscript_onnx_ort: - inference_time: 39326.0 - throughput: 25.42846971469257 + torchscript_onnx: + inference_time: 38428.0 + throughput: 26.02269178723847 estimated_peak_memory_range: - min: 651264 - max: 174791408 + min: 618496 + max: 147135632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1158 - job_id: jygzv70zp + job_id: j1p889zzp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:19:18Z' + timestamp: '2024-06-22T23:28:30Z' - 
torchscript_onnx_tflite: - inference_time: 29025.0 - throughput: 34.45305770887166 + inference_time: 28669.0 + throughput: 34.88088178869162 estimated_peak_memory_range: - min: 69632 - max: 3142616 + min: 20480 + max: 2800768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1563 - job_id: jw56q10vg + job_id: jqp48odqg job_status: Passed torchscript_onnx_qnn: - inference_time: 23503.0 - throughput: 42.54775986044335 + inference_time: 23305.0 + throughput: 42.90924694271615 estimated_peak_memory_range: min: 36864 - max: 38372320 + max: 41438080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1246 - job_id: j7gjkl875 + job_id: jep2j6lx5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:19:16Z' + timestamp: '2024-06-22T23:28:26Z' + - torchscript_onnx_tflite: + inference_time: 28806.0 + throughput: 34.71498993265292 + estimated_peak_memory_range: + min: 73728 + max: 6136808 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 1563 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 1563 + job_id: j0pxmj6jg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 23462.0 + throughput: 42.62211235188816 + estimated_peak_memory_range: + min: 167936 + max: 42534400 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 1246 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 1246 + job_id: jqpynw6rg + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:28:27Z' - torchscript_onnx_qnn: - inference_time: 23778.0 - throughput: 42.055681722600724 + inference_time: 22363.0 + throughput: 44.71671958145151 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1246 - job_id: j1pvzwn7g + job_id: jopr9qevp job_status: Passed - torchscript_onnx_ort: - inference_time: 58093.0 - throughput: 17.213777907837432 + torchscript_onnx: + inference_time: 56601.0 + throughput: 17.66753237575308 estimated_peak_memory_range: - min: 385679360 - max: 385679360 + min: 387993600 + max: 387993600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1158 - job_id: jz5wm9rzg + job_id: jogkdn3yp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:19:19Z' + timestamp: '2024-06-22T23:28:31Z' diff --git a/qai_hub_models/models/swin_tiny/export.py b/qai_hub_models/models/swin_tiny/export.py index fbe5734d..ace0abc5 100644 --- a/qai_hub_models/models/swin_tiny/export.py +++ b/qai_hub_models/models/swin_tiny/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and 
TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/swin_tiny/model.py b/qai_hub_models/models/swin_tiny/model.py index 2c32e595..0b293ec7 100644 --- a/qai_hub_models/models/swin_tiny/model.py +++ b/qai_hub_models/models/swin_tiny/model.py @@ -29,4 +29,4 @@ def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> ImagenetClassifier: replace_module_recursively( net, torch.nn.Linear, AutoSplitLinear, parent_module=PatchMerging ) - return cls(net.eval()) + return cls(net) diff --git a/qai_hub_models/models/swin_tiny/perf.yaml b/qai_hub_models/models/swin_tiny/perf.yaml index 7281ba04..bf08ca33 100644 --- a/qai_hub_models/models/swin_tiny/perf.yaml +++ b/qai_hub_models/models/swin_tiny/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: Swin-Tiny performance_metrics: - torchscript_onnx_tflite: - inference_time: 17582.0 - throughput: 56.87635081333182 + inference_time: 17334.0 + throughput: 57.69008884273682 estimated_peak_memory_range: - min: 49152 - max: 3052248 + min: 45056 + max: 3426144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 837 - job_id: jnp1q8mkg + job_id: j1gl7z3e5 job_status: Passed torchscript_onnx_qnn: - inference_time: 14870.0 - throughput: 67.24949562878278 + inference_time: 14963.0 + throughput: 66.83151774376796 estimated_peak_memory_range: - min: 40960 - max: 28468704 + min: 16384 + max: 25111704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 700 - job_id: jqp4jw2qp + job_id: j1pv4ov7p job_status: Passed - torchscript_onnx_ort: - inference_time: 33752.0 - throughput: 29.627873903768666 + torchscript_onnx: + inference_time: 32650.0 + throughput: 30.627871362940276 estimated_peak_memory_range: min: 0 - max: 143848064 + max: 154932224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 624 - job_id: jopr1ymvg + job_id: jmg981wqp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:19:49Z' + timestamp: '2024-06-22T23:29:04Z' - torchscript_onnx_tflite: - inference_time: 11836.0 - throughput: 84.48800270361609 + inference_time: 11831.0 + throughput: 84.52370890034655 
estimated_peak_memory_range: - min: 40960 - max: 291213504 + min: 16384 + max: 301706880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 837 - job_id: jvgd7vmkg + job_id: jw56vjnvp job_status: Passed torchscript_onnx_qnn: - inference_time: 9960.0 - throughput: 100.40160642570281 + inference_time: 9973.0 + throughput: 100.2707309736288 estimated_peak_memory_range: - min: 618496 - max: 226851856 + min: 81215488 + max: 290065488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 700 - job_id: j0pxe1zj5 + job_id: j7gj1me7g job_status: Passed - torchscript_onnx_ort: - inference_time: 23820.0 - throughput: 41.98152812762385 + torchscript_onnx: + inference_time: 22771.0 + throughput: 43.91550656536823 estimated_peak_memory_range: - min: 53248 - max: 113324624 + min: 36864 + max: 101885200 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 624 - job_id: jep23mqxg + job_id: jnp13lek5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:19:50Z' + timestamp: '2024-06-22T23:29:05Z' - torchscript_onnx_tflite: - inference_time: 17413.0 - throughput: 57.42835812324125 + inference_time: 17424.0 + throughput: 57.3921028466483 estimated_peak_memory_range: - min: 24576 - max: 3013416 + min: 49152 + max: 3700560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 837 - job_id: jz57vd8q5 + job_id: j1p383ex5 job_status: Passed torchscript_onnx_qnn: - inference_time: 14630.0 - throughput: 68.3526999316473 + inference_time: 14895.0 + throughput: 67.1366230278617 estimated_peak_memory_range: - min: 12288 - max: 29408864 + min: 229376 + max: 26294976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 700 - job_id: jegnredv5 + job_id: jygzw9rzg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:19:48Z' + timestamp: '2024-06-22T23:29:01Z' + - torchscript_onnx_tflite: + inference_time: 17337.0 + throughput: 57.680106131395284 + estimated_peak_memory_range: + min: 28672 + max: 2941824 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 837 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 837 + job_id: jwgom0345 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 14963.0 + throughput: 66.83151774376796 + estimated_peak_memory_range: + min: 40960 + max: 28386808 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 700 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 700 + job_id: jz5wxvqzp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:29:03Z' - torchscript_onnx_qnn: - inference_time: 14162.0 - throughput: 70.61149555147578 + inference_time: 13869.0 + throughput: 72.10325185665873 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 700 - job_id: jo5mvzly5 + job_id: jlpe21k7p 
job_status: Passed - torchscript_onnx_ort: - inference_time: 34948.0 - throughput: 28.613940711914847 + torchscript_onnx: + inference_time: 33671.0 + throughput: 29.69914763446289 estimated_peak_memory_range: - min: 211316736 - max: 211316736 + min: 87080960 + max: 87080960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 624 - job_id: jqpyvdkrp + job_id: jvgd09okp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:19:50Z' + timestamp: '2024-06-22T23:29:06Z' diff --git a/qai_hub_models/models/trocr/export.py b/qai_hub_models/models/trocr/export.py index 7c62002b..03477c9f 100644 --- a/qai_hub_models/models/trocr/export.py +++ b/qai_hub_models/models/trocr/export.py @@ -33,7 +33,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, components: Optional[List[str]] = None, skip_profiling: bool = False, @@ -127,7 +127,6 @@ def export_model( for component_name, component in components_dict.items(): # Trace the model input_spec = component.get_input_spec() - component.eval() source_model = torch.jit.trace( component.to("cpu"), make_torch_inputs(input_spec) ) @@ -194,7 +193,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -240,7 +239,7 @@ def main(): model_cls=Model, components=ALL_COMPONENTS, supports_qnn=False, - supports_precompiled_ort=False, + supports_precompiled_qnn_onnx=False, ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/trocr/perf.yaml b/qai_hub_models/models/trocr/perf.yaml index 25bfdd7d..0649fe59 100644 --- a/qai_hub_models/models/trocr/perf.yaml +++ b/qai_hub_models/models/trocr/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: TrOCREncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 148428.0 - throughput: 6.737273290753766 + inference_time: 148085.0 + throughput: 6.752878414424148 estimated_peak_memory_range: - min: 6459392 - max: 9952352 + min: 8511488 + max: 11264624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 592 - job_id: j1p8w7dzp + job_id: jqp48ovqg job_status: Passed - torchscript_onnx_ort: - inference_time: 109810.0 - throughput: 9.106638739641198 + torchscript_onnx: + inference_time: 108599.0 + throughput: 9.208187920699086 estimated_peak_memory_range: - min: 14303232 - max: 127415872 + min: 14270464 + max: 130124584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +65,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 396 - job_id: jmg9947qg + job_id: jygzw97zg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +74,13 @@ models: 
os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:20:35Z' + timestamp: '2024-06-22T23:29:59Z' - torchscript_onnx_tflite: - inference_time: 111077.0 - throughput: 9.00276384850149 + inference_time: 111580.0 + throughput: 8.962179602079226 estimated_peak_memory_range: - min: 6410240 - max: 350751520 + min: 6737920 + max: 361915184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -86,14 +88,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 592 - job_id: jn5q92x7p + job_id: jo5m423y5 job_status: Passed - torchscript_onnx_ort: - inference_time: 83685.0 - throughput: 11.9495728027723 + torchscript_onnx: + inference_time: 82794.0 + throughput: 12.078169915694374 estimated_peak_memory_range: - min: 12636160 - max: 89203248 + min: 10301440 + max: 74776016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +103,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 396 - job_id: jvgd7vykg + job_id: jmg9814qp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +112,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:20:37Z' + timestamp: '2024-06-22T23:30:02Z' - torchscript_onnx_tflite: - inference_time: 148360.0 - throughput: 6.740361283364789 + inference_time: 147811.0 + throughput: 6.765396350745209 estimated_peak_memory_range: - min: 7380992 - max: 9974128 + min: 7270400 + max: 10717800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -124,7 +126,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 592 - job_id: jw56q19vg + job_id: jopr9qyvp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +135,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:20:26Z' - - torchscript_onnx_ort: - inference_time: 109878.0 - throughput: 9.101002930522943 + timestamp: '2024-06-22T23:29:44Z' + - torchscript_onnx_tflite: + inference_time: 147905.0 + throughput: 6.76109664987661 estimated_peak_memory_range: - min: 28672 - max: 28672 + min: 7254016 + max: 10544600 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 592 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 592 + job_id: jqpynwdrg + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:29:46Z' + - torchscript_onnx: + inference_time: 109376.0 + throughput: 9.14277355178467 + estimated_peak_memory_range: + min: 13611008 + max: 13611008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +172,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 396 - job_id: jmg9947vg + job_id: jvgd09vkp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,30 +181,30 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:20:38Z' + timestamp: '2024-06-22T23:30:04Z' - name: TrOCRDecoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 2732.0 - throughput: 366.03221083455344 + inference_time: 2715.0 + throughput: 368.3241252302026 estimated_peak_memory_range: min: 12288 - max: 2455200 + max: 2747864 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 370 + layers_on_npu: 382 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 370 - job_id: jogkrywy5 + total_layers: 382 + job_id: j0pxmjyjg job_status: Passed - torchscript_onnx_ort: - 
inference_time: 2915.0 - throughput: 343.0531732418525 + torchscript_onnx: + inference_time: 2864.0 + throughput: 349.16201117318434 estimated_peak_memory_range: - min: 28672 - max: 588384064 + min: 12288 + max: 576534240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -187,7 +212,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 353 - job_id: jnp1q8kkg + job_id: jz5wxv9zp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -196,28 +221,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:20:35Z' + timestamp: '2024-06-22T23:30:00Z' - torchscript_onnx_tflite: - inference_time: 1997.0 - throughput: 500.75112669003505 + inference_time: 1974.0 + throughput: 506.5856129685917 estimated_peak_memory_range: min: 12288 - max: 195170736 + max: 199729808 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 370 + layers_on_npu: 382 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 370 - job_id: j1glek9ep + total_layers: 382 + job_id: jegnxyev5 job_status: Passed - torchscript_onnx_ort: - inference_time: 2106.0 - throughput: 474.8338081671415 + torchscript_onnx: + inference_time: 2283.0 + throughput: 438.02014892685065 estimated_peak_memory_range: min: 0 - max: 49553392 + max: 35734832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -225,7 +250,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 353 - job_id: jz5wm90jg + job_id: jnp13l8k5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -234,21 +259,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:20:37Z' + timestamp: '2024-06-22T23:30:03Z' - torchscript_onnx_tflite: - inference_time: 2737.0 - throughput: 365.36353671903544 + inference_time: 2722.0 + throughput: 367.37692872887584 estimated_peak_memory_range: - min: 16384 - max: 3465512 + min: 12288 + max: 2054840 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 370 + layers_on_npu: 382 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 370 - job_id: j1p3qmlx5 + total_layers: 382 + job_id: jep2j6mx5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -257,13 +282,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:20:27Z' - - torchscript_onnx_ort: - inference_time: 2812.0 - throughput: 355.6187766714082 + timestamp: '2024-06-22T23:29:44Z' + - torchscript_onnx_tflite: + inference_time: 2729.0 + throughput: 366.43459142543054 + estimated_peak_memory_range: + min: 12288 + max: 2338424 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 382 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 382 + job_id: j2p0kqr25 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:29:47Z' + - torchscript_onnx: + inference_time: 2636.0 + throughput: 379.3626707132018 estimated_peak_memory_range: - min: 352550912 - max: 352550912 + min: 347582464 + max: 347582464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -271,7 +319,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 353 - job_id: jnp1q8klg + job_id: jz576wdqg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -280,4 +328,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:20:39Z' + timestamp: '2024-06-22T23:30:05Z' 
diff --git a/qai_hub_models/models/unet_segmentation/app.py b/qai_hub_models/models/unet_segmentation/app.py index 41683171..6bbf207d 100644 --- a/qai_hub_models/models/unet_segmentation/app.py +++ b/qai_hub_models/models/unet_segmentation/app.py @@ -36,7 +36,6 @@ def predict(self, image: Image) -> torch.Tensor: """ img = preprocess_PIL_image(image) - with torch.no_grad(): - out = self.model(img) + out = self.model(img) mask = out.argmax(dim=1) return mask[0].bool().numpy() diff --git a/qai_hub_models/models/unet_segmentation/export.py b/qai_hub_models/models/unet_segmentation/export.py index 6274534f..9081590a 100644 --- a/qai_hub_models/models/unet_segmentation/export.py +++ b/qai_hub_models/models/unet_segmentation/export.py @@ -38,7 +38,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,13 +117,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -169,7 +168,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -187,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -209,7 +208,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/unet_segmentation/model.py b/qai_hub_models/models/unet_segmentation/model.py index 32290667..8a03fd6e 100644 --- a/qai_hub_models/models/unet_segmentation/model.py +++ b/qai_hub_models/models/unet_segmentation/model.py @@ -37,7 +37,7 @@ def from_pretrained(cls, ckpt_url: Optional[str] = DEFAULT_WEIGHTS): if ckpt_url is not None: state_dict = load_torch(ckpt_url) net.load_state_dict(state_dict) - return cls(net.eval()) + return cls(net) def forward(self, image: torch.Tensor): """ diff --git a/qai_hub_models/models/unet_segmentation/perf.yaml b/qai_hub_models/models/unet_segmentation/perf.yaml index b4a6ea65..bade6f1c 100644 --- a/qai_hub_models/models/unet_segmentation/perf.yaml +++ b/qai_hub_models/models/unet_segmentation/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: 
- name: Unet-Segmentation performance_metrics: - torchscript_onnx_tflite: - inference_time: 159228.0 - throughput: 6.280302459366443 + inference_time: 160376.0 + throughput: 6.235346934703447 estimated_peak_memory_range: - min: 6418432 - max: 111435960 + min: 6463488 + max: 463915992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jo5mvz7q5 + job_id: jo5m42zy5 job_status: Passed torchscript_onnx_qnn: - inference_time: 156519.0 - throughput: 6.389000696401076 + inference_time: 155942.0 + throughput: 6.412640597145092 estimated_peak_memory_range: - min: 9871360 - max: 31082800 + min: 9875456 + max: 28375952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 51 - job_id: jep23m1mg + job_id: jqpynw4rg job_status: Passed - torchscript_onnx_ort: - inference_time: 165647.0 - throughput: 6.03693396197939 + torchscript_onnx: + inference_time: 165270.0 + throughput: 6.05070490712168 estimated_peak_memory_range: - min: 13611008 - max: 154509064 + min: 13549568 + max: 156940568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jn5q92nmp + job_id: j1gl7z0e5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:21:28Z' + timestamp: '2024-06-22T23:30:58Z' - torchscript_onnx_tflite: - inference_time: 121153.0 - throughput: 8.254025901133279 + inference_time: 118210.0 + throughput: 8.459521191100583 estimated_peak_memory_range: - min: 6619136 - max: 339596672 + min: 5234688 + max: 340954016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jegnre4m5 + job_id: jegnxy9v5 job_status: Passed torchscript_onnx_qnn: - inference_time: 110026.0 - throughput: 9.0887608383473 + inference_time: 110282.0 + throughput: 9.06766290056401 estimated_peak_memory_range: - min: 9850880 - max: 91369248 + min: 9863168 + max: 90274032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 51 - job_id: j2p0erwe5 + job_id: j2p0kq125 job_status: Passed - torchscript_onnx_ort: - inference_time: 119057.0 - throughput: 8.399338132155187 + torchscript_onnx: + inference_time: 121360.0 + throughput: 8.23994726433751 estimated_peak_memory_range: - min: 22478848 - max: 104785056 + min: 15745024 + max: 95078624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: j1glekdlp + job_id: jw56vj3vp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:21:30Z' + timestamp: '2024-06-22T23:30:59Z' - torchscript_onnx_tflite: - inference_time: 157133.0 - throughput: 6.364035562230722 + inference_time: 158593.0 + throughput: 6.305448538081756 estimated_peak_memory_range: - min: 6680576 - max: 111633312 + min: 6676480 + max: 111417824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jopr1yreg + job_id: jopr9q4vp job_status: Passed torchscript_onnx_qnn: - inference_time: 
148329.0 - throughput: 6.741769984291676 + inference_time: 149290.0 + throughput: 6.698372295532185 estimated_peak_memory_range: - min: 9969664 - max: 32982776 + min: 9900032 + max: 30874704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 51 - job_id: jogkry1o5 + job_id: jogkdnlyp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:21:27Z' + timestamp: '2024-06-22T23:30:56Z' + - torchscript_onnx_tflite: + inference_time: 161552.0 + throughput: 6.189957413092998 + estimated_peak_memory_range: + min: 6701056 + max: 463829976 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 31 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 31 + job_id: jep2j67x5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 146922.0 + throughput: 6.806332611862077 + estimated_peak_memory_range: + min: 8839168 + max: 29513312 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 51 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 51 + job_id: jn5qwk775 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:30:57Z' - torchscript_onnx_qnn: - inference_time: 190476.0 - throughput: 5.25000525000525 + inference_time: 133369.0 + throughput: 7.497994286528353 estimated_peak_memory_range: - min: 9854976 - max: 9854976 + min: 9850880 + max: 9850880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 51 - job_id: j1p8w7n8p + job_id: j1p8893zp job_status: Passed - torchscript_onnx_ort: - inference_time: 146401.0 - throughput: 6.830554436103578 + torchscript_onnx: + inference_time: 146596.0 + throughput: 6.821468525744222 estimated_peak_memory_range: - min: 17457152 - max: 17457152 + min: 9854976 + max: 9854976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jw56q1x7g + job_id: j1p3834x5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:21:31Z' + timestamp: '2024-06-22T23:31:01Z' diff --git a/qai_hub_models/models/vit/export.py b/qai_hub_models/models/vit/export.py index a4f94916..80db8b67 100644 --- a/qai_hub_models/models/vit/export.py +++ b/qai_hub_models/models/vit/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,7 +116,6 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace( model.to("cpu"), make_torch_inputs(input_spec), check_trace=False ) @@ -124,7 +123,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -170,7 +169,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == 
TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -190,7 +189,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -220,7 +219,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/vit/perf.yaml b/qai_hub_models/models/vit/perf.yaml index 459d6f2d..53eb8912 100644 --- a/qai_hub_models/models/vit/perf.yaml +++ b/qai_hub_models/models/vit/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: VIT performance_metrics: - torchscript_onnx_tflite: - inference_time: 78496.0 - throughput: 12.73950264981655 + inference_time: 78563.0 + throughput: 12.728638163002941 estimated_peak_memory_range: min: 102400 - max: 3437176 + max: 3460416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 535 - job_id: jwgoevxdp + job_id: j1pv4o17p job_status: Passed - torchscript_onnx_ort: - inference_time: 103100.0 - throughput: 9.699321047526674 + torchscript_onnx: + inference_time: 92821.0 + throughput: 10.77342411738723 estimated_peak_memory_range: - min: 110592 - max: 441770400 + min: 49152 + max: 443858496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +65,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 376 - job_id: jnp1q89lg + job_id: jqp48oxqg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +74,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:22:03Z' + timestamp: '2024-06-22T23:31:35Z' - torchscript_onnx_tflite: - inference_time: 56654.0 - throughput: 17.65100434214707 + inference_time: 56889.0 + throughput: 17.578090667791663 estimated_peak_memory_range: - min: 77824 - max: 375276272 + min: 114688 + max: 382195568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -86,14 +88,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 535 - job_id: j1pvzw8mg + job_id: j7gj1m07g job_status: Passed - torchscript_onnx_ort: - inference_time: 76545.0 - throughput: 13.064210595074792 + torchscript_onnx: + inference_time: 68957.0 + throughput: 14.501790971184942 estimated_peak_memory_range: - min: 684032 - max: 513094432 + min: 404545536 + max: 941258256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +103,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 376 - job_id: jvgd7vklg + job_id: j0pxmj7jg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +112,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: 
'2024-06-08T23:22:03Z' + timestamp: '2024-06-22T23:31:37Z' - torchscript_onnx_tflite: - inference_time: 78627.0 - throughput: 12.718277436504 + inference_time: 78470.0 + throughput: 12.743723716069836 estimated_peak_memory_range: - min: 110592 - max: 6215968 + min: 106496 + max: 5373496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -124,7 +126,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 535 - job_id: j7gjkl985 + job_id: jlpe21r7p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +135,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:21:57Z' - - torchscript_onnx_ort: - inference_time: 102862.0 - throughput: 9.721763138962883 + timestamp: '2024-06-22T23:31:27Z' + - torchscript_onnx_tflite: + inference_time: 78565.0 + throughput: 12.728314134792846 + estimated_peak_memory_range: + min: 139264 + max: 3777240 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 535 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 535 + job_id: jygzw9xzg + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:31:28Z' + - torchscript_onnx: + inference_time: 94524.0 + throughput: 10.579323769624645 estimated_peak_memory_range: - min: 158560256 - max: 158560256 + min: 251416576 + max: 251416576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +172,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 376 - job_id: jz57vdqr5 + job_id: jo5m42wy5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +181,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:22:04Z' + timestamp: '2024-06-22T23:31:38Z' diff --git a/qai_hub_models/models/whisper_base_en/demo.py b/qai_hub_models/models/whisper_base_en/demo.py index 9bbf714a..7dd801d6 100644 --- a/qai_hub_models/models/whisper_base_en/demo.py +++ b/qai_hub_models/models/whisper_base_en/demo.py @@ -6,8 +6,8 @@ from qai_hub_models.models.whisper_base_en.model import WhisperBaseEn -def main(): - whisper_demo(WhisperBaseEn) +def main(is_test: bool = False): + whisper_demo(WhisperBaseEn, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/whisper_base_en/export.py b/qai_hub_models/models/whisper_base_en/export.py index 4bb6b358..d7e186e1 100644 --- a/qai_hub_models/models/whisper_base_en/export.py +++ b/qai_hub_models/models/whisper_base_en/export.py @@ -33,7 +33,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, components: Optional[List[str]] = None, skip_profiling: bool = False, @@ -127,7 +127,6 @@ def export_model( for component_name, component in components_dict.items(): # Trace the model input_spec = component.get_input_spec() - component.eval() source_model = torch.jit.trace( component.to("cpu"), make_torch_inputs(input_spec) ) @@ -194,7 +193,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/whisper_base_en/perf.yaml 
b/qai_hub_models/models/whisper_base_en/perf.yaml index 881707cf..95dfe932 100644 --- a/qai_hub_models/models/whisper_base_en/perf.yaml +++ b/qai_hub_models/models/whisper_base_en/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: WhisperEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 158811.0 - throughput: 6.296793043303046 + inference_time: 159078.0 + throughput: 6.2862243679201395 estimated_peak_memory_range: - min: 31092736 - max: 131633968 + min: 16384 + max: 101374008 primary_compute_unit: GPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 419 layers_on_cpu: 0 total_layers: 419 - job_id: j0pxe1w95 + job_id: jopr9q1vp job_status: Passed torchscript_onnx_qnn: - inference_time: 624615.0 - throughput: 1.6009862075038224 + inference_time: 467575.0 + throughput: 2.1386943271132974 estimated_peak_memory_range: - min: 131072 - max: 82142360 + min: 69632 + max: 88169880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 580 - job_id: j2p0erne5 + job_id: jw56vjqvp job_status: Passed - torchscript_onnx_ort: - inference_time: 394348.0 - throughput: 2.5358312962155254 + torchscript_onnx: + inference_time: 426509.0 + throughput: 2.3446164090324 estimated_peak_memory_range: - min: 4792320 - max: 165488160 + min: 73859072 + max: 241652560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 380 - job_id: j1pvzwjmg + job_id: jvgd097kp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:22:51Z' + timestamp: '2024-06-22T23:32:31Z' - torchscript_onnx_tflite: - inference_time: 122023.0 - throughput: 8.195176319218508 + inference_time: 123208.0 + throughput: 8.116356080773976 estimated_peak_memory_range: - min: 37249024 - max: 82154976 + min: 35913728 + max: 83180512 primary_compute_unit: GPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 419 layers_on_cpu: 0 total_layers: 419 - job_id: jegnrejm5 + job_id: jqpynwvrg job_status: Passed torchscript_onnx_qnn: - inference_time: 452457.0 - throughput: 2.210154777139043 + inference_time: 331018.0 + throughput: 3.0209837531493755 estimated_peak_memory_range: min: 0 - max: 198495008 + max: 191295920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 580 - job_id: jogkryjo5 + job_id: jwgom0e45 job_status: Passed - torchscript_onnx_ort: - inference_time: 300384.0 - throughput: 3.3290721210184295 + torchscript_onnx: + inference_time: 302687.0 + throughput: 3.3037428102297093 estimated_peak_memory_range: - min: 62181376 - max: 262749552 + min: 66433024 + max: 270943840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 380 - job_id: jlpe4vj05 + job_id: jmg9819vp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - 
timestamp: '2024-06-08T23:22:52Z' + timestamp: '2024-06-22T23:32:33Z' - torchscript_onnx_tflite: - inference_time: 158001.0 - throughput: 6.329073866621098 + inference_time: 157415.0 + throughput: 6.352634755264746 estimated_peak_memory_range: - min: 12288 - max: 104601560 + min: 22224896 + max: 123803800 primary_compute_unit: GPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 419 layers_on_cpu: 0 total_layers: 419 - job_id: jep23m2mg + job_id: j1p889wzp job_status: Passed torchscript_onnx_qnn: - inference_time: 623834.0 - throughput: 1.602990539149838 + inference_time: 467371.0 + throughput: 2.1396278331347047 estimated_peak_memory_range: - min: 139264 - max: 76510216 + min: 53248 + max: 90413376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 580 - job_id: j1p3qmyz5 + job_id: jygzw9vzg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:22:49Z' + timestamp: '2024-06-22T23:32:26Z' + - torchscript_onnx_tflite: + inference_time: 160146.0 + throughput: 6.244302074357149 + estimated_peak_memory_range: + min: 0 + max: 61744728 + primary_compute_unit: GPU + precision: fp16 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 419 + layers_on_cpu: 0 + total_layers: 419 + job_id: jn5qwk975 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 460716.0 + throughput: 2.1705345592512524 + estimated_peak_memory_range: + min: 0 + max: 66333424 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: jmg9819qp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:32:29Z' - torchscript_onnx_qnn: - inference_time: 454926.0 - throughput: 2.198159700698575 + inference_time: 432301.0 + throughput: 2.313203069157832 estimated_peak_memory_range: min: 962560 max: 962560 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 579 - job_id: j1glekjlp + job_id: j7gj1mk7g job_status: Passed - torchscript_onnx_ort: - inference_time: 383597.0 - throughput: 2.606902556589337 + torchscript_onnx: + inference_time: 383599.0 + throughput: 2.606888964778323 estimated_peak_memory_range: - min: 139669504 - max: 139669504 + min: 138711040 + max: 138711040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 380 - job_id: jz5wm9jjg + job_id: jvgd097lp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,15 +256,15 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:22:54Z' + timestamp: '2024-06-22T23:32:35Z' - name: WhisperDecoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 24389.0 - throughput: 41.00209110664644 + inference_time: 23491.0 + throughput: 42.56949470009791 estimated_peak_memory_range: - min: 5771264 - max: 8649416 + min: 5779456 + max: 8829744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -232,14 +272,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 983 - job_id: jo5mvzjq5 + job_id: jep2j63x5 job_status: Passed torchscript_onnx_qnn: - inference_time: 22769.0 - throughput: 43.91936404760859 + inference_time: 23659.0 + throughput: 
42.267213322625636 estimated_peak_memory_range: - min: 42414080 - max: 60923784 + min: 42450944 + max: 57304400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -247,14 +287,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 821 - job_id: j1p8w7l8p + job_id: j1p383qx5 job_status: Passed - torchscript_onnx_ort: - inference_time: 24751.0 - throughput: 40.402407983515815 + torchscript_onnx: + inference_time: 17587.0 + throughput: 56.860180815374996 estimated_peak_memory_range: - min: 12656640 - max: 328987984 + min: 11915264 + max: 331768528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -262,7 +302,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 844 - job_id: j7gjklj85 + job_id: jz5wxvmjp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -271,13 +311,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:22:51Z' + timestamp: '2024-06-22T23:32:31Z' - torchscript_onnx_tflite: - inference_time: 18854.0 - throughput: 53.039142887450936 + inference_time: 19050.0 + throughput: 52.493438320209975 estimated_peak_memory_range: - min: 4575232 - max: 93812240 + min: 3858432 + max: 104109312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -285,14 +325,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 983 - job_id: jopr1yzeg + job_id: j2p0kqe25 job_status: Passed torchscript_onnx_qnn: - inference_time: 18709.0 - throughput: 53.450211128333954 + inference_time: 18571.0 + throughput: 53.84739647838027 estimated_peak_memory_range: - min: 42438656 - max: 323848592 + min: 42414080 + max: 290343760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -300,14 +340,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 821 - job_id: jn5q92jmp + job_id: j1pv4oz7p job_status: Passed - torchscript_onnx_ort: - inference_time: 20257.0 - throughput: 49.36565137976996 + torchscript_onnx: + inference_time: 14198.0 + throughput: 70.4324552753909 estimated_peak_memory_range: - min: 52916224 - max: 140494080 + min: 72601600 + max: 138332928 primary_compute_unit: NPU precision: fp16 layer_info: @@ -315,7 +355,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 844 - job_id: jygzv716p + job_id: jnp13lql5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -324,13 +364,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:22:53Z' + timestamp: '2024-06-22T23:32:33Z' - torchscript_onnx_tflite: - inference_time: 23324.0 - throughput: 42.87429257417253 + inference_time: 23883.0 + throughput: 41.87078675208307 estimated_peak_memory_range: - min: 5750784 - max: 9075392 + min: 5771264 + max: 10399912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -338,14 +378,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 983 - job_id: jqpyvd94p + job_id: jogkdnryp job_status: Passed torchscript_onnx_qnn: - inference_time: 24053.0 - throughput: 41.57485552737704 + inference_time: 22785.0 + throughput: 43.88852315119596 estimated_peak_memory_range: - min: 42450944 - max: 59016968 + min: 42438656 + max: 59162872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -353,7 +393,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 821 - job_id: jwgoevjdp + job_id: jz5wxvmzp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -362,10 +402,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:22:49Z' + timestamp: 
'2024-06-22T23:32:27Z' + - torchscript_onnx_tflite: + inference_time: 23568.0 + throughput: 42.43041412084182 + estimated_peak_memory_range: + min: 5783552 + max: 8704464 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 983 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 983 + job_id: j1gl7zee5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 23732.0 + throughput: 42.13719871902916 + estimated_peak_memory_range: + min: 42446848 + max: 58833576 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 821 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 821 + job_id: jnp13lqk5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:32:29Z' - torchscript_onnx_qnn: - inference_time: 13816.0 - throughput: 72.37984944991314 + inference_time: 10875.0 + throughput: 91.95402298850574 estimated_peak_memory_range: min: 42455040 max: 42455040 @@ -376,14 +454,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 821 - job_id: jw56q1k7g + job_id: jlpe2147p job_status: Passed - torchscript_onnx_ort: - inference_time: 20016.0 - throughput: 49.96003197442047 + torchscript_onnx: + inference_time: 14677.0 + throughput: 68.13381481229133 estimated_peak_memory_range: - min: 45969408 - max: 45969408 + min: 52908032 + max: 52908032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -391,7 +469,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 844 - job_id: jmg9946vg + job_id: jz576w6rg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -400,4 +478,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:22:55Z' + timestamp: '2024-06-22T23:32:35Z' diff --git a/qai_hub_models/models/whisper_base_en/requirements.txt b/qai_hub_models/models/whisper_base_en/requirements.txt index fa34d4f8..1b6cbc24 100644 --- a/qai_hub_models/models/whisper_base_en/requirements.txt +++ b/qai_hub_models/models/whisper_base_en/requirements.txt @@ -1,2 +1,4 @@ openai-whisper==20230314 scipy==1.8.1 +audio2numpy==0.1.2 +samplerate==0.2.1 diff --git a/qai_hub_models/models/whisper_base_en/test.py b/qai_hub_models/models/whisper_base_en/test.py index aeb74e53..d94d4bf5 100644 --- a/qai_hub_models/models/whisper_base_en/test.py +++ b/qai_hub_models/models/whisper_base_en/test.py @@ -19,4 +19,4 @@ def test_transcribe(): def test_demo(): - demo_main() + demo_main(is_test=True) diff --git a/qai_hub_models/models/whisper_small_en/demo.py b/qai_hub_models/models/whisper_small_en/demo.py index c3100f59..a290f016 100644 --- a/qai_hub_models/models/whisper_small_en/demo.py +++ b/qai_hub_models/models/whisper_small_en/demo.py @@ -6,8 +6,8 @@ from qai_hub_models.models.whisper_small_en.model import WhisperSmallEn -def main(): - whisper_demo(WhisperSmallEn) +def main(is_test: bool = False): + whisper_demo(WhisperSmallEn, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/whisper_small_en/export.py b/qai_hub_models/models/whisper_small_en/export.py index e6937074..5a71e037 100644 --- a/qai_hub_models/models/whisper_small_en/export.py +++ b/qai_hub_models/models/whisper_small_en/export.py @@ -33,7 +33,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, components: Optional[List[str]] = None, skip_profiling: bool = False, @@ -127,7 +127,6 @@ def 
export_model( for component_name, component in components_dict.items(): # Trace the model input_spec = component.get_input_spec() - component.eval() source_model = torch.jit.trace( component.to("cpu"), make_torch_inputs(input_spec) ) @@ -194,7 +193,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/whisper_small_en/perf.yaml b/qai_hub_models/models/whisper_small_en/perf.yaml index aae6bde6..7b5b5efb 100644 --- a/qai_hub_models/models/whisper_small_en/perf.yaml +++ b/qai_hub_models/models/whisper_small_en/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: WhisperEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 610635.0 - throughput: 1.6376395064154528 + inference_time: 615850.0 + throughput: 1.6237720224080539 estimated_peak_memory_range: - min: 8286208 - max: 437557824 + min: 55234560 + max: 505232120 primary_compute_unit: GPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 911 layers_on_cpu: 0 total_layers: 911 - job_id: jz57vdzr5 + job_id: jo5m424q5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1969063.0 - throughput: 0.5078557669307686 + inference_time: 1802215.0 + throughput: 0.5548727538057335 estimated_peak_memory_range: - min: 1097728 - max: 226008440 + min: 61440 + max: 235566064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +65,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1474 - job_id: jep23m8mg + job_id: jn5qwkwm5 + job_status: Passed + torchscript_onnx: + inference_time: 1696162.0 + throughput: 0.5895663268013315 + estimated_peak_memory_range: + min: 16384 + max: 452284040 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 884 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 884 + job_id: jmg9818vp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:24:00Z' + timestamp: '2024-06-22T23:33:56Z' - torchscript_onnx_tflite: - inference_time: 467725.0 - throughput: 2.1380084451333583 + inference_time: 463964.0 + throughput: 2.155339638420222 estimated_peak_memory_range: - min: 111644672 - max: 209573760 + min: 1200128 + max: 100685248 primary_compute_unit: GPU precision: fp16 layer_info: @@ -86,14 +103,14 @@ models: layers_on_gpu: 911 layers_on_cpu: 0 total_layers: 911 - job_id: j0pxe1v95 + job_id: jopr9q9ep job_status: Passed torchscript_onnx_qnn: - inference_time: 1435234.0 - throughput: 0.6967504950412268 + inference_time: 1352979.0 + throughput: 0.7391097718442046 estimated_peak_memory_range: min: 0 - max: 570396624 + max: 565732080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1474 - job_id: j2p0erye5 + job_id: jw56vjv7p job_status: Passed - torchscript_onnx_ort: - 
inference_time: 1240429.0 - throughput: 0.8061727031535058 + torchscript_onnx: + inference_time: 1259184.0 + throughput: 0.7941651101030509 estimated_peak_memory_range: - min: 350531584 - max: 914876112 + min: 995328 + max: 564432352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 884 - job_id: j1pvzw3mg + job_id: jvgd090lp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -125,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:24:10Z' + timestamp: '2024-06-22T23:33:59Z' - torchscript_onnx_tflite: - inference_time: 611130.0 - throughput: 1.6363130594145272 + inference_time: 611863.0 + throughput: 1.6343527881241389 estimated_peak_memory_range: - min: 68825088 - max: 504071032 + min: 12288 + max: 454968200 primary_compute_unit: GPU precision: fp16 layer_info: @@ -139,7 +156,7 @@ models: layers_on_gpu: 911 layers_on_cpu: 0 total_layers: 911 - job_id: jegnre2m5 + job_id: jqpynwn4g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -148,10 +165,33 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:23:58Z' + timestamp: '2024-06-22T23:33:39Z' + - torchscript_onnx_tflite: + inference_time: 613954.0 + throughput: 1.6287865214657775 + estimated_peak_memory_range: + min: 12288 + max: 456527760 + primary_compute_unit: GPU + precision: fp16 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 911 + layers_on_cpu: 0 + total_layers: 911 + job_id: j1p88988p + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:33:41Z' - torchscript_onnx_qnn: - inference_time: 1682160.0 - throughput: 0.5944737718171874 + inference_time: 1093546.0 + throughput: 0.9144562734443727 estimated_peak_memory_range: min: 962560 max: 962560 @@ -162,14 +202,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1473 - job_id: jogkryzo5 + job_id: jwgom0md5 job_status: Passed - torchscript_onnx_ort: - inference_time: 1497981.0 - throughput: 0.667565209438571 + torchscript_onnx: + inference_time: 1504227.0 + throughput: 0.6647932792058645 estimated_peak_memory_range: - min: 555839488 - max: 555839488 + min: 555786240 + max: 555786240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -177,7 +217,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 884 - job_id: jlpe4v905 + job_id: jqp48omlg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -186,15 +226,15 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:24:12Z' + timestamp: '2024-06-22T23:34:01Z' - name: WhisperDecoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 26644.0 - throughput: 37.53190211679928 + inference_time: 26012.0 + throughput: 38.44379517145933 estimated_peak_memory_range: - min: 16855040 - max: 20865456 + min: 16715776 + max: 20608968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -202,14 +242,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2573 - job_id: jqp4jwqlp + job_id: jegnxyxm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 24731.0 - throughput: 40.43508147668918 + inference_time: 24923.0 + throughput: 40.123580628335276 estimated_peak_memory_range: - min: 124076032 - max: 200059296 + min: 120889344 + max: 195142832 primary_compute_unit: NPU 
precision: fp16 layer_info: @@ -217,7 +257,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2255 - job_id: jqpyvde4p + job_id: j1gl7z7l5 + job_status: Passed + torchscript_onnx: + inference_time: 63263.0 + throughput: 15.807027804561908 + estimated_peak_memory_range: + min: 44670976 + max: 556541120 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 2302 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 2302 + job_id: jnp13l3l5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -226,13 +281,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:24:00Z' + timestamp: '2024-06-22T23:33:57Z' - torchscript_onnx_tflite: - inference_time: 19793.0 - throughput: 50.52291214065579 + inference_time: 19740.0 + throughput: 50.65856129685917 estimated_peak_memory_range: - min: 16777216 - max: 1154461280 + min: 15392768 + max: 1189848448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -240,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2573 - job_id: jo5mvzrq5 + job_id: jep2j6jm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 19453.0 - throughput: 51.40595280933532 + inference_time: 19402.0 + throughput: 51.541078239356764 estimated_peak_memory_range: - min: 72151040 - max: 864487680 + min: 86843392 + max: 784204144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -255,14 +310,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2255 - job_id: j1p8w7o8p + job_id: j1p3838z5 job_status: Passed - torchscript_onnx_ort: - inference_time: 53273.0 - throughput: 18.77123495954799 + torchscript_onnx: + inference_time: 53333.0 + throughput: 18.750117188232426 estimated_peak_memory_range: - min: 50139136 - max: 319234896 + min: 86953984 + max: 307892128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -270,7 +325,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2302 - job_id: j7gjklx85 + job_id: jz576wkrg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -279,13 +334,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:24:11Z' + timestamp: '2024-06-22T23:33:59Z' - torchscript_onnx_tflite: - inference_time: 27029.0 - throughput: 36.997299197158604 + inference_time: 27116.0 + throughput: 36.87859566307715 estimated_peak_memory_range: - min: 16769024 - max: 20284792 + min: 14921728 + max: 18533312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -293,14 +348,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2573 - job_id: jopr1ykeg + job_id: j2p0kqke5 job_status: Passed torchscript_onnx_qnn: - inference_time: 25818.0 - throughput: 38.73266713145867 + inference_time: 24249.0 + throughput: 41.23881397171017 estimated_peak_memory_range: - min: 127201280 - max: 197556544 + min: 127119360 + max: 201133888 primary_compute_unit: NPU precision: fp16 layer_info: @@ -308,7 +363,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2255 - job_id: jw56q167g + job_id: jlpe2120p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -317,13 +372,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:24:07Z' + timestamp: '2024-06-22T23:33:52Z' + - torchscript_onnx_tflite: + inference_time: 26499.0 + throughput: 37.73727310464546 + estimated_peak_memory_range: + min: 12496896 + max: 15155192 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 2573 + 
layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 2573 + job_id: jogkdndop + job_status: Passed + torchscript_onnx_qnn: + inference_time: 25155.0 + throughput: 39.75352812562115 + estimated_peak_memory_range: + min: 127160320 + max: 197620136 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 2255 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 2255 + job_id: jz5wxvxjp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:33:55Z' - torchscript_onnx_qnn: - inference_time: 20402.0 - throughput: 49.01480247034605 + inference_time: 17206.0 + throughput: 58.119260723003606 estimated_peak_memory_range: - min: 127381504 - max: 127381504 + min: 127369216 + max: 127369216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -331,14 +424,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2255 - job_id: jn5q928mp + job_id: j1pv4o4mp job_status: Passed - torchscript_onnx_ort: - inference_time: 53485.0 - throughput: 18.696830887164626 + torchscript_onnx: + inference_time: 51986.0 + throughput: 19.235948139883813 estimated_peak_memory_range: - min: 342065152 - max: 342065152 + min: 211558400 + max: 211558400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -346,7 +439,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2302 - job_id: jygzv7e6p + job_id: j0pxmj39g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -355,4 +448,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:24:13Z' + timestamp: '2024-06-22T23:34:02Z' diff --git a/qai_hub_models/models/whisper_small_en/requirements.txt b/qai_hub_models/models/whisper_small_en/requirements.txt index fa34d4f8..1b6cbc24 100644 --- a/qai_hub_models/models/whisper_small_en/requirements.txt +++ b/qai_hub_models/models/whisper_small_en/requirements.txt @@ -1,2 +1,4 @@ openai-whisper==20230314 scipy==1.8.1 +audio2numpy==0.1.2 +samplerate==0.2.1 diff --git a/qai_hub_models/models/whisper_small_en/test.py b/qai_hub_models/models/whisper_small_en/test.py index aeb74e53..d94d4bf5 100644 --- a/qai_hub_models/models/whisper_small_en/test.py +++ b/qai_hub_models/models/whisper_small_en/test.py @@ -19,4 +19,4 @@ def test_transcribe(): def test_demo(): - demo_main() + demo_main(is_test=True) diff --git a/qai_hub_models/models/whisper_tiny_en/demo.py b/qai_hub_models/models/whisper_tiny_en/demo.py index 073ab120..c1f04b61 100644 --- a/qai_hub_models/models/whisper_tiny_en/demo.py +++ b/qai_hub_models/models/whisper_tiny_en/demo.py @@ -6,8 +6,8 @@ from qai_hub_models.models.whisper_tiny_en.model import WhisperTinyEn -def main(): - whisper_demo(WhisperTinyEn) +def main(is_test: bool = False): + whisper_demo(WhisperTinyEn, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/whisper_tiny_en/export.py b/qai_hub_models/models/whisper_tiny_en/export.py index 050e09fa..913205c8 100644 --- a/qai_hub_models/models/whisper_tiny_en/export.py +++ b/qai_hub_models/models/whisper_tiny_en/export.py @@ -33,7 +33,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, components: Optional[List[str]] = None, skip_profiling: bool = False, @@ -127,7 +127,6 @@ def export_model( for component_name, component in components_dict.items(): # Trace the model input_spec = component.get_input_spec() - component.eval() 
source_model = torch.jit.trace( component.to("cpu"), make_torch_inputs(input_spec) ) @@ -194,7 +193,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/whisper_tiny_en/perf.yaml b/qai_hub_models/models/whisper_tiny_en/perf.yaml index cf5d7cdb..84c8085d 100644 --- a/qai_hub_models/models/whisper_tiny_en/perf.yaml +++ b/qai_hub_models/models/whisper_tiny_en/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: WhisperEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 68470.0 - throughput: 14.604936468526361 + inference_time: 69532.0 + throughput: 14.38186734165564 estimated_peak_memory_range: - min: 16613376 - max: 64496288 + min: 20480 + max: 63826120 primary_compute_unit: GPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 271 layers_on_cpu: 0 total_layers: 271 - job_id: jnp1q80lg + job_id: jopr9qoep job_status: Passed torchscript_onnx_qnn: - inference_time: 286944.0 - throughput: 3.485000557600089 + inference_time: 285533.0 + throughput: 3.502222159960495 estimated_peak_memory_range: min: 1019904 - max: 52873616 + max: 55271304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +65,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 338 - job_id: jegnreym5 + job_id: jw56vjr7p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +74,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:24:44Z' + timestamp: '2024-06-22T23:34:37Z' - torchscript_onnx_tflite: - inference_time: 54112.0 - throughput: 18.48018923713779 + inference_time: 54335.0 + throughput: 18.40434342504831 estimated_peak_memory_range: - min: 0 - max: 36724816 + min: 4096 + max: 34290736 primary_compute_unit: GPU precision: fp16 layer_info: @@ -86,14 +88,14 @@ models: layers_on_gpu: 271 layers_on_cpu: 0 total_layers: 271 - job_id: jz57vdwr5 + job_id: jqpynwq4g job_status: Passed torchscript_onnx_qnn: - inference_time: 218003.0 - throughput: 4.587092838171952 + inference_time: 221234.0 + throughput: 4.520100888651835 estimated_peak_memory_range: - min: 406650880 - max: 543573456 + min: 995328 + max: 136088064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +103,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 338 - job_id: jep23m6mg + job_id: jwgom0od5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +112,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:24:46Z' + timestamp: '2024-06-22T23:34:40Z' - torchscript_onnx_tflite: - inference_time: 68514.0 - throughput: 14.595557112414982 + inference_time: 68688.0 + throughput: 14.558583740973678 estimated_peak_memory_range: - min: 18030592 - max: 66868584 + min: 13635584 + max: 52716856 primary_compute_unit: GPU precision: fp16 layer_info: @@ -124,14 +126,14 @@ 
models: layers_on_gpu: 271 layers_on_cpu: 0 total_layers: 271 - job_id: j0pxe1j95 + job_id: j1p88968p job_status: Passed torchscript_onnx_qnn: - inference_time: 288936.0 - throughput: 3.4609740565384723 + inference_time: 285382.0 + throughput: 3.5040752395035426 estimated_peak_memory_range: - min: 159744 - max: 53294424 + min: 16384 + max: 53570416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -139,7 +141,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 338 - job_id: jogkryno5 + job_id: jygzw986g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -148,10 +150,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:24:50Z' + timestamp: '2024-06-22T23:34:44Z' + - torchscript_onnx_tflite: + inference_time: 68319.0 + throughput: 14.637216586893837 + estimated_peak_memory_range: + min: 16384 + max: 39529544 + primary_compute_unit: GPU + precision: fp16 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 271 + layers_on_cpu: 0 + total_layers: 271 + job_id: jn5qwkzm5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 287850.0 + throughput: 3.4740316136876848 + estimated_peak_memory_range: + min: 135168 + max: 53407928 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 338 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 338 + job_id: jmg981kvp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:34:47Z' - torchscript_onnx_qnn: - inference_time: 237871.0 - throughput: 4.203959288858247 + inference_time: 240171.0 + throughput: 4.1637000303950105 estimated_peak_memory_range: min: 962560 max: 962560 @@ -162,7 +202,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 337 - job_id: j2p0erqe5 + job_id: j7gj1mo8g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -171,15 +211,15 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:24:48Z' + timestamp: '2024-06-22T23:34:42Z' - name: WhisperDecoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 3853.0 - throughput: 259.53802232026993 + inference_time: 3849.0 + throughput: 259.80774227071964 estimated_peak_memory_range: min: 2973696 - max: 6011536 + max: 7559016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -187,14 +227,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 557 - job_id: jvgd7vwlg + job_id: jep2j64m5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3672.0 - throughput: 272.33115468409585 + inference_time: 3616.0 + throughput: 276.5486725663717 estimated_peak_memory_range: - min: 21250048 - max: 48536944 + min: 9072640 + max: 46683616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -202,14 +242,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 447 - job_id: jopr1yqeg + job_id: j1p383xz5 job_status: Passed - torchscript_onnx_ort: - inference_time: 5299.0 - throughput: 188.71485185884129 + torchscript_onnx: + inference_time: 5400.0 + throughput: 185.1851851851852 estimated_peak_memory_range: - min: 6336512 - max: 214237680 + min: 6352896 + max: 217301960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -217,7 +257,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 462 - job_id: jw56q1j7g + job_id: jz576w7rg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -226,13 +266,13 @@ models: os_name: Android 
manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:24:52Z' + timestamp: '2024-06-22T23:34:49Z' - torchscript_onnx_tflite: - inference_time: 2973.0 - throughput: 336.3605785401951 + inference_time: 3151.0 + throughput: 317.35956839098696 estimated_peak_memory_range: - min: 942080 - max: 226696352 + min: 1753088 + max: 235076768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -240,14 +280,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 557 - job_id: jqp4jwolp + job_id: j2p0kqde5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2764.0 - throughput: 361.794500723589 + inference_time: 2841.0 + throughput: 351.98873636043646 estimated_peak_memory_range: - min: 0 - max: 138707216 + min: 21213184 + max: 142867088 primary_compute_unit: NPU precision: fp16 layer_info: @@ -255,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 447 - job_id: jqpyvdw4p + job_id: j1pv4oemp job_status: Passed - torchscript_onnx_ort: - inference_time: 4502.0 - throughput: 222.1235006663705 + torchscript_onnx: + inference_time: 4262.0 + throughput: 234.6316283435007 estimated_peak_memory_range: - min: 27127808 - max: 85392304 + min: 27529216 + max: 76179792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -270,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 462 - job_id: jwgoev0dp + job_id: j0pxmjd9g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -279,13 +319,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:24:54Z' + timestamp: '2024-06-22T23:34:51Z' - torchscript_onnx_tflite: - inference_time: 3909.0 - throughput: 255.81990278843693 + inference_time: 3881.0 + throughput: 257.6655501159495 estimated_peak_memory_range: - min: 2981888 - max: 5533208 + min: 2973696 + max: 5500200 primary_compute_unit: NPU precision: fp16 layer_info: @@ -293,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 557 - job_id: jo5mvz2q5 + job_id: jogkdnoop job_status: Passed torchscript_onnx_qnn: - inference_time: 3717.0 - throughput: 269.03416733925206 + inference_time: 3653.0 + throughput: 273.7476047084588 estimated_peak_memory_range: - min: 21213184 - max: 37347800 + min: 21233664 + max: 48012888 primary_compute_unit: NPU precision: fp16 layer_info: @@ -308,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 447 - job_id: jn5q92kmp + job_id: jz5wxv8jp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -317,10 +357,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:24:50Z' + timestamp: '2024-06-22T23:34:45Z' + - torchscript_onnx_tflite: + inference_time: 3865.0 + throughput: 258.73221216041395 + estimated_peak_memory_range: + min: 2994176 + max: 5685640 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 557 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 557 + job_id: j1gl7zol5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 3721.0 + throughput: 268.74496103198067 + estimated_peak_memory_range: + min: 21221376 + max: 35267480 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 447 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 447 + job_id: jnp13l7l5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:34:47Z' - torchscript_onnx_qnn: - 
inference_time: 3772.0 - throughput: 265.11134676564154 + inference_time: 3173.0 + throughput: 315.1591553734636 estimated_peak_memory_range: min: 21229568 max: 21229568 @@ -331,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 447 - job_id: j1p8w798p + job_id: jlpe2180p job_status: Passed - torchscript_onnx_ort: - inference_time: 4450.0 - throughput: 224.7191011235955 + torchscript_onnx: + inference_time: 4549.0 + throughput: 219.82853374367994 estimated_peak_memory_range: - min: 19857408 - max: 19857408 + min: 21237760 + max: 21237760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -346,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 462 - job_id: j7gjklm85 + job_id: jegnxy7m5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -355,4 +433,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:24:56Z' + timestamp: '2024-06-22T23:34:53Z' diff --git a/qai_hub_models/models/whisper_tiny_en/requirements.txt b/qai_hub_models/models/whisper_tiny_en/requirements.txt index fa34d4f8..1b6cbc24 100644 --- a/qai_hub_models/models/whisper_tiny_en/requirements.txt +++ b/qai_hub_models/models/whisper_tiny_en/requirements.txt @@ -1,2 +1,4 @@ openai-whisper==20230314 scipy==1.8.1 +audio2numpy==0.1.2 +samplerate==0.2.1 diff --git a/qai_hub_models/models/whisper_tiny_en/test.py b/qai_hub_models/models/whisper_tiny_en/test.py index aeb74e53..d94d4bf5 100644 --- a/qai_hub_models/models/whisper_tiny_en/test.py +++ b/qai_hub_models/models/whisper_tiny_en/test.py @@ -19,4 +19,4 @@ def test_transcribe(): def test_demo(): - demo_main() + demo_main(is_test=True) diff --git a/qai_hub_models/models/wideresnet50/export.py b/qai_hub_models/models/wideresnet50/export.py index a5bd28dc..b6260264 100644 --- a/qai_hub_models/models/wideresnet50/export.py +++ b/qai_hub_models/models/wideresnet50/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/wideresnet50/perf.yaml b/qai_hub_models/models/wideresnet50/perf.yaml index 8a782d43..60b76fbc 100644 --- a/qai_hub_models/models/wideresnet50/perf.yaml +++ b/qai_hub_models/models/wideresnet50/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 
(Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: WideResNet50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 4868.0 - throughput: 205.42317173377157 + inference_time: 4893.0 + throughput: 204.37359493153485 estimated_peak_memory_range: - min: 24576 - max: 2240024 + min: 16384 + max: 2332784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jz5wm9vjg + job_id: jqpynw74g job_status: Passed torchscript_onnx_qnn: - inference_time: 5652.0 - throughput: 176.92852087756546 + inference_time: 5681.0 + throughput: 176.0253476500616 estimated_peak_memory_range: - min: 622592 - max: 250014320 + min: 618496 + max: 355223896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jvgd7v9lg + job_id: jn5qwkmm5 job_status: Passed - torchscript_onnx_ort: - inference_time: 5471.0 - throughput: 182.78194114421495 + torchscript_onnx: + inference_time: 5421.0 + throughput: 184.46781036709095 estimated_peak_memory_range: - min: 20480 - max: 445804176 + min: 12288 + max: 475100016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jvgd7vleg + job_id: j1pv4o9mp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:25:28Z' + timestamp: '2024-06-22T23:35:29Z' - torchscript_onnx_tflite: - inference_time: 3644.0 - throughput: 274.423710208562 + inference_time: 3626.0 + throughput: 275.78599007170436 estimated_peak_memory_range: - min: 16384 - max: 100476704 + min: 12288 + max: 101920256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jmg9941vg + job_id: j2p0kqve5 job_status: Passed torchscript_onnx_qnn: - inference_time: 4212.0 - throughput: 237.41690408357076 + inference_time: 4210.0 + throughput: 237.52969121140143 estimated_peak_memory_range: min: 618496 - max: 53808800 + max: 50542208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jz5wm9n6g + job_id: j1gl7z1l5 job_status: Passed - torchscript_onnx_ort: - inference_time: 4064.0 - throughput: 246.06299212598427 + torchscript_onnx: + inference_time: 4131.0 + throughput: 242.0721374969741 estimated_peak_memory_range: min: 618496 - max: 31598192 + max: 32538336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jz57vd3l5 + job_id: j7gj1mw8g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:25:29Z' + timestamp: '2024-06-22T23:35:30Z' - torchscript_onnx_tflite: - inference_time: 4872.0 - throughput: 205.2545155993432 + inference_time: 4880.0 + throughput: 204.91803278688525 estimated_peak_memory_range: min: 20480 - max: 2441976 + max: 2445640 primary_compute_unit: NPU precision: fp16 
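Note on the regenerated numbers: the inference_time / throughput pairs updated throughout these perf.yaml hunks encode the same measurement twice — inference_time is in microseconds and throughput is simply its reciprocal in inferences per second. A quick sanity check (plain Python, not part of the patch), using the updated WideResNet50 TFLite value from the hunk above:

```python
# inference_time is recorded in microseconds; throughput is inferences/second.
inference_time_us = 4893.0                      # WideResNet50, TFLite, Galaxy S23 (value from the hunk above)
throughput_per_s = 1_000_000 / inference_time_us
print(round(throughput_per_s, 2))               # 204.37 -- matches the recorded 204.37359493153485
```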
layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jnp1q8llg + job_id: j1p88948p job_status: Passed torchscript_onnx_qnn: - inference_time: 5687.0 - throughput: 175.83963425356075 + inference_time: 5688.0 + throughput: 175.8087201125176 estimated_peak_memory_range: - min: 618496 - max: 354920904 + min: 647168 + max: 334059464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jnp1q8x2g + job_id: j1p383wz5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:25:27Z' + timestamp: '2024-06-22T23:35:26Z' + - torchscript_onnx_tflite: + inference_time: 4855.0 + throughput: 205.97322348094747 + estimated_peak_memory_range: + min: 139264 + max: 2157448 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 79 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 79 + job_id: jogkdn9op + job_status: Passed + torchscript_onnx_qnn: + inference_time: 5695.0 + throughput: 175.5926251097454 + estimated_peak_memory_range: + min: 618496 + max: 355056952 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 126 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 126 + job_id: jwgom04d5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:35:28Z' - torchscript_onnx_qnn: - inference_time: 5842.0 - throughput: 171.17425539198905 + inference_time: 4659.0 + throughput: 214.63833440652502 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jmg994elg + job_id: jw56vjd7p job_status: Passed - torchscript_onnx_ort: - inference_time: 5121.0 - throughput: 195.27436047646944 + torchscript_onnx: + inference_time: 5080.0 + throughput: 196.8503937007874 estimated_peak_memory_range: - min: 71557120 - max: 71557120 + min: 25653248 + max: 25653248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jqp4jw0vp + job_id: jlpe21l0p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:25:30Z' + timestamp: '2024-06-22T23:35:31Z' diff --git a/qai_hub_models/models/wideresnet50_quantized/export.py b/qai_hub_models/models/wideresnet50_quantized/export.py index a10d2988..4308a232 100644 --- a/qai_hub_models/models/wideresnet50_quantized/export.py +++ b/qai_hub_models/models/wideresnet50_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == 
TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/wideresnet50_quantized/model.py b/qai_hub_models/models/wideresnet50_quantized/model.py index 2894b748..a0fa95da 100644 --- a/qai_hub_models/models/wideresnet50_quantized/model.py +++ b/qai_hub_models/models/wideresnet50_quantized/model.py @@ -83,5 +83,4 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() return cls(sim) diff --git a/qai_hub_models/models/wideresnet50_quantized/perf.yaml b/qai_hub_models/models/wideresnet50_quantized/perf.yaml index cd023541..a53c5d22 100644 --- a/qai_hub_models/models/wideresnet50_quantized/perf.yaml +++ b/qai_hub_models/models/wideresnet50_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: WideResNet50-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1803.0 - throughput: 554.6311702717693 + inference_time: 1816.0 + throughput: 550.6607929515418 estimated_peak_memory_range: - min: 12288 - max: 2605960 + min: 16384 + max: 2720040 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: jo5mvzyw5 + job_id: jz5wxv1jp job_status: Passed torchscript_onnx_qnn: - inference_time: 2049.0 - throughput: 488.0429477794046 + inference_time: 2041.0 + throughput: 489.9559039686428 estimated_peak_memory_range: - min: 16384 - max: 124262304 + min: 0 + max: 145574320 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: j2p0er765 - job_status: Passed - torchscript_onnx_ort: - inference_time: 2037.0 - throughput: 490.9180166912126 - estimated_peak_memory_range: - min: 12288 - max: 210986456 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 83 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 83 - job_id: j1glekr8p + job_id: j0pxmjx9g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:27:50Z' + timestamp: '2024-06-22T23:37:47Z' - torchscript_onnx_tflite: - inference_time: 1386.0 - throughput: 721.5007215007215 + inference_time: 1390.0 + throughput: 719.4244604316547 estimated_peak_memory_range: min: 12288 - max: 56539024 + max: 57045488 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: jegnre8r5 + job_id: jmg981xvp job_status: Passed torchscript_onnx_qnn: - inference_time: 1532.0 - throughput: 652.7415143603133 + inference_time: 1538.0 + throughput: 650.1950585175553 
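Across every export.py in this patch the runtime enum members are renamed: TargetRuntime.ORT becomes TargetRuntime.ONNX and TargetRuntime.PRECOMPILED_ORT becomes TargetRuntime.PRECOMPILED_QNN_ONNX. A minimal sketch of the extension-selection logic the hunks converge on — the TargetRuntime import path and the QNN branch guarding the ".so" case are assumed from context, not shown in the diff:

```python
# Sketch only -- mirrors the post-patch branches in the export scripts above.
# Assumptions: TargetRuntime is importable from qai_hub_models.utils.base_model
# and has a QNN member for the ".so" branch; neither is shown in the diff.
from qai_hub_models.utils.base_model import TargetRuntime

def target_runtime_extension(target_runtime: TargetRuntime) -> str:
    if target_runtime == TargetRuntime.QNN:
        return "so"
    elif target_runtime == TargetRuntime.TFLITE:
        return "tflite"
    elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}:
        return "onnx"
    raise ValueError(f"Unhandled target runtime: {target_runtime}")
```

The same rename drives the channel-layout handling in these hunks: only the ONNX path keeps channel-first I/O, while the TFLite/QNN paths get the `--force_channel_last_input` compile flags and transposed sample inputs.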
estimated_peak_memory_range: - min: 172032 - max: 45717904 + min: 0 + max: 42894192 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: j1p8w7vxp - job_status: Passed - torchscript_onnx_ort: - inference_time: 1574.0 - throughput: 635.3240152477764 - estimated_peak_memory_range: - min: 12288 - max: 29772112 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 83 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 83 - job_id: jw56q1l0g + job_id: jo5m428q5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +120,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:27:51Z' + timestamp: '2024-06-22T23:37:48Z' - torchscript_onnx_tflite: - inference_time: 1824.0 - throughput: 548.2456140350877 + inference_time: 1810.0 + throughput: 552.4861878453039 estimated_peak_memory_range: - min: 24576 - max: 86925416 + min: 12288 + max: 1592592 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: jopr1yj9g + job_id: jnp13lvl5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2034.0 - throughput: 491.6420845624385 + inference_time: 2022.0 + throughput: 494.55984174085063 estimated_peak_memory_range: - min: 12288 - max: 7539488 + min: 16384 + max: 115424408 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: jn5q92o4p + job_id: jopr9qwep job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +158,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:27:49Z' + timestamp: '2024-06-22T23:37:51Z' - torchscript_onnx_tflite: - inference_time: 7862.0 - throughput: 127.1940981938438 + inference_time: 1818.0 + throughput: 550.05500550055 estimated_peak_memory_range: - min: 12288 - max: 27235632 + min: 28672 + max: 1588664 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 80 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 80 + job_id: jvgd09zlp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 2027.0 + throughput: 493.33991119881597 + estimated_peak_memory_range: + min: 176128 + max: 6826976 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 78 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 78 + job_id: jep2j6em5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:37:52Z' + - torchscript_onnx_tflite: + inference_time: 8221.0 + throughput: 121.6397031991242 + estimated_peak_memory_range: + min: 20480 + max: 30028192 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,7 +210,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: jep23mn4g + job_id: jz576w9rg job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -207,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-08T23:27:44Z' + timestamp: '2024-06-22T23:37:44Z' - torchscript_onnx_tflite: - inference_time: 23597.0 - throughput: 42.3782684239522 + inference_time: 23723.0 + throughput: 42.153184673102054 estimated_peak_memory_range: - min: 53248 - max: 3084328 + min: 49152 + max: 2236888 primary_compute_unit: NPU 
precision: int8 layer_info: @@ -221,7 +233,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: jqpyvd07p + job_id: jqp48o3lg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -230,13 +242,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-08T23:27:45Z' + timestamp: '2024-06-22T23:37:46Z' - torchscript_onnx_qnn: - inference_time: 1964.0 - throughput: 509.1649694501018 + inference_time: 1851.0 + throughput: 540.2485143165857 estimated_peak_memory_range: - min: 368640 - max: 368640 + min: 286720 + max: 286720 primary_compute_unit: NPU precision: int8 layer_info: @@ -244,22 +256,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: jogkrym25 - job_status: Passed - torchscript_onnx_ort: - inference_time: 1848.0 - throughput: 541.1255411255411 - estimated_peak_memory_range: - min: 23400448 - max: 23400448 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 83 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 83 - job_id: j1p3qm2l5 + job_id: jegnxykm5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -268,4 +265,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:27:52Z' + timestamp: '2024-06-22T23:37:49Z' diff --git a/qai_hub_models/models/xlsr/export.py b/qai_hub_models/models/xlsr/export.py index dfc3b401..e9c6c913 100644 --- a/qai_hub_models/models/xlsr/export.py +++ b/qai_hub_models/models/xlsr/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -186,7 +185,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -208,7 +207,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/xlsr/info.yaml b/qai_hub_models/models/xlsr/info.yaml index b7ff7c32..c363b60a 100644 --- a/qai_hub_models/models/xlsr/info.yaml +++ b/qai_hub_models/models/xlsr/info.yaml @@ -16,7 +16,7 @@ deploy_license: source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/xlsr 
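Each export.py in this patch also changes the default device from "Samsung Galaxy S23" to "Samsung Galaxy S23 (Family)". A hedged sketch of calling one of the updated entry points directly with that default — the parameter names are taken from the export_model signatures in the hunks, and an AI Hub account/API token is needed for the call to actually submit jobs:

```python
# Illustrative only: keyword names come from the export_model signatures shown
# in the hunks above; a configured Qualcomm AI Hub token is required to run it.
from qai_hub_models.models.xlsr.export import export_model

export_model(
    device="Samsung Galaxy S23 (Family)",  # new default device-family string
    skip_profiling=True,                   # skip the on-device profiling job
    skip_inferencing=True,                 # skip the on-device inference job
)
```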
technical_details: Model checkpoint: xlsr_3x_checkpoint - Input resolution: 640x360 + Input resolution: 128x128 Number of parameters: 22.0K Model size: 92.7 KB applicable_scenarios: diff --git a/qai_hub_models/models/xlsr/model.py b/qai_hub_models/models/xlsr/model.py index 4c3e804c..6d6a4df0 100644 --- a/qai_hub_models/models/xlsr/model.py +++ b/qai_hub_models/models/xlsr/model.py @@ -39,6 +39,5 @@ def from_pretrained(cls, scale_factor: int = DEFAULT_SCALE_FACTOR) -> XLSR: ) checkpoint = load_torch(checkpoint_asset) model.load_state_dict(checkpoint["state_dict"]) - model.eval() return cls(model, scale_factor) diff --git a/qai_hub_models/models/xlsr/perf.yaml b/qai_hub_models/models/xlsr/perf.yaml index 9274c714..27429f93 100644 --- a/qai_hub_models/models/xlsr/perf.yaml +++ b/qai_hub_models/models/xlsr/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: XLSR performance_metrics: - torchscript_onnx_tflite: - inference_time: 2486.0 - throughput: 402.2526146419952 + inference_time: 2535.0 + throughput: 394.47731755424064 estimated_peak_memory_range: - min: 32768 - max: 7588944 + min: 16384 + max: 1922488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 16 - job_id: j1gle1wmp + job_id: jw56vjw7p job_status: Passed torchscript_onnx_qnn: - inference_time: 1374.0 - throughput: 727.802037845706 + inference_time: 1355.0 + throughput: 738.0073800738007 estimated_peak_memory_range: - min: 24576 - max: 15889328 + min: 217088 + max: 70830008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jwgoe4dkp + job_id: j7gj1mq8g job_status: Passed - torchscript_onnx_ort: - inference_time: 1554.0 - throughput: 643.5006435006435 + torchscript_onnx: + inference_time: 1527.0 + throughput: 654.8788474132285 estimated_peak_memory_range: min: 221184 - max: 17637032 + max: 3603224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 23 - job_id: jygzv4zxp + job_id: jnp13l6l5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-11T11:59:35Z' + timestamp: '2024-06-22T23:38:25Z' - torchscript_onnx_tflite: - inference_time: 1792.0 - throughput: 558.0357142857143 + inference_time: 1872.0 + throughput: 534.1880341880342 estimated_peak_memory_range: min: 16384 - max: 20986272 + max: 20912880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 16 - job_id: jw56qdoyg + job_id: j1p3836z5 job_status: Passed torchscript_onnx_qnn: - inference_time: 840.0 - throughput: 1190.4761904761904 + inference_time: 832.0 + throughput: 1201.923076923077 estimated_peak_memory_range: - min: 212992 - max: 20099296 + min: 0 + max: 16360336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: j1pvz92rg + job_id: jlpe21y0p job_status: Passed - torchscript_onnx_ort: - inference_time: 1035.0 
- throughput: 966.1835748792271 + torchscript_onnx: + inference_time: 1002.0 + throughput: 998.003992015968 estimated_peak_memory_range: - min: 212992 - max: 14654368 + min: 12288 + max: 13808768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 23 - job_id: jz5wm1ymg + job_id: jvgd092lp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-11T11:59:35Z' + timestamp: '2024-06-22T23:38:26Z' - torchscript_onnx_tflite: - inference_time: 2862.0 - throughput: 349.4060097833683 + inference_time: 2663.0 + throughput: 375.51633496057076 estimated_peak_memory_range: - min: 28672 - max: 1426392 + min: 622592 + max: 2212736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 16 - job_id: j1p3qwon5 + job_id: jwgom08d5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1370.0 - throughput: 729.92700729927 + inference_time: 1351.0 + throughput: 740.1924500370096 estimated_peak_memory_range: - min: 217088 - max: 9171344 + min: 20480 + max: 8885192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jlpe4l6v5 + job_id: jz5wxv4jp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-11T11:59:34Z' + timestamp: '2024-06-22T23:38:23Z' + - torchscript_onnx_tflite: + inference_time: 2573.0 + throughput: 388.65137971239795 + estimated_peak_memory_range: + min: 3796992 + max: 11747264 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 13 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 16 + job_id: j1pv4o7mp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1357.0 + throughput: 736.9196757553427 + estimated_peak_memory_range: + min: 229376 + max: 5324736 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 21 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 21 + job_id: jmg981dvp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:38:24Z' - torchscript_onnx_qnn: - inference_time: 3631.0 - throughput: 275.40622418066647 + inference_time: 1500.0 + throughput: 666.6666666666666 estimated_peak_memory_range: - min: 221184 - max: 221184 + min: 204800 + max: 204800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: j7gjkw3e5 + job_id: jygzw9n6g job_status: Passed - torchscript_onnx_ort: - inference_time: 1489.0 - throughput: 671.591672263264 + torchscript_onnx: + inference_time: 1524.0 + throughput: 656.1679790026246 estimated_peak_memory_range: - min: 8957952 - max: 8957952 + min: 8962048 + max: 8962048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 23 - job_id: jnp1qvo7g + job_id: jz5wxv76p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-11T11:59:36Z' + timestamp: '2024-06-22T23:38:27Z' diff 
--git a/qai_hub_models/models/xlsr_quantized/export.py b/qai_hub_models/models/xlsr_quantized/export.py index a0ddab0f..98bfcd16 100644 --- a/qai_hub_models/models/xlsr_quantized/export.py +++ b/qai_hub_models/models/xlsr_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -216,7 +216,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/xlsr_quantized/info.yaml b/qai_hub_models/models/xlsr_quantized/info.yaml index cadc40fc..e6c227f0 100644 --- a/qai_hub_models/models/xlsr_quantized/info.yaml +++ b/qai_hub_models/models/xlsr_quantized/info.yaml @@ -17,7 +17,7 @@ deploy_license: source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/xlsr technical_details: Model checkpoint: xlsr_3x_checkpoint - Input resolution: 640x360 + Input resolution: 128x128 Number of parameters: 22.0K Model size: 39.0 KB applicable_scenarios: diff --git a/qai_hub_models/models/xlsr_quantized/perf.yaml b/qai_hub_models/models/xlsr_quantized/perf.yaml index c06896f0..3c913f54 100644 --- a/qai_hub_models/models/xlsr_quantized/perf.yaml +++ b/qai_hub_models/models/xlsr_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: XLSR-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1141.0 - throughput: 876.4241893076249 + inference_time: 1129.0 + throughput: 885.7395925597874 estimated_peak_memory_range: - min: 28672 - max: 5356448 + min: 20480 + max: 1611344 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jmg99xomg + job_id: jnp13lj25 job_status: Passed torchscript_onnx_qnn: - inference_time: 799.0 - throughput: 1251.5644555694619 + inference_time: 807.0 + throughput: 
1239.1573729863692 estimated_peak_memory_range: - min: 16384 - max: 12173096 + min: 86016 + max: 72738664 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: j0pxed085 - job_status: Passed - torchscript_onnx_ort: - inference_time: 769.0 - throughput: 1300.3901170351105 - estimated_peak_memory_range: - min: 12288 - max: 3749080 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 21 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 21 - job_id: jep23vo6g + job_id: jegnxynr5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-11T12:00:03Z' + timestamp: '2024-06-22T23:38:52Z' - torchscript_onnx_tflite: - inference_time: 943.0 - throughput: 1060.4453870625662 + inference_time: 1020.0 + throughput: 980.3921568627451 estimated_peak_memory_range: - min: 16384 - max: 21882800 + min: 12288 + max: 22181408 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jnp1qvong + job_id: jvgd093ep job_status: Passed torchscript_onnx_qnn: - inference_time: 546.0 - throughput: 1831.5018315018315 + inference_time: 549.0 + throughput: 1821.4936247723133 estimated_peak_memory_range: - min: 65536 - max: 19116992 + min: 12288 + max: 17452880 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jo5mvd975 - job_status: Passed - torchscript_onnx_ort: - inference_time: 552.0 - throughput: 1811.5942028985507 - estimated_peak_memory_range: - min: 61440 - max: 18287376 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 21 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 21 - job_id: jqpyv780p + job_id: jopr9q09p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +120,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-11T12:00:04Z' + timestamp: '2024-06-22T23:38:53Z' - torchscript_onnx_tflite: - inference_time: 1145.0 - throughput: 873.3624454148471 + inference_time: 1128.0 + throughput: 886.5248226950355 estimated_peak_memory_range: - min: 106496 - max: 1718744 + min: 20480 + max: 1490032 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jvgd7z66g + job_id: jz576w4lg job_status: Passed torchscript_onnx_qnn: - inference_time: 807.0 - throughput: 1239.1573729863692 + inference_time: 793.0 + throughput: 1261.034047919294 estimated_peak_memory_range: - min: 16384 - max: 17351048 + min: 81920 + max: 9688640 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jopr1nxkg + job_id: jqpynwx7g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +158,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-11T12:00:02Z' + timestamp: '2024-06-22T23:38:55Z' - torchscript_onnx_tflite: - inference_time: 2637.0 - throughput: 379.21880925293897 + inference_time: 1123.0 + throughput: 890.4719501335708 estimated_peak_memory_range: - min: 12288 - max: 14920896 + min: 1662976 + max: 3283600 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,7 +172,45 @@ 
models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jz57v7on5 + job_id: jqp48o1vg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 801.0 + throughput: 1248.4394506866417 + estimated_peak_memory_range: + min: 81920 + max: 73030968 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 17 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 17 + job_id: j2p0kqj65 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:38:57Z' + - torchscript_onnx_tflite: + inference_time: 2434.0 + throughput: 410.84634346754314 + estimated_peak_memory_range: + min: 32768 + max: 16271888 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 14 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 17 + job_id: j0pxmj41g job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -207,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-11T11:59:56Z' + timestamp: '2024-06-22T23:38:49Z' - torchscript_onnx_tflite: inference_time: 11523.0 throughput: 86.78295582747549 estimated_peak_memory_range: - min: 2777088 - max: 8508512 + min: 2985984 + max: 10173008 primary_compute_unit: GPU precision: int8 layer_info: @@ -221,7 +233,7 @@ models: layers_on_gpu: 9 layers_on_cpu: 5 total_layers: 17 - job_id: jqp4j9e2p + job_id: jo5m42mw5 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -230,10 +242,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-11T11:59:56Z' + timestamp: '2024-06-22T23:38:50Z' - torchscript_onnx_qnn: - inference_time: 960.0 - throughput: 1041.6666666666667 + inference_time: 862.0 + throughput: 1160.092807424594 estimated_peak_memory_range: min: 57344 max: 57344 @@ -244,22 +256,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jegnr71j5 - job_status: Passed - torchscript_onnx_ort: - inference_time: 750.0 - throughput: 1333.3333333333333 - estimated_peak_memory_range: - min: 7811072 - max: 7811072 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 21 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 21 - job_id: j2p0ev905 + job_id: jep2j6w45 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -268,4 +265,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-11T12:00:04Z' + timestamp: '2024-06-22T23:38:54Z' diff --git a/qai_hub_models/models/yolonas/export.py b/qai_hub_models/models/yolonas/export.py index 3edacba9..4dd25551 100644 --- a/qai_hub_models/models/yolonas/export.py +++ b/qai_hub_models/models/yolonas/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if 
target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -186,7 +185,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/yolonas/model.py b/qai_hub_models/models/yolonas/model.py index b2f5e62f..72e6bad8 100644 --- a/qai_hub_models/models/yolonas/model.py +++ b/qai_hub_models/models/yolonas/model.py @@ -105,7 +105,7 @@ def from_pretrained( input_size = cls.get_input_spec()["image"][0] model.prep_model_for_conversion(input_size=input_size) model.heads.eval_size = input_size[2:] - return cls(model.eval(), include_postprocessing) + return cls(model, include_postprocessing) def forward(self, image): """ diff --git a/qai_hub_models/models/yolonas/perf.yaml b/qai_hub_models/models/yolonas/perf.yaml index 4798067d..8e4e7038 100644 --- a/qai_hub_models/models/yolonas/perf.yaml +++ b/qai_hub_models/models/yolonas/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: Yolo-NAS performance_metrics: - torchscript_onnx_tflite: - inference_time: 12935.0 - throughput: 77.30962504831851 + inference_time: 10996.0 + throughput: 90.94216078574027 estimated_peak_memory_range: - min: 245760 - max: 7789312 + min: 258048 + max: 4633448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 201 - job_id: jmg993llg + job_id: jwgom0rx5 job_status: Passed torchscript_onnx_qnn: - inference_time: 14574.0 - throughput: 68.61534239055852 + inference_time: 14669.0 + throughput: 68.17097279978185 estimated_peak_memory_range: - min: 5861376 - max: 20985784 + min: 4960256 + max: 23830208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jz57vjyl5 + job_id: jygzw9mkg job_status: Passed - torchscript_onnx_ort: - inference_time: 9949.0 - throughput: 100.51261433309881 + torchscript_onnx: + inference_time: 9961.0 + throughput: 100.39152695512499 estimated_peak_memory_range: - min: 540672 - max: 61160336 + min: 12288 + max: 59883912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jegnr96r5 + job_id: jz576welg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:29:29Z' + timestamp: '2024-06-22T23:39:48Z' - torchscript_onnx_tflite: - inference_time: 9036.0 - throughput: 110.66843736166446 + inference_time: 7459.0 + throughput: 134.0662287169862 estimated_peak_memory_range: min: 217088 - max: 99001056 + max: 101612880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 
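The perf.yaml files regenerated in this patch all share one schema (models → performance_metrics → per-runtime blocks plus reference_device_info). A small hypothetical helper, not part of the repository, showing how that structure can be queried — for instance to pull the updated TFLite latency for a single device:

```python
# Hypothetical reader for the perf.yaml schema used in these hunks; the field
# names (models, performance_metrics, torchscript_onnx_tflite, inference_time,
# reference_device_info.name) are taken from the YAML above.
from typing import Optional
import yaml

def tflite_latency_us(perf_yaml_path: str, device_name: str) -> Optional[float]:
    with open(perf_yaml_path) as f:
        perf = yaml.safe_load(f)
    for model in perf.get("models", []):
        for entry in model.get("performance_metrics", []):
            device = entry.get("reference_device_info", {}).get("name")
            tflite = entry.get("torchscript_onnx_tflite")
            if device == device_name and tflite:
                return tflite["inference_time"]
    return None

# e.g. tflite_latency_us("qai_hub_models/models/yolonas/perf.yaml", "Samsung Galaxy S24")
# would return 7459.0 given the updated values shown above.
```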
layers_on_cpu: 0 total_layers: 201 - job_id: jnp1qd42g + job_id: j1pv4odjp job_status: Passed torchscript_onnx_qnn: - inference_time: 10109.0 - throughput: 98.92175289346127 + inference_time: 10114.0 + throughput: 98.87284951552304 estimated_peak_memory_range: min: 4931584 - max: 92525504 + max: 86609072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jqp4jxlvp + job_id: jz5wxvl6p job_status: Passed - torchscript_onnx_ort: - inference_time: 6486.0 - throughput: 154.17823003391922 + torchscript_onnx: + inference_time: 6437.0 + throughput: 155.35187199005748 estimated_peak_memory_range: min: 4931584 - max: 56975920 + max: 49511008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jopr14v9g + job_id: jqp48oyvg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:29:30Z' + timestamp: '2024-06-22T23:39:49Z' - torchscript_onnx_tflite: - inference_time: 12949.0 - throughput: 77.22604062089736 + inference_time: 10774.0 + throughput: 92.81603861147207 estimated_peak_memory_range: - min: 225280 - max: 7472208 + min: 241664 + max: 6806360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 201 - job_id: jvgd7rxeg + job_id: j7gj1m7xg job_status: Passed torchscript_onnx_qnn: - inference_time: 15243.0 - throughput: 65.603883749918 + inference_time: 14774.0 + throughput: 67.68647624204684 estimated_peak_memory_range: - min: 4952064 - max: 23136736 + min: 4947968 + max: 22526536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jo5mvwnw5 + job_id: jnp13ln25 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:29:28Z' + timestamp: '2024-06-22T23:39:45Z' + - torchscript_onnx_tflite: + inference_time: 10799.0 + throughput: 92.60116677470135 + estimated_peak_memory_range: + min: 12288 + max: 6396832 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 201 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 201 + job_id: jlpe21z1p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 14766.0 + throughput: 67.72314777190844 + estimated_peak_memory_range: + min: 4960256 + max: 23017136 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 289 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 289 + job_id: jvgd09dep + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:39:47Z' - torchscript_onnx_qnn: - inference_time: 11897.0 - throughput: 84.05480373203329 + inference_time: 10605.0 + throughput: 94.2951438000943 estimated_peak_memory_range: - min: 4808704 - max: 4808704 + min: 4923392 + max: 4923392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: j0pxe7k15 + job_id: jmg981zlp job_status: Passed - torchscript_onnx_ort: - inference_time: 10119.0 - throughput: 98.82399446585632 + torchscript_onnx: + 
inference_time: 10061.0 + throughput: 99.39369843951893 estimated_peak_memory_range: - min: 5672960 - max: 5672960 + min: 4124672 + max: 4124672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jep237k4g + job_id: j0pxmjl1g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:29:31Z' + timestamp: '2024-06-22T23:39:50Z' diff --git a/qai_hub_models/models/yolonas_quantized/export.py b/qai_hub_models/models/yolonas_quantized/export.py index 86a7b17f..e8e9ed7e 100644 --- a/qai_hub_models/models/yolonas_quantized/export.py +++ b/qai_hub_models/models/yolonas_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -228,8 +228,8 @@ def main(): parser = export_parser( model_cls=Model, supports_qnn=False, - supports_ort=False, - supports_precompiled_ort=False, + supports_onnx=False, + supports_precompiled_qnn_onnx=False, ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/yolonas_quantized/model.py b/qai_hub_models/models/yolonas_quantized/model.py index 52c5fb5e..d524500d 100644 --- a/qai_hub_models/models/yolonas_quantized/model.py +++ b/qai_hub_models/models/yolonas_quantized/model.py @@ -79,7 +79,6 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() final_model = cls(sim) return final_model diff --git a/qai_hub_models/models/yolonas_quantized/perf.yaml b/qai_hub_models/models/yolonas_quantized/perf.yaml index 00f23b93..57e382b2 100644 --- a/qai_hub_models/models/yolonas_quantized/perf.yaml +++ b/qai_hub_models/models/yolonas_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,19 +46,19 @@ models: - name: Yolo-NAS-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 6973.0 - throughput: 143.41029685931449 + inference_time: 4970.0 + throughput: 201.2072434607646 
estimated_peak_memory_range: - min: 10432512 - max: 13902448 + min: 110592 + max: 9842984 primary_compute_unit: NPU precision: int8 layer_info: layers_on_npu: 200 layers_on_gpu: 0 - layers_on_cpu: 3 - total_layers: 203 - job_id: j2p0e1z65 + layers_on_cpu: 1 + total_layers: 201 + job_id: jegnxyzr5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -63,21 +67,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:30:46Z' + timestamp: '2024-06-22T23:41:04Z' - torchscript_onnx_tflite: - inference_time: 5003.0 - throughput: 199.8800719568259 + inference_time: 3288.0 + throughput: 304.1362530413625 estimated_peak_memory_range: - min: 356352 - max: 64309792 + min: 36864 + max: 75911536 primary_compute_unit: NPU precision: int8 layer_info: layers_on_npu: 200 layers_on_gpu: 0 - layers_on_cpu: 3 - total_layers: 203 - job_id: j1p8w3qxp + layers_on_cpu: 1 + total_layers: 201 + job_id: jopr9ql9p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -86,21 +90,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:30:47Z' + timestamp: '2024-06-22T23:41:05Z' - torchscript_onnx_tflite: - inference_time: 6937.0 - throughput: 144.15453366008362 + inference_time: 4997.0 + throughput: 200.12007204322595 estimated_peak_memory_range: - min: 10485760 - max: 42442768 + min: 126976 + max: 192876504 primary_compute_unit: NPU precision: int8 layer_info: layers_on_npu: 200 layers_on_gpu: 0 - layers_on_cpu: 3 - total_layers: 203 - job_id: jogkrle25 + layers_on_cpu: 1 + total_layers: 201 + job_id: jep2j6r45 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -109,50 +113,50 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:30:48Z' + timestamp: '2024-06-22T23:41:06Z' - torchscript_onnx_tflite: - inference_time: 23899.0 - throughput: 41.84275492698439 + inference_time: 4999.0 + throughput: 200.0400080016003 estimated_peak_memory_range: - min: 765952 - max: 56795680 + min: 110592 + max: 10179344 primary_compute_unit: NPU precision: int8 layer_info: layers_on_npu: 200 layers_on_gpu: 0 - layers_on_cpu: 3 - total_layers: 203 - job_id: jn5q9764p + layers_on_cpu: 1 + total_layers: 201 + job_id: jqpynwo7g job_status: Passed reference_device_info: - name: RB3 Gen 2 (Proxy) - os: '12' - form_factor: Iot + name: SA8775 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: Qcs6490 - timestamp: '2024-06-08T23:30:49Z' + chipset: Sa8775p + timestamp: '2024-06-22T23:41:08Z' - torchscript_onnx_tflite: - inference_time: 131373.0 - throughput: 7.611914168055841 + inference_time: 13017.0 + throughput: 76.82261657832066 estimated_peak_memory_range: - min: 15310848 - max: 24594432 - primary_compute_unit: CPU - precision: fp32 + min: 12288 + max: 58356864 + primary_compute_unit: NPU + precision: int8 layer_info: - layers_on_npu: 0 + layers_on_npu: 200 layers_on_gpu: 0 - layers_on_cpu: 203 - total_layers: 203 - job_id: j1gle0v8p + layers_on_cpu: 1 + total_layers: 201 + job_id: j2p0kqm65 job_status: Passed reference_device_info: - name: RB5 (Proxy) + name: RB3 Gen 2 (Proxy) os: '12' form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Qcs8250 - timestamp: '2024-06-08T23:30:50Z' + chipset: Qcs6490 + timestamp: '2024-06-22T23:41:09Z' diff --git a/qai_hub_models/models/yolov6/export.py b/qai_hub_models/models/yolov6/export.py index 1b9b17c2..d9a560f7 100644 --- 
a/qai_hub_models/models/yolov6/export.py +++ b/qai_hub_models/models/yolov6/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -186,7 +185,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/yolov6/perf.yaml b/qai_hub_models/models/yolov6/perf.yaml index 896ae16b..40423c23 100644 --- a/qai_hub_models/models/yolov6/perf.yaml +++ b/qai_hub_models/models/yolov6/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: Yolo-v6 performance_metrics: - torchscript_onnx_tflite: - inference_time: 7424.0 - throughput: 134.69827586206895 + inference_time: 6188.0 + throughput: 161.60310277957336 estimated_peak_memory_range: - min: 12288 - max: 3603960 + min: 53248 + max: 9162272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 182 - job_id: jz5wmdw6g + job_id: jz5wxvy6p job_status: Passed torchscript_onnx_qnn: - inference_time: 5369.0 - throughput: 186.25442354255912 + inference_time: 5354.0 + throughput: 186.77624206200971 estimated_peak_memory_range: - min: 4968448 - max: 16471240 + min: 6352896 + max: 17627936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jvgd7rneg + job_id: jz576wolg job_status: Passed - torchscript_onnx_ort: - inference_time: 7761.0 - throughput: 128.84937508053085 + torchscript_onnx: + inference_time: 6832.0 + throughput: 146.37002341920376 estimated_peak_memory_range: min: 5341184 - max: 35743744 + max: 34589032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jo5mvwew5 + job_id: jopr9qx9p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:31:24Z' + timestamp: '2024-06-22T23:41:54Z' - torchscript_onnx_tflite: - 
inference_time: 5294.0 - throughput: 188.89308651303364 + inference_time: 4481.0 + throughput: 223.1644722160232 estimated_peak_memory_range: - min: 40960 - max: 79662544 + min: 20480 + max: 81810416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 182 - job_id: jmg9930lg + job_id: jmg981olp job_status: Passed torchscript_onnx_qnn: - inference_time: 3862.0 - throughput: 258.9331952356292 + inference_time: 3869.0 + throughput: 258.46471956577926 estimated_peak_memory_range: min: 4931584 - max: 95031952 + max: 85673344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jz57vj2l5 + job_id: jqp48oevg job_status: Passed - torchscript_onnx_ort: - inference_time: 5600.0 - throughput: 178.57142857142858 + torchscript_onnx: + inference_time: 5175.0 + throughput: 193.23671497584542 estimated_peak_memory_range: - min: 835584 - max: 60500960 + min: 4915200 + max: 61967408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jegnr90r5 + job_id: jep2j6o45 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:31:25Z' + timestamp: '2024-06-22T23:41:55Z' - torchscript_onnx_tflite: - inference_time: 7339.0 - throughput: 136.2583458236817 + inference_time: 6227.0 + throughput: 160.59097478721696 estimated_peak_memory_range: - min: 45056 - max: 9009312 + min: 237568 + max: 15085408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 182 - job_id: jnp1qd22g + job_id: jnp13lo25 job_status: Passed torchscript_onnx_qnn: - inference_time: 5384.0 - throughput: 185.73551263001485 + inference_time: 5368.0 + throughput: 186.28912071535024 estimated_peak_memory_range: - min: 4939776 - max: 16906872 + min: 4935680 + max: 20817472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: j0pxe7915 + job_id: jo5m429w5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:31:23Z' + timestamp: '2024-06-22T23:41:51Z' + - torchscript_onnx_tflite: + inference_time: 6192.0 + throughput: 161.49870801033592 + estimated_peak_memory_range: + min: 20480 + max: 9125240 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 182 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 182 + job_id: jvgd096ep + job_status: Passed + torchscript_onnx_qnn: + inference_time: 5417.0 + throughput: 184.60402436773123 + estimated_peak_memory_range: + min: 4956160 + max: 16950328 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 228 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 228 + job_id: jegnxy1r5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:41:53Z' - torchscript_onnx_qnn: - inference_time: 6812.0 - throughput: 146.7997651203758 + inference_time: 5423.0 + throughput: 184.39977872026554 estimated_peak_memory_range: min: 4923392 max: 4923392 @@ -192,14 +232,14 
@@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jqp4jxnvp + job_id: j0pxmj01g job_status: Passed - torchscript_onnx_ort: - inference_time: 6530.0 - throughput: 153.1393568147014 + torchscript_onnx: + inference_time: 6563.0 + throughput: 152.36934328813044 estimated_peak_memory_range: - min: 3538944 - max: 3538944 + min: 5021696 + max: 5021696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jopr1469g + job_id: jqpynw87g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:31:26Z' + timestamp: '2024-06-22T23:41:56Z' diff --git a/qai_hub_models/models/yolov7/export.py b/qai_hub_models/models/yolov7/export.py index ecc0f421..5b315891 100644 --- a/qai_hub_models/models/yolov7/export.py +++ b/qai_hub_models/models/yolov7/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -186,7 +185,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -218,7 +217,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/yolov7/perf.yaml b/qai_hub_models/models/yolov7/perf.yaml index 4a8b5cd0..757be007 100644 --- a/qai_hub_models/models/yolov7/perf.yaml +++ b/qai_hub_models/models/yolov7/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,34 +38,19 @@ models: - name: Yolo-v7 performance_metrics: - torchscript_onnx_tflite: - inference_time: 15912.0 - throughput: 62.845651080945196 + inference_time: 24960.0 + throughput: 40.06410256410256 estimated_peak_memory_range: - min: 36864 - max: 24453640 - primary_compute_unit: NPU + min: 38207488 + max: 71608088 + primary_compute_unit: GPU 
precision: fp16 layer_info: - layers_on_npu: 203 - layers_on_gpu: 0 - layers_on_cpu: 12 + layers_on_npu: 0 + layers_on_gpu: 145 + layers_on_cpu: 70 total_layers: 215 - job_id: jqpyv4z7p - job_status: Passed - torchscript_onnx_ort: - inference_time: 13978.0 - throughput: 71.5409929889827 - estimated_peak_memory_range: - min: 1499136 - max: 35988136 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 213 - layers_on_gpu: 0 - layers_on_cpu: 12 - total_layers: 225 - job_id: jw56q320g + job_id: j1p889jxp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,36 +59,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:31:53Z' + timestamp: '2024-06-22T23:42:17Z' - torchscript_onnx_tflite: - inference_time: 10805.0 - throughput: 92.5497454881999 + inference_time: 18256.0 + throughput: 54.77651183172656 estimated_peak_memory_range: - min: 1200128 - max: 65074384 - primary_compute_unit: NPU + min: 77824 + max: 61690368 + primary_compute_unit: GPU precision: fp16 layer_info: - layers_on_npu: 203 - layers_on_gpu: 0 - layers_on_cpu: 12 + layers_on_npu: 0 + layers_on_gpu: 145 + layers_on_cpu: 70 total_layers: 215 - job_id: j2p0e1465 - job_status: Passed - torchscript_onnx_ort: - inference_time: 8800.0 - throughput: 113.63636363636364 - estimated_peak_memory_range: - min: 7557120 - max: 68407936 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 213 - layers_on_gpu: 0 - layers_on_cpu: 12 - total_layers: 225 - job_id: j1p3q4nl5 + job_id: jogkdn62p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,21 +82,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:31:54Z' + timestamp: '2024-06-22T23:42:18Z' - torchscript_onnx_tflite: - inference_time: 15993.0 - throughput: 62.52735571812668 + inference_time: 24597.0 + throughput: 40.65536447534252 estimated_peak_memory_range: - min: 1232896 - max: 3455120 - primary_compute_unit: NPU + min: 40337408 + max: 85297208 + primary_compute_unit: GPU precision: fp16 layer_info: - layers_on_npu: 203 - layers_on_gpu: 0 - layers_on_cpu: 12 + layers_on_npu: 0 + layers_on_gpu: 145 + layers_on_cpu: 70 total_layers: 215 - job_id: j1p8w32xp + job_id: jn5qwk445 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,27 +105,27 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:31:48Z' - - torchscript_onnx_ort: - inference_time: 13386.0 - throughput: 74.70491558344538 + timestamp: '2024-06-22T23:42:19Z' + - torchscript_onnx_tflite: + inference_time: 24592.0 + throughput: 40.66363044892648 estimated_peak_memory_range: - min: 4964352 - max: 4964352 - primary_compute_unit: NPU + min: 40124416 + max: 91709704 + primary_compute_unit: GPU precision: fp16 layer_info: - layers_on_npu: 213 - layers_on_gpu: 0 - layers_on_cpu: 12 - total_layers: 225 - job_id: jwgoe1zxp + layers_on_npu: 0 + layers_on_gpu: 145 + layers_on_cpu: 70 + total_layers: 215 + job_id: j1gl7zw85 job_status: Passed reference_device_info: - name: Snapdragon X Elite CRD - os: '11' - form_factor: Compute - os_name: Windows + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:31:55Z' + chipset: Sa8775p + timestamp: '2024-06-22T23:42:20Z' diff --git a/qai_hub_models/models/yolov7_quantized/export.py b/qai_hub_models/models/yolov7_quantized/export.py index 
a8d2b1bc..c0db3d61 100644 --- a/qai_hub_models/models/yolov7_quantized/export.py +++ b/qai_hub_models/models/yolov7_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -228,8 +228,8 @@ def main(): parser = export_parser( model_cls=Model, supports_qnn=False, - supports_ort=False, - supports_precompiled_ort=False, + supports_onnx=False, + supports_precompiled_qnn_onnx=False, ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/yolov7_quantized/model.py b/qai_hub_models/models/yolov7_quantized/model.py index adbbb0ae..d544a4f5 100644 --- a/qai_hub_models/models/yolov7_quantized/model.py +++ b/qai_hub_models/models/yolov7_quantized/model.py @@ -80,7 +80,6 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() final_model = cls(sim) return final_model diff --git a/qai_hub_models/models/yolov7_quantized/perf.yaml b/qai_hub_models/models/yolov7_quantized/perf.yaml index 765fdc6d..b16507b7 100644 --- a/qai_hub_models/models/yolov7_quantized/perf.yaml +++ b/qai_hub_models/models/yolov7_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: Yolo-v7-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 4596.0 - throughput: 217.58050478677112 + inference_time: 4575.0 + throughput: 218.5792349726776 estimated_peak_memory_range: min: 311296 - max: 2244624 + max: 2495600 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,7 +58,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 226 - job_id: j7gjk0dx5 + job_id: jmg9812lp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -63,13 +67,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:32:25Z' + timestamp: '2024-06-22T23:43:00Z' - torchscript_onnx_tflite: - inference_time: 2999.0 - throughput: 333.4444814938313 + inference_time: 2954.0 + throughput: 338.52403520649966 estimated_peak_memory_range: - min: 32768 - max: 61022912 + min: 40960 + max: 63977152 
primary_compute_unit: NPU precision: int8 layer_info: @@ -77,7 +81,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 226 - job_id: jlpe4ro15 + job_id: jnp13l125 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -86,13 +90,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:32:26Z' + timestamp: '2024-06-22T23:43:02Z' - torchscript_onnx_tflite: - inference_time: 4588.0 - throughput: 217.9598953792502 + inference_time: 4610.0 + throughput: 216.91973969631238 estimated_peak_memory_range: - min: 299008 - max: 3108488 + min: 286720 + max: 2138736 primary_compute_unit: NPU precision: int8 layer_info: @@ -100,7 +104,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 226 - job_id: jygzvx2kp + job_id: jvgd094ep job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -109,13 +113,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:32:27Z' + timestamp: '2024-06-22T23:43:03Z' - torchscript_onnx_tflite: - inference_time: 10699.0 - throughput: 93.46667912889055 + inference_time: 4567.0 + throughput: 218.96211955331728 estimated_peak_memory_range: - min: 266240 - max: 56452384 + min: 323584 + max: 3825968 primary_compute_unit: NPU precision: int8 layer_info: @@ -123,7 +127,30 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 226 - job_id: jz5wmd26g + job_id: jz576wnlg + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:43:04Z' + - torchscript_onnx_tflite: + inference_time: 10793.0 + throughput: 92.6526452330214 + estimated_peak_memory_range: + min: 53248 + max: 56912800 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 225 + layers_on_gpu: 0 + layers_on_cpu: 1 + total_layers: 226 + job_id: jqp48o4vg job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -132,13 +159,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-08T23:32:29Z' + timestamp: '2024-06-22T23:43:05Z' - torchscript_onnx_tflite: - inference_time: 93320.0 - throughput: 10.715816545220745 + inference_time: 99875.0 + throughput: 10.012515644555695 estimated_peak_memory_range: - min: 8769536 - max: 46392104 + min: 1826816 + max: 37759640 primary_compute_unit: GPU precision: int8 layer_info: @@ -146,7 +173,7 @@ models: layers_on_gpu: 126 layers_on_cpu: 68 total_layers: 226 - job_id: jmg993jlg + job_id: j0pxmjr1g job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -155,4 +182,4 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-08T23:32:29Z' + timestamp: '2024-06-22T23:43:06Z' diff --git a/qai_hub_models/models/yolov8_det/export.py b/qai_hub_models/models/yolov8_det/export.py index b8418123..953fc5b7 100644 --- a/qai_hub_models/models/yolov8_det/export.py +++ b/qai_hub_models/models/yolov8_det/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,7 +116,6 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace( model.to("cpu"), make_torch_inputs(input_spec), check_trace=False ) @@ -124,7 +123,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow 
Lite) channel_last_flags = ( " --force_channel_last_input image" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -170,7 +169,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/yolov8_det/model.py b/qai_hub_models/models/yolov8_det/model.py index 224497fd..b3e07074 100644 --- a/qai_hub_models/models/yolov8_det/model.py +++ b/qai_hub_models/models/yolov8_det/model.py @@ -99,7 +99,6 @@ def from_pretrained( from ultralytics import YOLO as ultralytics_YOLO model = ultralytics_YOLO(ckpt_name).model - model.eval() return cls( model, include_postprocessing, diff --git a/qai_hub_models/models/yolov8_det/perf.yaml b/qai_hub_models/models/yolov8_det/perf.yaml index 79f7e29d..fb80858a 100644 --- a/qai_hub_models/models/yolov8_det/perf.yaml +++ b/qai_hub_models/models/yolov8_det/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: YOLOv8-Detection performance_metrics: - torchscript_onnx_tflite: - inference_time: 5900.0 - throughput: 169.4915254237288 + inference_time: 5199.0 + throughput: 192.34468166955185 estimated_peak_memory_range: - min: 40960 - max: 11760568 + min: 266240 + max: 2775568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: j0pxe7n35 + job_id: jw56vjm0p job_status: Passed torchscript_onnx_qnn: - inference_time: 5248.0 - throughput: 190.5487804878049 + inference_time: 5211.0 + throughput: 191.90174630589138 estimated_peak_memory_range: - min: 4919296 - max: 17813040 + min: 4206592 + max: 16408272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 285 - job_id: jep2370rg + job_id: j7gj1myxg job_status: Passed - torchscript_onnx_ort: - inference_time: 6498.0 - throughput: 153.8935056940597 + torchscript_onnx: + inference_time: 6680.0 + throughput: 149.7005988023952 estimated_peak_memory_range: - min: 8409088 - max: 39812256 + min: 5349376 + max: 36039048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: jogkrl7w5 + job_id: jnp13lm25 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:33:07Z' + timestamp: '2024-06-22T23:43:52Z' - torchscript_onnx_tflite: - inference_time: 4177.0 - throughput: 239.40627244433804 + inference_time: 3748.0 + throughput: 
266.8089647812167 estimated_peak_memory_range: - min: 16384 - max: 87350704 + min: 12288 + max: 81814864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jo5mvwqd5 + job_id: j1p3837l5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3699.0 - throughput: 270.3433360367667 + inference_time: 3693.0 + throughput: 270.7825616030328 estimated_peak_memory_range: min: 4931584 - max: 104903584 + max: 97113280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 285 - job_id: jqpyv4r8p + job_id: jlpe21x1p job_status: Passed - torchscript_onnx_ort: - inference_time: 4564.0 - throughput: 219.10604732690624 + torchscript_onnx: + inference_time: 4294.0 + throughput: 232.88309268747088 estimated_peak_memory_range: - min: 7028736 - max: 68265872 + min: 4952064 + max: 62885072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: jn5q97enp + job_id: jvgd09mep job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:33:08Z' + timestamp: '2024-06-22T23:43:53Z' - torchscript_onnx_tflite: - inference_time: 5907.0 - throughput: 169.29067208396816 + inference_time: 5208.0 + throughput: 192.01228878648234 estimated_peak_memory_range: - min: 245760 - max: 2242704 + min: 262144 + max: 2646912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jopr1480g + job_id: jwgom0wx5 job_status: Passed torchscript_onnx_qnn: - inference_time: 5193.0 - throughput: 192.56691700365877 + inference_time: 5205.0 + throughput: 192.12295869356387 estimated_peak_memory_range: - min: 4947968 - max: 19559888 + min: 4939776 + max: 17387896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 285 - job_id: j1p8w30kp + job_id: jz5wxvr6p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:33:06Z' + timestamp: '2024-06-22T23:43:50Z' + - torchscript_onnx_tflite: + inference_time: 5200.0 + throughput: 192.30769230769232 + estimated_peak_memory_range: + min: 32768 + max: 5389376 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 290 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 290 + job_id: j1pv4omjp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 5237.0 + throughput: 190.94901661256444 + estimated_peak_memory_range: + min: 4943872 + max: 17695848 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 285 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 285 + job_id: jmg981qlp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:43:51Z' - torchscript_onnx_qnn: - inference_time: 5771.0 - throughput: 173.28019407381737 + inference_time: 5381.0 + throughput: 185.8390633711206 estimated_peak_memory_range: min: 4923392 max: 4923392 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 285 - job_id: j2p0e1395 + job_id: 
jygzw9ykg job_status: Passed - torchscript_onnx_ort: - inference_time: 6381.0 - throughput: 156.7152483936687 + torchscript_onnx: + inference_time: 6408.0 + throughput: 156.05493133583022 estimated_peak_memory_range: - min: 10723328 - max: 10723328 + min: 6709248 + max: 6709248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: j1gle06jp + job_id: jz5wxvr3p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:33:09Z' + timestamp: '2024-06-22T23:43:54Z' diff --git a/qai_hub_models/models/yolov8_det_quantized/export.py b/qai_hub_models/models/yolov8_det_quantized/export.py index 1c3d53f0..6f6b4177 100644 --- a/qai_hub_models/models/yolov8_det_quantized/export.py +++ b/qai_hub_models/models/yolov8_det_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -228,8 +228,8 @@ def main(): parser = export_parser( model_cls=Model, supports_qnn=False, - supports_ort=False, - supports_precompiled_ort=False, + supports_onnx=False, + supports_precompiled_qnn_onnx=False, ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/yolov8_det_quantized/model.py b/qai_hub_models/models/yolov8_det_quantized/model.py index 2943cb6a..bc81aab8 100644 --- a/qai_hub_models/models/yolov8_det_quantized/model.py +++ b/qai_hub_models/models/yolov8_det_quantized/model.py @@ -84,7 +84,6 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() final_model = cls(sim, False) return final_model diff --git a/qai_hub_models/models/yolov8_det_quantized/perf.yaml b/qai_hub_models/models/yolov8_det_quantized/perf.yaml index 9271d6d2..8abffb0c 100644 --- a/qai_hub_models/models/yolov8_det_quantized/perf.yaml +++ b/qai_hub_models/models/yolov8_det_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: 
YOLOv8-Detection-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 2332.0 - throughput: 428.8164665523156 + inference_time: 2346.0 + throughput: 426.25745950554136 estimated_peak_memory_range: min: 12288 - max: 3599048 + max: 2234648 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,7 +58,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 277 - job_id: jwgoe1kqp + job_id: jnp13lm85 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -63,13 +67,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:33:51Z' + timestamp: '2024-06-22T23:44:37Z' - torchscript_onnx_tflite: - inference_time: 1594.0 - throughput: 627.3525721455458 + inference_time: 1597.0 + throughput: 626.1740763932373 estimated_peak_memory_range: min: 12288 - max: 49918192 + max: 54532816 primary_compute_unit: NPU precision: int8 layer_info: @@ -77,7 +81,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 277 - job_id: j1pvz1rkg + job_id: jvgd09mrp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -86,13 +90,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:33:52Z' + timestamp: '2024-06-22T23:44:38Z' - torchscript_onnx_tflite: - inference_time: 2326.0 - throughput: 429.9226139294927 + inference_time: 2337.0 + throughput: 427.89901583226356 estimated_peak_memory_range: min: 12288 - max: 2668824 + max: 2345960 primary_compute_unit: NPU precision: int8 layer_info: @@ -100,7 +104,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 277 - job_id: j7gjk02v5 + job_id: jz576w8vg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -109,13 +113,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:33:53Z' + timestamp: '2024-06-22T23:44:39Z' - torchscript_onnx_tflite: - inference_time: 6463.0 - throughput: 154.7269070091289 + inference_time: 2337.0 + throughput: 427.89901583226356 estimated_peak_memory_range: - min: 81920 - max: 33931536 + min: 12288 + max: 2733568 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 276 + layers_on_gpu: 0 + layers_on_cpu: 1 + total_layers: 277 + job_id: jqp48o28g + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:44:40Z' + - torchscript_onnx_tflite: + inference_time: 6534.0 + throughput: 153.04560759106212 + estimated_peak_memory_range: + min: 36864 + max: 38744896 primary_compute_unit: NPU precision: int8 layer_info: @@ -123,7 +150,7 @@ models: layers_on_gpu: 1 layers_on_cpu: 1 total_layers: 277 - job_id: jlpe4rwo5 + job_id: j0pxmjz3g job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -132,13 +159,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-08T23:33:54Z' + timestamp: '2024-06-22T23:44:42Z' - torchscript_onnx_tflite: - inference_time: 46343.0 - throughput: 21.57823187967978 + inference_time: 46991.0 + throughput: 21.280670766742567 estimated_peak_memory_range: - min: 1802240 - max: 10846104 + min: 2846720 + max: 18859192 primary_compute_unit: NPU precision: int8 layer_info: @@ -146,7 +173,7 @@ models: layers_on_gpu: 2 layers_on_cpu: 1 total_layers: 277 - job_id: jygzvxjop + job_id: jo5m42ld5 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -155,4 +182,4 @@ models: os_name: Android 
manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-08T23:33:55Z' + timestamp: '2024-06-22T23:44:43Z' diff --git a/qai_hub_models/models/yolov8_seg/export.py b/qai_hub_models/models/yolov8_seg/export.py index d2ecb2c9..80ec993b 100644 --- a/qai_hub_models/models/yolov8_seg/export.py +++ b/qai_hub_models/models/yolov8_seg/export.py @@ -38,7 +38,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,7 +117,6 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace( model.to("cpu"), make_torch_inputs(input_spec), check_trace=False ) @@ -125,7 +124,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_4" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -171,7 +170,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -189,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -211,7 +210,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_4", inference_result, target_runtime ) @@ -229,7 +228,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/yolov8_seg/model.py b/qai_hub_models/models/yolov8_seg/model.py index bbd5fee2..7b64a222 100644 --- a/qai_hub_models/models/yolov8_seg/model.py +++ b/qai_hub_models/models/yolov8_seg/model.py @@ -43,7 +43,6 @@ def from_pretrained(cls, ckpt_name: str = DEFAULT_WEIGHTS): f"Supported checkpoints are {list(SUPPORTED_WEIGHTS)}." 
) model = ultralytics_YOLO(ckpt_name).model - model.eval() return cls(model) def forward(self, image: torch.Tensor): diff --git a/qai_hub_models/models/yolov8_seg/perf.yaml b/qai_hub_models/models/yolov8_seg/perf.yaml index b39496c1..031576ab 100644 --- a/qai_hub_models/models/yolov8_seg/perf.yaml +++ b/qai_hub_models/models/yolov8_seg/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,26 +38,26 @@ models: - name: YOLOv8-Segmentation performance_metrics: - torchscript_onnx_tflite: - inference_time: 7329.0 - throughput: 136.4442625187611 + inference_time: 6377.0 + throughput: 156.81354869060686 estimated_peak_memory_range: - min: 4210688 - max: 6975488 + min: 4595712 + max: 7752744 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 337 + layers_on_npu: 336 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 337 - job_id: jo5mvw6d5 + total_layers: 336 + job_id: j1p383r35 job_status: Passed - torchscript_onnx_ort: - inference_time: 7942.0 - throughput: 125.91286829513976 + torchscript_onnx: + inference_time: 7841.0 + throughput: 127.53475322025253 estimated_peak_memory_range: - min: 14696448 - max: 42029952 + min: 18956288 + max: 44241296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +65,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 336 - job_id: jogkrlqw5 + job_id: jvgd09yrp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,28 +74,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:34:30Z' + timestamp: '2024-06-22T23:45:26Z' - torchscript_onnx_tflite: - inference_time: 5452.0 - throughput: 183.41892883345562 + inference_time: 4708.0 + throughput: 212.40441801189465 estimated_peak_memory_range: - min: 3268608 - max: 101106816 + min: 36864 + max: 100005056 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 337 + layers_on_npu: 336 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 337 - job_id: jegnr9mk5 + total_layers: 336 + job_id: jwgom09q5 job_status: Passed - torchscript_onnx_ort: - inference_time: 5339.0 - throughput: 187.30099269526127 + torchscript_onnx: + inference_time: 5363.0 + throughput: 186.46280067126608 estimated_peak_memory_range: - min: 16973824 - max: 81417296 + min: 16642048 + max: 73744656 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +103,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 336 - job_id: jn5q97rnp + job_id: jz576w1vg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,21 +112,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:34:31Z' + timestamp: '2024-06-22T23:45:27Z' - torchscript_onnx_tflite: - inference_time: 7404.0 - throughput: 135.06212857914642 + inference_time: 6318.0 + throughput: 158.27793605571384 estimated_peak_memory_range: - min: 4583424 - max: 7403760 + min: 4599808 + max: 29252752 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 337 + layers_on_npu: 336 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 337 - job_id: jopr1420g + total_layers: 336 + job_id: j1pv4oykp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +135,36 @@ models: os_name: Android 
manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:34:25Z' - - torchscript_onnx_ort: - inference_time: 7762.0 - throughput: 128.83277505797474 + timestamp: '2024-06-22T23:45:18Z' + - torchscript_onnx_tflite: + inference_time: 6305.0 + throughput: 158.60428231562253 + estimated_peak_memory_range: + min: 4591616 + max: 14165424 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 336 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 336 + job_id: j7gj1m6vg + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:45:19Z' + - torchscript_onnx: + inference_time: 7802.0 + throughput: 128.1722635221738 estimated_peak_memory_range: - min: 22315008 - max: 22315008 + min: 22433792 + max: 22433792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +172,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 336 - job_id: j1gle02jp + job_id: jqp48o68g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +181,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:34:32Z' + timestamp: '2024-06-22T23:45:29Z' diff --git a/qai_hub_models/utils/args.py b/qai_hub_models/utils/args.py index 184ab87f..c4c2ebb3 100644 --- a/qai_hub_models/utils/args.py +++ b/qai_hub_models/utils/args.py @@ -28,7 +28,7 @@ from qai_hub_models.utils.inference import HubModel, compile_model_from_args from qai_hub_models.utils.qai_hub_helpers import can_access_qualcomm_ai_hub -DEFAULT_EXPORT_DEVICE = "Samsung Galaxy S23" +DEFAULT_EXPORT_DEVICE = "Samsung Galaxy S23 (Family)" class ParseEnumAction(argparse.Action): @@ -40,9 +40,10 @@ def __call__(self, parser, namespace, values, option_string=None): setattr(namespace, self.dest, self.enum_type[values.upper().replace("-", "_")]) -def get_parser() -> argparse.ArgumentParser: +def get_parser(allow_dupe_args: bool = False) -> argparse.ArgumentParser: return argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + conflict_handler="resolve" if allow_dupe_args else "error", ) @@ -399,15 +400,17 @@ def _evaluate_export_common_parser( model_cls: Type[FromPretrainedTypeVar] | Type[FromPrecompiledTypeVar], supports_tflite=True, supports_qnn=True, - supports_ort=True, - supports_precompiled_ort=True, + supports_onnx=True, + supports_precompiled_qnn_onnx=True, default_runtime=TargetRuntime.TFLITE, exporting_compiled_model=False, ) -> argparse.ArgumentParser: """ Common arguments between export and evaluate scripts. """ - parser = get_parser() + # Set handler to resolve, to allow from_pretrained and get_input_spec + # to have the same argument names. 
+ parser = get_parser(allow_dupe_args=True) if not exporting_compiled_model: # Default runtime for compiled model is fixed for given model @@ -416,10 +419,10 @@ def _evaluate_export_common_parser( available_runtimes.append(TargetRuntime.TFLITE) if supports_qnn: available_runtimes.append(TargetRuntime.QNN) - if supports_ort: - available_runtimes.append(TargetRuntime.ORT) - if supports_precompiled_ort: - available_runtimes.append(TargetRuntime.PRECOMPILED_ORT) + if supports_onnx: + available_runtimes.append(TargetRuntime.ONNX) + if supports_precompiled_qnn_onnx: + available_runtimes.append(TargetRuntime.PRECOMPILED_QNN_ONNX) default_runtime = _get_default_runtime(available_runtimes) add_target_runtime_arg( @@ -459,8 +462,8 @@ def export_parser( components: Optional[List[str]] = None, supports_tflite: bool = True, supports_qnn: bool = True, - supports_ort: bool = True, - supports_precompiled_ort: bool = True, + supports_onnx: bool = True, + supports_precompiled_qnn_onnx: bool = True, default_runtime: TargetRuntime = TargetRuntime.TFLITE, exporting_compiled_model: bool = False, default_export_device: str = DEFAULT_EXPORT_DEVICE, @@ -477,10 +480,10 @@ def export_parser( supports_qnn: Whether QNN export is supported. Default=True. - supports_ort: + supports_onnx: Whether ORT export is supported. Default=True. - supports_precompiled_ort: + supports_precompiled_qnn_onnx: Whether precompiled ORT (with QNN context binary) export is supported. Default=True. default_runtime: Which runtime to use as default if not specified in cli args. @@ -498,8 +501,8 @@ def export_parser( model_cls=model_cls, supports_tflite=supports_tflite, supports_qnn=supports_qnn, - supports_ort=supports_ort, - supports_precompiled_ort=supports_precompiled_ort, + supports_onnx=supports_onnx, + supports_precompiled_qnn_onnx=supports_precompiled_qnn_onnx, default_runtime=default_runtime, exporting_compiled_model=exporting_compiled_model, ) @@ -563,7 +566,7 @@ def evaluate_parser( supported_datasets: List[str], supports_tflite=True, supports_qnn=True, - supports_ort=True, + supports_onnx=True, default_runtime=TargetRuntime.TFLITE, ) -> argparse.ArgumentParser: """ @@ -578,7 +581,7 @@ def evaluate_parser( supports_qnn: Whether QNN export is supported. Default=True. - supports_ort: + supports_onnx: Whether ORT export is supported. Default=True. exporting_compiled_model: @@ -594,7 +597,7 @@ def evaluate_parser( model_cls=model_cls, supports_tflite=supports_tflite, supports_qnn=supports_qnn, - supports_ort=supports_ort, + supports_onnx=supports_onnx, default_runtime=default_runtime, ) parser.add_argument( diff --git a/qai_hub_models/utils/asset_loaders.py b/qai_hub_models/utils/asset_loaders.py index adf4b2bc..eb1b0760 100644 --- a/qai_hub_models/utils/asset_loaders.py +++ b/qai_hub_models/utils/asset_loaders.py @@ -969,19 +969,21 @@ def download_file(web_url: str, dst_path: str, num_retries: int = 4) -> str: # Streaming, so we can iterate over the response. response = requests.get(web_url, stream=True) + if response.status_code != 200: + raise ValueError(f"Unable to download file at {web_url}") # Sizes in bytes. 
total_size = int(response.headers.get("content-length", 0)) block_size = 1024 - with tqdm(total=total_size, unit="B", unit_scale=True) as progress_bar: - with open(dst_path, "wb") as file: - for data in response.iter_content(block_size): - progress_bar.update(len(data)) - file.write(data) - - if response.status_code != 200: - raise ValueError(f"Unable to download file at {web_url}") + with qaihm_temp_dir() as tmp_dir: + tmp_filepath = os.path.join(tmp_dir, Path(dst_path).name) + with tqdm(total=total_size, unit="B", unit_scale=True) as progress_bar: + with open(tmp_filepath, "wb") as file: + for data in response.iter_content(block_size): + progress_bar.update(len(data)) + file.write(data) + os.rename(tmp_filepath, dst_path) print("Done") return dst_path diff --git a/qai_hub_models/utils/base_model.py b/qai_hub_models/utils/base_model.py index 377fc357..26bc9d26 100644 --- a/qai_hub_models/utils/base_model.py +++ b/qai_hub_models/utils/base_model.py @@ -4,6 +4,8 @@ # --------------------------------------------------------------------- from __future__ import annotations +from contextlib import nullcontext +from copy import deepcopy from pathlib import Path from typing import Any, List, Optional @@ -102,6 +104,26 @@ class BaseModel( def __init__(self): torch.nn.Module.__init__(self) # Initialize Torch Module HubModel.__init__(self) # Initialize Hub Model + self.eval() + + def __setattr__(self, name: str, value: Any) -> None: + """ + When a new torch.nn.Module attribute is added, we want to set it to eval mode. + If this model is being trained, calling `model.train()` + will reverse all of these. + """ + if isinstance(value, torch.nn.Module) and not self.training: + value.eval() + torch.nn.Module.__setattr__(self, name, value) + + def __call__(self, *args, **kwargs): + """ + If a model is in eval mode (which equates to self.training == False), + we don't want to compute gradients when doing the forward pass. + """ + context_fn = nullcontext if self.training else torch.no_grad + with context_fn(): + return torch.nn.Module.__call__(self, *args, **kwargs) def convert_to_torchscript( self, input_spec: InputSpec | None = None, check_trace: bool = True @@ -115,8 +137,14 @@ def convert_to_torchscript( if not input_spec: input_spec = self.get_input_spec() + # Torchscript should never be trained, so disable gradients for all parameters. + # Need to do this on a model copy, in case the original model is being trained. 
+ model_copy = deepcopy(self) + for param in model_copy.parameters(): + param.requires_grad = False + return torch.jit.trace( - self, make_torch_inputs(input_spec), check_trace=check_trace + model_copy, make_torch_inputs(input_spec), check_trace=check_trace ) def convert_to_hub_source_model( @@ -174,12 +202,12 @@ def get_hub_compile_options( break target_runtime_flag = target_runtime_flag or "qnn_lib_aarch64_android" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime == TargetRuntime.ONNX: target_runtime_flag = "onnx" elif target_runtime == TargetRuntime.TFLITE: target_runtime_flag = "tflite" - elif target_runtime == TargetRuntime.PRECOMPILED_ORT: - target_runtime_flag = "compiled_qnn_onnx" + elif target_runtime == TargetRuntime.PRECOMPILED_QNN_ONNX: + target_runtime_flag = "precompiled_qnn_onnx" else: raise NotImplementedError() diff --git a/qai_hub_models/utils/compare.py b/qai_hub_models/utils/compare.py index 8b887ba9..f9ae5ac5 100644 --- a/qai_hub_models/utils/compare.py +++ b/qai_hub_models/utils/compare.py @@ -43,8 +43,7 @@ def torch_inference( inputs[input_name] = torch.from_numpy(sample_inputs[input_name][i]).to( "cpu" ) - with torch.no_grad(): - out = model(*inputs.values()) + out = model(*inputs.values()) out_tuple = (out,) if isinstance(out, torch.Tensor) else out out_tuple = _flatten_tuple(out_tuple) diff --git a/qai_hub_models/utils/config_loaders.py b/qai_hub_models/utils/config_loaders.py index 0c36432f..c61fbda9 100644 --- a/qai_hub_models/utils/config_loaders.py +++ b/qai_hub_models/utils/config_loaders.py @@ -100,6 +100,7 @@ "lgpl-lr", "deepfloyd-if-license", "llama2", + "llama3", "unknown", "other", } @@ -484,7 +485,7 @@ def __init__( has_on_target_demo: bool, qnn_export_failure_reason: str, tflite_export_failure_reason: str, - ort_export_failure_reason: str, + onnx_export_failure_reason: str, check_trace: bool, channel_last_input: List[str], channel_last_output: List[str], @@ -507,7 +508,7 @@ def __init__( self.has_on_target_demo = has_on_target_demo self.qnn_export_failure_reason = qnn_export_failure_reason self.tflite_export_failure_reason = tflite_export_failure_reason - self.ort_export_failure_reason = ort_export_failure_reason + self.onnx_export_failure_reason = onnx_export_failure_reason self.check_trace = check_trace self.channel_last_input = channel_last_input self.channel_last_output = channel_last_output @@ -548,7 +549,7 @@ def from_yaml( code_gen_config["has_on_target_demo"], code_gen_config["qnn_export_failure_reason"], code_gen_config["tflite_export_failure_reason"], - code_gen_config["ort_export_failure_reason"], + code_gen_config["onnx_export_failure_reason"], code_gen_config["check_trace"], code_gen_config["channel_last_input"], code_gen_config["channel_last_output"], @@ -577,7 +578,7 @@ def from_yaml( OptionalSchema("has_on_target_demo", default=False): bool, OptionalSchema("qnn_export_failure_reason", default=""): str, OptionalSchema("tflite_export_failure_reason", default=""): str, - OptionalSchema("ort_export_failure_reason", default=""): str, + OptionalSchema("onnx_export_failure_reason", default=""): str, OptionalSchema("check_trace", default=True): bool, OptionalSchema("channel_last_input", default=[]): list, OptionalSchema("channel_last_output", default=[]): list, @@ -743,6 +744,7 @@ def validate(self) -> Tuple[bool, Optional[str]]: if ( self.code_gen_config.tflite_export_failure_reason and self.code_gen_config.qnn_export_failure_reason + and self.code_gen_config.onnx_export_failure_reason ): return False, "Public models must 
support at least one export path" diff --git a/qai_hub_models/utils/display.py b/qai_hub_models/utils/display.py index c628b0ff..5a48fc96 100644 --- a/qai_hub_models/utils/display.py +++ b/qai_hub_models/utils/display.py @@ -94,5 +94,5 @@ def display_or_save_image( if display_image(image, desc): return True - save_image(image, str(Path.cwd() / "build"), filename, desc) + save_image(image, os.path.join(Path.cwd(), "build"), filename, desc) return False diff --git a/qai_hub_models/utils/evaluate.py b/qai_hub_models/utils/evaluate.py index 33d8774b..314faad4 100644 --- a/qai_hub_models/utils/evaluate.py +++ b/qai_hub_models/utils/evaluate.py @@ -368,10 +368,9 @@ def evaluate_on_dataset( else: on_device_results.append(hub_model(model_inputs.split(1, dim=0))) - with torch.no_grad(): - for model_input, ground_truth in zip(model_inputs, ground_truth_values): - torch_output = torch_model(model_input.unsqueeze(0)) - torch_evaluator.add_batch(torch_output, ground_truth.unsqueeze(0)) + for model_input, ground_truth in zip(model_inputs, ground_truth_values): + torch_output = torch_model(model_input.unsqueeze(0)) + torch_evaluator.add_batch(torch_output, ground_truth.unsqueeze(0)) print( f"Cumulative torch accuracy on batch {i + 1}/{num_batches}: " f"{torch_evaluator.formatted_accuracy()}" diff --git a/qai_hub_models/utils/printing.py b/qai_hub_models/utils/printing.py index 1074a3d4..1adc37db 100644 --- a/qai_hub_models/utils/printing.py +++ b/qai_hub_models/utils/printing.py @@ -98,7 +98,7 @@ def print_profile_metrics_from_job( elif is_qnn_hub_model(profile_job.model): runtime = TargetRuntime.QNN elif profile_job.model.model_type in [SourceModelType.ORT, SourceModelType.ONNX]: - runtime = TargetRuntime.ORT + runtime = TargetRuntime.ONNX else: raise NotImplementedError() diff --git a/qai_hub_models/utils/quantization_aimet.py b/qai_hub_models/utils/quantization_aimet.py index 22a6a29c..fbd845e6 100644 --- a/qai_hub_models/utils/quantization_aimet.py +++ b/qai_hub_models/utils/quantization_aimet.py @@ -444,7 +444,7 @@ def get_calibration_data( """ Calibration dataset for this model and input spec. """ - if target_runtime == TargetRuntime.ORT: + if target_runtime == TargetRuntime.ONNX: # TODO(#10896): Restore quantize_io flag when targeting ORT return None @@ -460,7 +460,10 @@ def get_hub_compile_options( device: Optional[Device] = None, ) -> str: quantization_flags = " --quantize_io" - if target_runtime not in [TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT]: + if target_runtime not in [ + TargetRuntime.ONNX, + TargetRuntime.PRECOMPILED_QNN_ONNX, + ]: quantization_flags += " --quantize_full_type int8" return ( super().get_hub_compile_options( # type: ignore @@ -473,3 +476,11 @@ def preferred_hub_source_model_format( self, target_runtime: TargetRuntime ) -> SourceModelFormat: return SourceModelFormat.ONNX + + def __call__(self, *args, **kwargs): + """ + Instance of AIMETQuantizableMixin should never be trained, + so should be safe to disable gradients during forward pass. + """ + with torch.no_grad(): + return super().__call__(*args, **kwargs) diff --git a/qai_hub_models/utils/scorecard/common.py b/qai_hub_models/utils/scorecard/common.py index a8395ec0..8077244a 100644 --- a/qai_hub_models/utils/scorecard/common.py +++ b/qai_hub_models/utils/scorecard/common.py @@ -32,6 +32,10 @@ class ScorecardDevice(Enum): cs_8250 = 4 cs_8550 = 5 cs_x_elite = 6 + cs_auto_lemans_8255 = 7 + cs_auto_lemans_8775 = 8 + cs_auto_lemans_8650 = 9 + # cs_auto_makena_8540 | Disabled until fp16 support is enabled for makena. 
def enabled(self) -> bool: valid_test_devices = os.environ.get("WHITELISTED_PROFILE_TEST_DEVICES", "ALL") @@ -41,6 +45,24 @@ def enabled(self) -> bool: or self.name in valid_test_devices.split(",") ) + def get_disabled_models(self) -> List[str]: + """ + Each chipset can have a list of 'disabled' models, for which the + chipset won't show up as a 'supported chipset' for that model. + """ + if self == ScorecardDevice.cs_6490: + return [ + "ConvNext-Tiny-w8a8-Quantized", + "ConvNext-Tiny-w8a16-Quantized", + "ResNet50Quantized", + "RegNetQuantized", + "HRNetPoseQuantized", + "SESR-M5-Quantized", + "Midas-V2-Quantized", + "Posenet-Mobilenet-Quantized", + ] + return [] + def all_enabled(self) -> List["ScorecardDevice"]: return [x for x in ScorecardDevice if x.enabled()] @@ -57,6 +79,14 @@ def get_reference_device(self) -> hub.Device: return _get_cached_device("QCS8550 (Proxy)") if self == ScorecardDevice.cs_x_elite: return _get_cached_device("Snapdragon X Elite CRD") + if self == ScorecardDevice.cs_auto_lemans_8255: + return _get_cached_device("SA8255 (Proxy)") + if self == ScorecardDevice.cs_auto_lemans_8775: + return _get_cached_device("SA8775 (Proxy)") + if self == ScorecardDevice.cs_auto_lemans_8650: + return _get_cached_device("SA8650 (Proxy)") + # if self == ScorecardDevice.cs_auto_makena_8540: + # return _get_cached_device("SA8540 (Proxy)") raise NotImplementedError(f"No reference device for {self.name}") def get_chipset(self) -> str: @@ -72,6 +102,14 @@ def get_chipset(self) -> str: return "qualcomm-qcs8550" if self == ScorecardDevice.cs_x_elite: return "qualcomm-snapdragon-x-elite" + if self == ScorecardDevice.cs_auto_lemans_8255: + return "qualcomm-sa8255p" + if self == ScorecardDevice.cs_auto_lemans_8775: + return "qualcomm-sa8775p" + if self == ScorecardDevice.cs_auto_lemans_8650: + return "qualcomm-sa8650p" + # if self == ScorecardDevice.cs_auto_makena_8540: + # return "qualcomm-sa8540p" raise NotImplementedError(f"No chipset for {self.name}") def get_os(self) -> str: @@ -84,7 +122,7 @@ def get_os(self) -> str: class ScorecardCompilePath(Enum): TFLITE = 0 QNN = 1 - ORT = 2 + ONNX = 2 def __str__(self): return self.name.lower() @@ -122,8 +160,8 @@ def get_parameterized_test_config( def get_runtime(self) -> TargetRuntime: if self == ScorecardCompilePath.TFLITE: return TargetRuntime.TFLITE - if self == ScorecardCompilePath.ORT: - return TargetRuntime.ORT + if self == ScorecardCompilePath.ONNX: + return TargetRuntime.ONNX if self == ScorecardCompilePath.QNN: return TargetRuntime.QNN raise NotImplementedError() @@ -155,14 +193,16 @@ def get_job_cache_name( class ScorecardProfilePath(Enum): TFLITE = 0 QNN = 1 - ORT = 2 - ORT_DML_GPU = 3 + ONNX = 2 + ONNX_DML_GPU = 3 def __str__(self): return self.name.lower() @property def long_name(self): + if self.name.lower() == "onnx": + return f"torchscript_{self.name.lower()}" return f"torchscript_onnx_{self.name.lower()}" def enabled(self) -> bool: @@ -179,7 +219,7 @@ def all_enabled() -> List["ScorecardProfilePath"]: def include_in_perf_yaml(self) -> bool: return self in [ ScorecardProfilePath.QNN, - ScorecardProfilePath.ORT, + ScorecardProfilePath.ONNX, ScorecardProfilePath.TFLITE, ] @@ -201,8 +241,8 @@ def get_parameterized_test_config( def get_runtime(self) -> TargetRuntime: if self == ScorecardProfilePath.TFLITE: return TargetRuntime.TFLITE - if self in [ScorecardProfilePath.ORT, ScorecardProfilePath.ORT_DML_GPU]: - return TargetRuntime.ORT + if self in [ScorecardProfilePath.ONNX, ScorecardProfilePath.ONNX_DML_GPU]: + return TargetRuntime.ONNX 
if self == ScorecardProfilePath.QNN: return TargetRuntime.QNN raise NotImplementedError() @@ -210,14 +250,14 @@ def get_runtime(self) -> TargetRuntime: def get_compile_path(self) -> ScorecardCompilePath: if self == ScorecardProfilePath.TFLITE: return ScorecardCompilePath.TFLITE - if self in [ScorecardProfilePath.ORT, ScorecardProfilePath.ORT_DML_GPU]: - return ScorecardCompilePath.ORT + if self in [ScorecardProfilePath.ONNX, ScorecardProfilePath.ONNX_DML_GPU]: + return ScorecardCompilePath.ONNX if self == ScorecardProfilePath.QNN: return ScorecardCompilePath.QNN raise NotImplementedError() def get_profile_options(self) -> str: - if self == ScorecardProfilePath.ORT_DML_GPU: + if self == ScorecardProfilePath.ONNX_DML_GPU: return "--compute_unit gpu" return "" @@ -234,7 +274,7 @@ def get_test_devices( if aimet_model else [] ) - elif self == ScorecardProfilePath.ORT: + elif self == ScorecardProfilePath.ONNX: devices = [ ScorecardDevice.cs_8_gen_2, ScorecardDevice.cs_8_gen_3, @@ -246,8 +286,11 @@ def get_test_devices( ScorecardDevice.cs_8_gen_3, ScorecardDevice.cs_x_elite, ScorecardDevice.cs_8550, + ScorecardDevice.cs_auto_lemans_8650, + ScorecardDevice.cs_auto_lemans_8775, + ScorecardDevice.cs_auto_lemans_8255, ] - elif self == ScorecardProfilePath.ORT_DML_GPU: + elif self == ScorecardProfilePath.ONNX_DML_GPU: devices = [ScorecardDevice.cs_x_elite] else: raise NotImplementedError() diff --git a/qai_hub_models/utils/scorecard/model_card.py b/qai_hub_models/utils/scorecard/model_card.py index 15849492..ea618fd2 100644 --- a/qai_hub_models/utils/scorecard/model_card.py +++ b/qai_hub_models/utils/scorecard/model_card.py @@ -316,10 +316,22 @@ def from_runs(model_runs: List[ProfileJobSummary]): def get_chipsets(self) -> Set[str]: chips: Set[str] = set() - for _, model_summary in self.runs_per_model.items(): - chips.update( - [x.get_chipset() for x in model_summary.runs_per_device.keys()] - ) + for model_id, model_summary in self.runs_per_model.items(): + for device, device_summary in model_summary.runs_per_device.items(): + # At least 1 successful run must exist for this chipset + success = False + for run in device_summary.run_per_path.values(): + if run.success: + success = True + break + if not success: + continue + + # Don't include disabled models + if model_id in device.get_disabled_models(): + continue + + chips.add(device.get_chipset()) return chips def get_perf_card( diff --git a/scripts/util/env_create.sh b/scripts/util/env_create.sh index 71b85de8..3e7c066d 100755 --- a/scripts/util/env_create.sh +++ b/scripts/util/env_create.sh @@ -36,6 +36,7 @@ if [ ! -d "$ENV_PATH" ]; then echo "Activating virtual env." source "$ENV_PATH/bin/activate" + pip install pip==24.0 else source "$ENV_PATH/bin/activate" echo "Env created already. Skipping creation."
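
Throughout the export.py changes above, TargetRuntime.ORT and TargetRuntime.PRECOMPILED_ORT are renamed to TargetRuntime.ONNX and TargetRuntime.PRECOMPILED_QNN_ONNX, and both still map to the .onnx output extension. A minimal sketch of that mapping using a stand-in enum (the real enum ships with qai_hub_models and may carry more members and different values):

from enum import Enum

class TargetRuntime(Enum):  # stand-in for the qai_hub_models enum
    TFLITE = 0
    QNN = 1
    ONNX = 2                  # was ORT
    PRECOMPILED_QNN_ONNX = 3  # was PRECOMPILED_ORT

def target_runtime_extension(target_runtime: TargetRuntime) -> str:
    # Mirrors the extension selection done in each model's export.py.
    if target_runtime == TargetRuntime.QNN:
        return "so"       # QNN model library
    if target_runtime == TargetRuntime.TFLITE:
        return "tflite"
    if target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}:
        return "onnx"     # plain ONNX, or ONNX wrapping a precompiled QNN context binary
    raise NotImplementedError(target_runtime)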
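The perf.yaml metric keys changing from torchscript_onnx_ort to torchscript_onnx line up with the ScorecardProfilePath.long_name change above, which special-cases the ONNX path while the other paths keep the torchscript_onnx_<path> pattern. Roughly, the property behaves like this (shown on a stand-in enum rather than the real ScorecardProfilePath):

from enum import Enum

class ProfilePath(Enum):  # stand-in for ScorecardProfilePath
    TFLITE = 0
    QNN = 1
    ONNX = 2

    @property
    def long_name(self) -> str:
        if self.name.lower() == "onnx":
            return f"torchscript_{self.name.lower()}"   # "torchscript_onnx"
        return f"torchscript_onnx_{self.name.lower()}"  # e.g. "torchscript_onnx_tflite"

assert ProfilePath.ONNX.long_name == "torchscript_onnx"
assert ProfilePath.TFLITE.long_name == "torchscript_onnx_tflite"
assert ProfilePath.QNN.long_name == "torchscript_onnx_qnn"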
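In utils/args.py, get_parser gains an allow_dupe_args flag that switches the argparse conflict handler to "resolve", so the export and evaluate parsers can register the same option twice (for example once for from_pretrained and once for get_input_spec) without argparse raising an error; the later definition wins. A small self-contained example of the mechanism (the --height option is purely illustrative):

import argparse

def get_parser(allow_dupe_args: bool = False) -> argparse.ArgumentParser:
    # With "resolve", a later add_argument() overrides an earlier one that
    # uses the same flag; the default "error" handler would raise instead.
    return argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        conflict_handler="resolve" if allow_dupe_args else "error",
    )

parser = get_parser(allow_dupe_args=True)
parser.add_argument("--height", type=int, default=224)  # e.g. added for get_input_spec
parser.add_argument("--height", type=int, default=640)  # e.g. added again for from_pretrained
print(parser.parse_args(["--height", "512"]).height)    # 512; no ArgumentError raised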
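utils/asset_loaders.download_file now fails fast on a non-200 response and streams the payload into a temporary file that is only moved to dst_path once the download finishes, so an interrupted transfer cannot leave a truncated file behind. A standalone sketch of the same pattern; it substitutes tempfile.TemporaryDirectory for the repo's qaihm_temp_dir helper and shutil.move for os.rename so it also works when the temp directory sits on a different filesystem:

import os
import shutil
import tempfile
from pathlib import Path

import requests
from tqdm import tqdm

def download_file(web_url: str, dst_path: str, block_size: int = 1024) -> str:
    response = requests.get(web_url, stream=True)
    if response.status_code != 200:
        raise ValueError(f"Unable to download file at {web_url}")

    total_size = int(response.headers.get("content-length", 0))
    with tempfile.TemporaryDirectory() as tmp_dir:
        # Write to a scratch location first; dst_path only appears once complete.
        tmp_filepath = os.path.join(tmp_dir, Path(dst_path).name)
        with tqdm(total=total_size, unit="B", unit_scale=True) as progress_bar:
            with open(tmp_filepath, "wb") as file:
                for data in response.iter_content(block_size):
                    progress_bar.update(len(data))
                    file.write(data)
        shutil.move(tmp_filepath, dst_path)
    return dst_path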
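The utils/base_model.py changes, together with the removal of the scattered model.eval() and torch.no_grad() calls in the model-specific files above, centralize inference-mode handling in BaseModel: instances start in eval mode, newly attached sub-modules are put in eval mode, forward passes run under torch.no_grad() unless training has been re-enabled, and TorchScript tracing operates on a frozen deep copy. A condensed sketch of the pattern, using illustrative class names rather than the real BaseModel:

from contextlib import nullcontext
from copy import deepcopy

import torch

class EvalByDefaultModule(torch.nn.Module):
    """Illustrative stand-in for BaseModel's eval/no_grad behavior."""

    def __init__(self):
        super().__init__()
        self.eval()  # start in inference mode

    def __setattr__(self, name, value):
        # Keep newly attached sub-modules in eval mode unless training is on.
        if isinstance(value, torch.nn.Module) and not self.training:
            value.eval()
        super().__setattr__(name, value)

    def __call__(self, *args, **kwargs):
        # Skip autograd bookkeeping whenever the module is in eval mode.
        context_fn = nullcontext if self.training else torch.no_grad
        with context_fn():
            return super().__call__(*args, **kwargs)

    def trace(self, example_inputs):
        # Trace a frozen copy so the original model can still be trained later.
        model_copy = deepcopy(self)
        for param in model_copy.parameters():
            param.requires_grad = False
        return torch.jit.trace(model_copy, example_inputs)

class TinyNet(EvalByDefaultModule):
    def __init__(self):
        super().__init__()
        self.fc = torch.nn.Linear(4, 2)  # attached in eval mode automatically

    def forward(self, x):
        return self.fc(x)

net = TinyNet()
out = net(torch.randn(1, 4))
assert not out.requires_grad  # forward pass ran under torch.no_grad()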