From 8d239fae3182097b9ddca7ac40ba13f9c02a7e06 Mon Sep 17 00:00:00 2001 From: Qualcomm AI Stack Models Bot Date: Tue, 2 Apr 2024 15:41:07 -0700 Subject: [PATCH] v0.4.1 See https://github.com/quic/ai-hub-models/releases/v0.4.1 for changelog. Signed-off-by: QAIHM Team --- README.md | 1 - qai_hub_models/_version.py | 2 +- qai_hub_models/models/_shared/whisper/app.py | 23 +- .../models/_shared/whisper/model.py | 88 ++++--- .../models/_shared/whisper/test_utils.py | 16 +- qai_hub_models/models/_shared/yolo/app.py | 33 ++- qai_hub_models/models/_shared/yolo/demo.py | 16 +- qai_hub_models/models/_shared/yolo/utils.py | 4 +- qai_hub_models/models/aotgan/perf.yaml | 45 ++-- qai_hub_models/models/convnext_tiny/perf.yaml | 25 +- qai_hub_models/models/ddrnet23_slim/perf.yaml | 25 +- .../models/deeplabv3_resnet50/perf.yaml | 45 ++-- qai_hub_models/models/densenet121/perf.yaml | 49 ++-- .../models/detr_resnet101/perf.yaml | 45 ++-- .../models/detr_resnet101_dc5/perf.yaml | 45 ++-- qai_hub_models/models/detr_resnet50/perf.yaml | 45 ++-- .../models/detr_resnet50_dc5/perf.yaml | 45 ++-- .../models/efficientnet_b0/perf.yaml | 51 +++-- qai_hub_models/models/esrgan/perf.yaml | 53 ++--- .../models/facebook_denoiser/perf.yaml | 25 +- qai_hub_models/models/fastsam_s/perf.yaml | 25 +- qai_hub_models/models/fastsam_x/perf.yaml | 25 +- qai_hub_models/models/fcn_resnet50/perf.yaml | 51 +++-- .../models/ffnet_122ns_lowres/perf.yaml | 53 ++--- qai_hub_models/models/ffnet_40s/perf.yaml | 53 ++--- .../models/ffnet_40s_quantized/README.md | 5 + .../models/ffnet_40s_quantized/perf.yaml | 25 +- .../ffnet_40s_quantized/requirements.txt | 1 + qai_hub_models/models/ffnet_54s/perf.yaml | 53 ++--- .../models/ffnet_54s_quantized/README.md | 5 + .../models/ffnet_54s_quantized/perf.yaml | 25 +- .../ffnet_54s_quantized/requirements.txt | 1 + qai_hub_models/models/ffnet_78s/perf.yaml | 53 ++--- .../models/ffnet_78s_lowres/perf.yaml | 53 ++--- .../models/ffnet_78s_quantized/README.md | 5 + .../models/ffnet_78s_quantized/perf.yaml | 25 +- .../ffnet_78s_quantized/requirements.txt | 1 + qai_hub_models/models/googlenet/perf.yaml | 51 +++-- .../models/googlenet_quantized/perf.yaml | 59 ++--- qai_hub_models/models/hrnet_pose/perf.yaml | 57 ++--- .../models/hrnet_pose_quantized/README.md | 59 ----- .../models/hrnet_pose_quantized/__init__.py | 8 - .../models/hrnet_pose_quantized/conftest.py | 26 --- .../models/hrnet_pose_quantized/demo.py | 57 ----- .../models/hrnet_pose_quantized/export.py | 215 ------------------ .../models/hrnet_pose_quantized/info.yaml | 36 --- .../models/hrnet_pose_quantized/model.py | 78 ------- .../models/hrnet_pose_quantized/perf.yaml | 108 --------- .../hrnet_pose_quantized/requirements.txt | 4 - .../models/hrnet_pose_quantized/test.py | 46 ---- .../huggingface_wavlm_base_plus/model.py | 24 +- qai_hub_models/models/inception_v3/perf.yaml | 49 ++-- .../models/inception_v3_quantized/perf.yaml | 25 +- qai_hub_models/models/lama_dilated/perf.yaml | 53 ++--- qai_hub_models/models/litehrnet/perf.yaml | 23 +- .../models/mediapipe_face/perf.yaml | 115 +++++----- .../models/mediapipe_hand/perf.yaml | 121 +++++----- .../models/mediapipe_pose/perf.yaml | 121 +++++----- .../models/mediapipe_selfie/perf.yaml | 47 ++-- qai_hub_models/models/mnasnet05/perf.yaml | 51 +++-- qai_hub_models/models/mobilenet_v2/perf.yaml | 47 ++-- .../models/mobilenet_v2_quantized/perf.yaml | 47 ++-- .../models/mobilenet_v3_large/perf.yaml | 23 +- .../mobilenet_v3_large_quantized/perf.yaml | 33 +-- .../models/mobilenet_v3_small/perf.yaml | 21 +- qai_hub_models/models/openai_clip/perf.yaml | 144 +++++++++--- qai_hub_models/models/openpose/perf.yaml | 51 +++-- qai_hub_models/models/protocols.py | 15 +- .../models/quicksrnetlarge/perf.yaml | 51 +++-- .../quicksrnetlarge_quantized/perf.yaml | 33 +-- .../models/quicksrnetmedium/perf.yaml | 47 ++-- .../quicksrnetmedium_quantized/perf.yaml | 33 +-- .../models/quicksrnetsmall/perf.yaml | 51 +++-- .../quicksrnetsmall_quantized/perf.yaml | 31 +-- .../models/real_esrgan_general_x4v3/perf.yaml | 53 ++--- .../models/real_esrgan_x4plus/perf.yaml | 25 +- qai_hub_models/models/regnet/perf.yaml | 51 +++-- qai_hub_models/models/resnet101/perf.yaml | 51 +++-- .../models/resnet101_quantized/perf.yaml | 51 +++-- qai_hub_models/models/resnet18/perf.yaml | 51 +++-- .../models/resnet18_quantized/perf.yaml | 47 ++-- qai_hub_models/models/resnet50/perf.yaml | 53 ++--- qai_hub_models/models/resnext101/perf.yaml | 47 ++-- .../models/resnext101_quantized/perf.yaml | 25 +- qai_hub_models/models/resnext50/perf.yaml | 47 ++-- .../models/resnext50_quantized/perf.yaml | 21 +- qai_hub_models/models/sam/perf.yaml | 41 ++-- qai_hub_models/models/sesr_m5/perf.yaml | 51 +++-- .../models/sesr_m5_quantized/perf.yaml | 33 +-- qai_hub_models/models/shufflenet_v2/perf.yaml | 49 ++-- .../models/shufflenet_v2_quantized/perf.yaml | 49 ++-- qai_hub_models/models/sinet/perf.yaml | 51 +++-- qai_hub_models/models/squeezenet1_1/perf.yaml | 43 ++-- .../models/squeezenet1_1_quantized/perf.yaml | 49 ++-- qai_hub_models/models/stylegan2/perf.yaml | 33 +-- qai_hub_models/models/swin_base/perf.yaml | 25 +- qai_hub_models/models/swin_small/perf.yaml | 23 +- qai_hub_models/models/swin_tiny/perf.yaml | 25 +- qai_hub_models/models/trocr/perf.yaml | 55 ++--- .../models/unet_segmentation/perf.yaml | 51 +++-- qai_hub_models/models/vit/perf.yaml | 25 +- .../models/whisper_base_en/perf.yaml | 61 ++--- .../models/whisper_small_en/perf.yaml | 67 +++--- .../models/whisper_tiny_en/perf.yaml | 59 ++--- qai_hub_models/models/wideresnet50/perf.yaml | 51 +++-- .../models/wideresnet50_quantized/perf.yaml | 51 +++-- qai_hub_models/models/xlsr/perf.yaml | 51 +++-- .../models/xlsr_quantized/perf.yaml | 33 +-- qai_hub_models/models/yolov6/model.py | 27 ++- qai_hub_models/models/yolov6/perf.yaml | 53 ++--- qai_hub_models/models/yolov7/model.py | 26 ++- qai_hub_models/models/yolov7/perf.yaml | 25 +- qai_hub_models/models/yolov8_det/app.py | 8 + qai_hub_models/models/yolov8_det/model.py | 36 ++- qai_hub_models/models/yolov8_det/perf.yaml | 53 ++--- qai_hub_models/models/yolov8_seg/perf.yaml | 25 +- qai_hub_models/utils/args.py | 46 +++- qai_hub_models/utils/model_adapters.py | 8 +- qai_hub_models/utils/quantization_aimet.py | 42 +--- scripts/examples/quantize_deeplabv3.py | 2 +- scripts/examples/quantize_ffnet.py | 7 +- .../examples/quantize_imagenet_classifier.py | 16 +- scripts/examples/quantize_superresolution.py | 18 +- ..._numerics_imagenet_classifier_quantized.py | 36 ++- 124 files changed, 2425 insertions(+), 2701 deletions(-) create mode 100644 qai_hub_models/models/ffnet_40s_quantized/requirements.txt create mode 100644 qai_hub_models/models/ffnet_54s_quantized/requirements.txt create mode 100644 qai_hub_models/models/ffnet_78s_quantized/requirements.txt delete mode 100644 qai_hub_models/models/hrnet_pose_quantized/README.md delete mode 100644 qai_hub_models/models/hrnet_pose_quantized/__init__.py delete mode 100644 qai_hub_models/models/hrnet_pose_quantized/conftest.py delete mode 100644 qai_hub_models/models/hrnet_pose_quantized/demo.py delete mode 100644 qai_hub_models/models/hrnet_pose_quantized/export.py delete mode 100644 qai_hub_models/models/hrnet_pose_quantized/info.yaml delete mode 100644 qai_hub_models/models/hrnet_pose_quantized/model.py delete mode 100644 qai_hub_models/models/hrnet_pose_quantized/perf.yaml delete mode 100644 qai_hub_models/models/hrnet_pose_quantized/requirements.txt delete mode 100644 qai_hub_models/models/hrnet_pose_quantized/test.py diff --git a/README.md b/README.md index d9443541..8e1b9e22 100644 --- a/README.md +++ b/README.md @@ -349,7 +349,6 @@ Qualcomm® AI Hub Models is licensed under BSD-3. See the [LICENSE file](../LICE | | | | | | **Pose Estimation** | [HRNetPose](https://aihub.qualcomm.com/models/hrnet_pose) | [qai_hub_models.models.hrnet_pose](qai_hub_models/models/hrnet_pose/README.md) | ✔️ | ✔️ | ✔️ -| [HRNetPoseQuantized](https://aihub.qualcomm.com/models/hrnet_pose_quantized) | [qai_hub_models.models.hrnet_pose_quantized](qai_hub_models/models/hrnet_pose_quantized/README.md) | ✔️ | ✔️ | ✔️ | [LiteHRNet](https://aihub.qualcomm.com/models/litehrnet) | [qai_hub_models.models.litehrnet](qai_hub_models/models/litehrnet/README.md) | ✔️ | ✔️ | ✔️ | [MediaPipe-Pose-Estimation](https://aihub.qualcomm.com/models/mediapipe_pose) | [qai_hub_models.models.mediapipe_pose](qai_hub_models/models/mediapipe_pose/README.md) | ✔️ | ✔️ | ✔️ | [OpenPose](https://aihub.qualcomm.com/models/openpose) | [qai_hub_models.models.openpose](qai_hub_models/models/openpose/README.md) | ✔️ | ✔️ | ✔️ diff --git a/qai_hub_models/_version.py b/qai_hub_models/_version.py index 1fab5070..78664c69 100644 --- a/qai_hub_models/_version.py +++ b/qai_hub_models/_version.py @@ -2,4 +2,4 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- -__version__ = "0.4.0" +__version__ = "0.4.1" diff --git a/qai_hub_models/models/_shared/whisper/app.py b/qai_hub_models/models/_shared/whisper/app.py index 4a548b94..b99f54ba 100644 --- a/qai_hub_models/models/_shared/whisper/app.py +++ b/qai_hub_models/models/_shared/whisper/app.py @@ -34,7 +34,9 @@ def __init__(self, whisper: Whisper): decoder = whisper.decoder.to("cpu") encoder = whisper.encoder.to("cpu") self.num_decoder_blocks = whisper.num_decoder_blocks + self.num_decoder_heads = whisper.num_decoder_heads self.attention_dim = whisper.attention_dim + self.max_decode_len = whisper.max_decode_len # Wraps torch Module so it takes np ndarray as input and outputs if isinstance(encoder, torch.nn.Module): @@ -42,7 +44,7 @@ def __init__(self, whisper: Whisper): else: self.encoder = encoder if isinstance(decoder, torch.nn.Module): - self.decoder = TorchNumpyAdapter(decoder) + self.decoder = TorchNumpyAdapter(decoder.eval()) else: self.decoder = decoder @@ -67,18 +69,27 @@ def transcribe(self, mel_input: np.ndarray) -> str: # coreml only takes float tensors x = np.array([[TOKEN_SOT]]) decoded_tokens = [TOKEN_SOT] - cache_tensor = np.array([], dtype=np.float32).reshape( - (1, 0, self.attention_dim) - ) + sample_len = self.max_decode_len # max # of tokens to sample + cache_tensor = np.zeros((1, sample_len, self.attention_dim)).astype(np.float32) self_attn_cache = [cache_tensor] * 2 * self.num_decoder_blocks - sample_len = 224 # max # of tokens to sample sum_logprobs = 0 for i in range(sample_len): - decoder_out = self.decoder(x, *cross_attn_cache, *self_attn_cache) + # Using i to index inside the decoder model hurts the + # the model performance. + # index - used to get positional embedding correctly. + index = torch.zeros([1, 1], dtype=torch.int32) + index[0, 0] = i + # Use mask to get the k_cache updated with new key + mask = torch.zeros(1, sample_len, self.attention_dim, dtype=torch.bool) + mask[:, i, :] = 1 + decoder_out = self.decoder( + x, index, mask, *cross_attn_cache, *self_attn_cache + ) # logit has shape (1, decoded_len, 51864) logits = decoder_out[0] self_attn_cache = decoder_out[1:] # type: ignore + # logit has shape (51864,) logits = logits[0, -1] # consider only the last token diff --git a/qai_hub_models/models/_shared/whisper/model.py b/qai_hub_models/models/_shared/whisper/model.py index dffcbb07..06e2a0d3 100644 --- a/qai_hub_models/models/_shared/whisper/model.py +++ b/qai_hub_models/models/_shared/whisper/model.py @@ -29,11 +29,14 @@ def __init__( decoder: Callable[..., Tuple[torch.Tensor, Tuple[torch.Tensor, ...]]], num_decoder_blocks: int, attention_dim: int, + num_heads: int, ): self.encoder = encoder self.decoder = decoder self.num_decoder_blocks = num_decoder_blocks self.attention_dim = attention_dim + self.num_decoder_heads = num_heads + self.max_decode_len = MAX_DECODE_LEN @classmethod def from_pretrained(cls, model: str = "tiny.en"): @@ -46,7 +49,8 @@ def from_source_model(cls, whisper_model: Any): decoder = WhisperDecoderInf(whisper_model.decoder) num_decoder_blocks = len(decoder.blocks) attention_dim = decoder.attention_dim - return cls(encoder, decoder, num_decoder_blocks, attention_dim) # type: ignore + num_heads = decoder.num_heads + return cls(encoder, decoder, num_decoder_blocks, attention_dim, num_heads) # type: ignore class WhisperEncoderInf(BaseModel): @@ -120,13 +124,30 @@ def __init__(self, model: whisper.model.TextDecoder): def attention_dim(self): return self.blocks[0].attn_ln.weight.shape[0] - def forward(self, x: torch.Tensor, *kv_cache_args, **kv_cache_kwargs): + @property + def num_heads(self): + return self.blocks[0].attn.n_head + + def forward( + self, + x: torch.Tensor, + index: torch.Tensor, + mask: torch.Tensor, + *kv_cache_args, + **kv_cache_kwargs, + ): """ Args: - x: torch.LongTensor, shape = (batch_size, <= n_ctx) the text tokens + - index: torch.tensor, shape = (1, 1) + index to get the positional encoding for x. + + - mask: torch.tensor, shape = (1, max_sample_length, attn_dim) + Mask helps create kv_cache while keeping the size consistent. + - kv_cache_args: Tuple of length 4 * num_decoder_blocks. Elements are: b{i}_cross_attn_k: [1, 1500, attn_dim] @@ -136,8 +157,8 @@ def forward(self, x: torch.Tensor, *kv_cache_args, **kv_cache_kwargs): followed by - b{i}_self_attn_k: [1, decoded_len, attn_dim] - b{i}_self_attn_v: [1, decoded_len, attn_dim] + b{i}_self_attn_k: [1, max_sample_length, attn_dim] + b{i}_self_attn_v: [1, max_sample_length, attn_dim] for i = 0, ..., num_blocks @@ -147,8 +168,10 @@ def forward(self, x: torch.Tensor, *kv_cache_args, **kv_cache_kwargs): - b0_self_attn_k, b0_self_attn_v, b1_self_attn_k, ...: Updated self attn cache. 2*num_decoder_blocks """ + if not kv_cache_args: kv_cache_args = list(kv_cache_kwargs.values()) + assert isinstance(self.token_embedding, torch.nn.Module) # for mypy assert isinstance(self.ln, torch.nn.Module) # for mypy assert isinstance(self.positional_embedding, torch.nn.Parameter) # for mypy @@ -163,16 +186,13 @@ def forward(self, x: torch.Tensor, *kv_cache_args, **kv_cache_kwargs): block.cross_attn.value: kv_cache_args[i * 2 + 1], } ) - offset = next(iter(kv_cache.values())).shape[1] if kv_cache else 0 - x = ( - self.token_embedding(x) - + self.positional_embedding[offset : offset + x.shape[-1]] - ) + + x = self.token_embedding(x) + self.positional_embedding[index.long()] # x shape: (1, 1, 384) kv_cache_new = [] for block in self.blocks: - x, k_cache, v_cache = block(x, kv_cache=kv_cache) + x, k_cache, v_cache = block(x, index, mask, kv_cache=kv_cache) kv_cache_new.append(k_cache.float()) kv_cache_new.append(v_cache.float()) @@ -188,33 +208,38 @@ def forward(self, x: torch.Tensor, *kv_cache_args, **kv_cache_kwargs): return (logits,) + tuple(kv_cache_new) @staticmethod - def get_input_spec(num_blocks: int, attention_dim: int) -> InputSpec: + def get_input_spec( + num_blocks: int, attention_dim: int, num_heads: int + ) -> InputSpec: """ Returns the input specification (name -> (shape, type). This can be used to submit profiling job on Qualcomm AI Hub. """ - specs = dict(x=((1, 1), "int32")) + specs = dict( + x=((1, 1), "int32"), + index=((1, 1), "int32"), + mask=((1, MAX_DECODE_LEN, attention_dim), "int32"), + ) for i in range(num_blocks): specs[f"b{i}_cross_attn_k"] = ((1, 1500, attention_dim), "float32") specs[f"b{i}_cross_attn_v"] = ((1, 1500, attention_dim), "float32") - # Use mean length for profiling - mean_decode_len = MAX_DECODE_LEN // 2 - for i in range(num_blocks): specs[f"b{i}_self_attn_k"] = ( - (1, mean_decode_len, attention_dim), + (1, MAX_DECODE_LEN, attention_dim), "float32", ) specs[f"b{i}_self_attn_v"] = ( - (1, mean_decode_len, attention_dim), + (1, MAX_DECODE_LEN, attention_dim), "float32", ) return specs def _get_input_spec_for_instance(self) -> InputSpec: - return self.__class__.get_input_spec(len(self.blocks), self.attention_dim) + return self.__class__.get_input_spec( + len(self.blocks), self.attention_dim, self.num_heads + ) @classmethod def from_pretrained(cls): @@ -250,6 +275,8 @@ def __init__(self, model: whisper.model.MultiHeadAttention, attn_type: str): def forward( self, x: torch.Tensor, + index: torch.Tensor, + mask: torch.Tensor, kv_cache: Dict[torch.nn.Module, torch.Tensor], ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: """ @@ -273,18 +300,19 @@ def forward( assert isinstance(self.value, torch.nn.Module) # for mypy assert isinstance(self.out, torch.nn.Module) # for mypy q = self.query(x) - if self.attn_type == "self_attention": k_cache = kv_cache[self.key] v_cache = kv_cache[self.value] - k = self.key(x) - v = self.value(x) - k = torch.cat([k_cache, k], dim=1) - v = torch.cat([v_cache, v], dim=1) + k = torch.zeros(k_cache.shape) + v = torch.zeros(v_cache.shape) + k = mask * self.key(x) + k_cache + v = mask * self.value(x) + v_cache + new_index = torch.tensor([index[0, 0] + 1]).long() + wv = qkv_attention(q, k[:, :new_index], v[:, :new_index], self.n_head) else: # cross_attention k, v = kv_cache[self.key], kv_cache[self.value] + wv = qkv_attention(q, k, v, self.n_head) - wv = qkv_attention(q, k, v, self.n_head) # Return updated kv cache return self.out(wv), k.detach(), v.detach() @@ -300,6 +328,7 @@ def qkv_attention( Adapted from whisper.model.MultiHeadAttention.qkv_attention """ n_batch, n_ctx, n_state = q.shape + scale = (n_state // n_head) ** -0.25 q = q.view(*q.shape[:2], n_head, -1).permute(0, 2, 1, 3) * scale k = k.view(*k.shape[:2], n_head, -1).permute(0, 2, 3, 1) * scale @@ -307,7 +336,8 @@ def qkv_attention( qk = q @ k if mask is not None: - qk = qk + mask[:n_ctx, :n_ctx] + qk = qk + mask + # Use negative infinity to mask the zeros when doing the softmax. qk = qk.float() w = torch.nn.functional.softmax(qk, dim=-1).to(q.dtype) @@ -334,6 +364,8 @@ def __init__(self, model: whisper.model.ResidualAttentionBlock): def forward( self, x: torch.Tensor, + index: torch.Tensor, + mask: torch.Tensor, kv_cache: Dict[torch.nn.Module, torch.Tensor], ): """ @@ -347,13 +379,15 @@ def forward( assert isinstance(self.cross_attn, torch.nn.Module) # for mypy assert isinstance(self.mlp, torch.nn.Module) # for mypy assert isinstance(self.mlp_ln, torch.nn.Module) # for mypy - x_attn, k_cache, v_cache = self.attn(self.attn_ln(x), kv_cache=kv_cache) + x_attn, k_cache, v_cache = self.attn( + self.attn_ln(x), index=index, mask=mask, kv_cache=kv_cache + ) x = x + x_attn if self.cross_attn: # Ignore cross attn kv cache which is constant (pre-computed in # `WhisperCrossAttnKVCacheTorch`) x_cross_attn, _, _ = self.cross_attn( - self.cross_attn_ln(x), kv_cache=kv_cache + self.cross_attn_ln(x), index=index, mask=mask, kv_cache=kv_cache ) x = x + x_cross_attn x = x + self.mlp(self.mlp_ln(x)) diff --git a/qai_hub_models/models/_shared/whisper/test_utils.py b/qai_hub_models/models/_shared/whisper/test_utils.py index a75dd327..b3657a76 100644 --- a/qai_hub_models/models/_shared/whisper/test_utils.py +++ b/qai_hub_models/models/_shared/whisper/test_utils.py @@ -13,6 +13,7 @@ ) from qai_hub_models.models._shared.whisper.demo import TEST_AUDIO_PATH from qai_hub_models.models._shared.whisper.model import ( + MAX_DECODE_LEN, MEL_FILTER_PATH, Whisper, WhisperDecoderInf, @@ -49,11 +50,16 @@ def run_test_wrapper_numerics(whisper_version): decoder = WhisperDecoderInf(model.decoder) cross_attn_cache = encoder(mel_input) - cache_tensor = np.array([], dtype=np.float32).reshape((1, 0, decoder.attention_dim)) - self_attn_cache = [torch.from_numpy(cache_tensor)] * 2 * decoder.num_blocks - - decoder_out = decoder(tokens, *cross_attn_cache, *self_attn_cache) - logits = decoder_out[0].detach().numpy() + sample_len = MAX_DECODE_LEN + cache_tensor = np.zeros([1, sample_len, decoder.attention_dim]).astype(np.float32) + index = torch.zeros([1, 1], dtype=torch.int32) + index[0, 0] = 0 + mask = torch.zeros(1, sample_len, decoder.attention_dim, dtype=torch.bool) + mask[:, 0, :] = 1 + self_attn_cache = [cache_tensor] * 2 * decoder.num_blocks + with torch.no_grad(): + decoder_out = decoder(tokens, index, mask, *cross_attn_cache, *self_attn_cache) + logits = decoder_out[0].detach().numpy() np.testing.assert_allclose(logits_orig, logits) diff --git a/qai_hub_models/models/_shared/yolo/app.py b/qai_hub_models/models/_shared/yolo/app.py index 668ebd0d..5fa6d0e3 100644 --- a/qai_hub_models/models/_shared/yolo/app.py +++ b/qai_hub_models/models/_shared/yolo/app.py @@ -10,6 +10,7 @@ import torch from PIL.Image import Image +from qai_hub_models.models._shared.yolo.utils import detect_postprocess from qai_hub_models.utils.bounding_box_processing import batched_nms from qai_hub_models.utils.draw import draw_box_from_xyxy from qai_hub_models.utils.image_processing import app_to_net_image_inputs @@ -38,6 +39,7 @@ def __init__( ], nms_score_threshold: float = 0.45, nms_iou_threshold: float = 0.7, + model_includes_postprocessing: bool = True, ): """ Initialize a YoloObjectDetectionApp application. @@ -63,10 +65,14 @@ def __init__( nms_iou_threshold Intersection over Union threshold for non maximum suppression. + + model_includes_postprocessing + Whether the model includes postprocessing steps beyond the detector. """ self.model = model self.nms_score_threshold = nms_score_threshold self.nms_iou_threshold = nms_iou_threshold + self.model_includes_postprocessing = model_includes_postprocessing def check_image_size(self, pixel_values: torch.Tensor) -> None: """ @@ -120,7 +126,12 @@ class scores per batch multiplied by confidence: List element shape is [num_pred self.check_image_size(NCHW_fp32_torch_frames) # Run prediction - pred_boxes, pred_scores, pred_class_idx = self.model(NCHW_fp32_torch_frames) + if self.model_includes_postprocessing: + pred_boxes, pred_scores, pred_class_idx = self.model(NCHW_fp32_torch_frames) + else: + pred_boxes, pred_scores, pred_class_idx = self.pre_nms_postprocess( + self.model(NCHW_fp32_torch_frames) + ) # Non Maximum Suppression on each batch pred_boxes, pred_scores, pred_class_idx = batched_nms( @@ -148,3 +159,23 @@ class scores per batch multiplied by confidence: List element shape is [num_pred ) return NHWC_int_numpy_frames + + def pre_nms_postprocess( + self, prediction: torch.Tensor + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """ + Process the output of the YOLO detector for input to NMS. + + Parameters: + detector_output: torch.Tensor + The output of Yolo detection model. Tensor shape varies by model implementation. + + Returns: + boxes: torch.Tensor + Bounding box locations. Shape is [batch, num preds, 4] where 4 == (x1, y1, x2, y2) + scores: torch.Tensor + class scores multiplied by confidence: Shape is [batch, num_preds] + class_idx: torch.tensor + Shape is [batch, num_preds] where the last dim is the index of the most probable class of the prediction. + """ + return detect_postprocess(prediction) diff --git a/qai_hub_models/models/_shared/yolo/demo.py b/qai_hub_models/models/_shared/yolo/demo.py index 0f6d4a1d..2ea27c44 100644 --- a/qai_hub_models/models/_shared/yolo/demo.py +++ b/qai_hub_models/models/_shared/yolo/demo.py @@ -16,14 +16,14 @@ validate_on_device_demo_args, ) from qai_hub_models.utils.asset_loaders import CachedWebAsset, load_image -from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.base_model import HubModel from qai_hub_models.utils.display import display_or_save_image # Run Yolo end-to-end on a sample image. # The demo will display a image with the predicted bounding boxes. def yolo_detection_demo( - model_type: Type[BaseModel], + model_type: Type[HubModel], model_id: str, app_type: Callable[..., YoloObjectDetectionApp], default_image: str | CachedWebAsset, @@ -49,12 +49,20 @@ def yolo_detection_demo( default=0.7, help="Intersection over Union (IoU) threshold for NonMaximumSuppression", ) - args = parser.parse_args([] if is_test else None) + pargs = parser.parse_args([] if is_test else None) + args = pargs + validate_on_device_demo_args(args, model_id) model = demo_model_from_cli_args(model_type, model_id, args) - app = app_type(model, args.score_threshold, args.iou_threshold) + app = app_type( + model, + args.score_threshold, + args.iou_threshold, + args.include_postprocessing if not is_test else True, + ) + print("Model Loaded") image = load_image(args.image) pred_images = app.predict_boxes_from_image(image) diff --git a/qai_hub_models/models/_shared/yolo/utils.py b/qai_hub_models/models/_shared/yolo/utils.py index 5911106f..87579452 100644 --- a/qai_hub_models/models/_shared/yolo/utils.py +++ b/qai_hub_models/models/_shared/yolo/utils.py @@ -49,8 +49,8 @@ def detect_postprocess(detector_output: torch.Tensor): The output of Yolo Detection model Shape is [batch, num_preds, k] where, k = # of classes + 5 - k is structured as follows [boxes (4) : conf (1) : # of classes] - and boxes are co-ordinates [x_center, y_center, w, h] + k is structured as follows [box_coordinates (4) , conf (1) , # of classes] + and box_coordinates are [x_center, y_center, w, h] Returns: boxes: torch.Tensor diff --git a/qai_hub_models/models/aotgan/perf.yaml b/qai_hub_models/models/aotgan/perf.yaml index 895bc321..126d72d4 100644 --- a/qai_hub_models/models/aotgan/perf.yaml +++ b/qai_hub_models/models/aotgan/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: AOT-GAN performance_metrics: - torchscript_onnx_tflite: - inference_time: 172836.0 - throughput: 5.785831655442153 + inference_time: 172572.0 + throughput: 5.79468279906358 estimated_peak_memory_range: - min: 3305472 - max: 6628872 + min: 2220032 + max: 5310760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jqpyel4gy + job_id: jw562w2vg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,13 +53,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:15:11.791489Z' + timestamp: '2024-04-02T16:05:49.549048Z' torchscript_onnx_qnn: - inference_time: 162909.0 - throughput: 6.138396282587212 + inference_time: 162522.0 + throughput: 6.15301313053002 estimated_peak_memory_range: - min: 4268032 - max: 33754568 + min: 3313664 + max: 38238512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -66,14 +67,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 275 - job_id: j1p8on8g9 + job_id: j1pvq7q7g job_status: Passed - torchscript_onnx_tflite: - inference_time: 127366.0 - throughput: 7.851388910698303 + inference_time: 126409.0 + throughput: 7.910829134001535 estimated_peak_memory_range: - min: 2334720 - max: 227053936 + min: 2404352 + max: 254900160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: j2p0ywegw + job_id: jwgoz8z4p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,13 +91,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:17:12.829523Z' + timestamp: '2024-04-02T16:07:27.833498Z' torchscript_onnx_qnn: - inference_time: 120027.0 - throughput: 8.331458755113433 + inference_time: 119294.0 + throughput: 8.382651264942076 estimated_peak_memory_range: - min: 0 - max: 140852624 + min: 3862528 + max: 165145744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -104,5 +105,5 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 275 - job_id: jogkz1ogd + job_id: j7gjdqd7g job_status: Passed diff --git a/qai_hub_models/models/convnext_tiny/perf.yaml b/qai_hub_models/models/convnext_tiny/perf.yaml index d850c5bf..f8254acd 100644 --- a/qai_hub_models/models/convnext_tiny/perf.yaml +++ b/qai_hub_models/models/convnext_tiny/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: ConvNext-Tiny performance_metrics: - torchscript_onnx_tflite: - inference_time: 11538.0 - throughput: 86.67013347200555 + inference_time: 11504.0 + throughput: 86.92628650904034 estimated_peak_memory_range: - min: 53248 - max: 2750320 + min: 32768 + max: 2493040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 380 - job_id: jnp10l25q + job_id: jlpeoyo7g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,7 +53,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:26:23.235644Z' + timestamp: '2024-04-02T15:30:19.195043Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -69,11 +70,11 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 8123.0 - throughput: 123.10722639418934 + inference_time: 8139.0 + throughput: 122.86521685710775 estimated_peak_memory_range: - min: 40960 - max: 205818960 + min: 20480 + max: 209217264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 380 - job_id: jvgdw9e5j + job_id: jygz2n2zg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,7 +91,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:26:23.235670Z' + timestamp: '2024-04-02T15:30:19.195057Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' diff --git a/qai_hub_models/models/ddrnet23_slim/perf.yaml b/qai_hub_models/models/ddrnet23_slim/perf.yaml index 277e6c15..d60ad31b 100644 --- a/qai_hub_models/models/ddrnet23_slim/perf.yaml +++ b/qai_hub_models/models/ddrnet23_slim/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: DDRNet23-Slim performance_metrics: - torchscript_onnx_tflite: - inference_time: 6741.0 - throughput: 148.3459427384661 + inference_time: 6702.0 + throughput: 149.20919128618323 estimated_peak_memory_range: - min: 1024000 - max: 28696320 + min: 1003520 + max: 2797288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 131 - job_id: jz5wo7zp1 + job_id: jz5ww4wz5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,7 +53,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:17:36.932886Z' + timestamp: '2024-04-02T15:51:44.265591Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -69,11 +70,11 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 4644.0 - throughput: 215.33161068044788 + inference_time: 4785.0 + throughput: 208.9864158829676 estimated_peak_memory_range: - min: 45056 - max: 68954288 + min: 36864 + max: 71748864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 131 - job_id: jmg9vmq57 + job_id: jmg90d0qg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,7 +91,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:17:36.932896Z' + timestamp: '2024-04-02T15:51:44.265604Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' diff --git a/qai_hub_models/models/deeplabv3_resnet50/perf.yaml b/qai_hub_models/models/deeplabv3_resnet50/perf.yaml index 1d0853f7..9e41614c 100644 --- a/qai_hub_models/models/deeplabv3_resnet50/perf.yaml +++ b/qai_hub_models/models/deeplabv3_resnet50/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: DeepLabV3-ResNet50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 57559.0 - throughput: 17.373477649021005 + inference_time: 58066.0 + throughput: 17.221782110012743 estimated_peak_memory_range: - min: 106496 - max: 3561872 + min: 12288 + max: 171781856 primary_compute_unit: GPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 96 layers_on_cpu: 0 total_layers: 96 - job_id: jw5663y5o + job_id: jqpyzmrrg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,13 +53,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:49:36.627925Z' + timestamp: '2024-04-02T15:16:34.908612Z' torchscript_onnx_qnn: - inference_time: 145372.0 - throughput: 6.878903777893955 + inference_time: 145873.0 + throughput: 6.855278221466619 estimated_peak_memory_range: - min: 724992 - max: 17276040 + min: 811008 + max: 9257648 primary_compute_unit: GPU precision: fp16 layer_info: @@ -66,14 +67,14 @@ models: layers_on_gpu: 82 layers_on_cpu: 0 total_layers: 82 - job_id: jwgoy1k58 + job_id: j1p8210zp job_status: Passed - torchscript_onnx_tflite: - inference_time: 40153.0 - throughput: 24.904739371902473 + inference_time: 40355.0 + throughput: 24.780076818238136 estimated_peak_memory_range: - min: 4358144 - max: 29236608 + min: 0 + max: 28183312 primary_compute_unit: GPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 96 layers_on_cpu: 0 total_layers: 96 - job_id: j1p3k4n52 + job_id: j2p04632g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,13 +91,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:51:31.429028Z' + timestamp: '2024-04-02T15:19:14.249826Z' torchscript_onnx_qnn: - inference_time: 104457.0 - throughput: 9.573317250160352 + inference_time: 104946.0 + throughput: 9.52871000323976 estimated_peak_memory_range: - min: 675840 - max: 24520160 + min: 700416 + max: 26619552 primary_compute_unit: GPU precision: fp16 layer_info: @@ -104,5 +105,5 @@ models: layers_on_gpu: 82 layers_on_cpu: 0 total_layers: 82 - job_id: j1pv31r5x + job_id: jogkv87yp job_status: Passed diff --git a/qai_hub_models/models/densenet121/perf.yaml b/qai_hub_models/models/densenet121/perf.yaml index 42d62485..48ff8f44 100644 --- a/qai_hub_models/models/densenet121/perf.yaml +++ b/qai_hub_models/models/densenet121/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: DenseNet-121 performance_metrics: - torchscript_onnx_tflite: - inference_time: 1603.0 - throughput: 623.8303181534623 + inference_time: 1615.0 + throughput: 619.1950464396285 estimated_peak_memory_range: - min: 16384 - max: 20547528 + min: 20480 + max: 2339568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 310 - job_id: jqpyen0gy + job_id: jn5q0ve7p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:35:22.500705Z' + timestamp: '2024-04-02T15:16:44.466434Z' torchscript_onnx_qnn: - inference_time: 1436.0 - throughput: 696.3788300835655 + inference_time: 1442.0 + throughput: 693.4812760055479 estimated_peak_memory_range: - min: 618496 - max: 5887960 + min: 20480 + max: 9456304 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 371 + layers_on_npu: 370 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 371 - job_id: j1p8o6qg9 + total_layers: 370 + job_id: jw562wevg job_status: Passed - torchscript_onnx_tflite: - inference_time: 1114.0 - throughput: 897.6660682226212 + inference_time: 1112.0 + throughput: 899.2805755395683 estimated_peak_memory_range: min: 12288 - max: 93424064 + max: 95054176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 310 - job_id: j2p0yd0gw + job_id: j1gl4l6e5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:39:50.803809Z' + timestamp: '2024-04-02T15:19:29.601699Z' torchscript_onnx_qnn: - inference_time: 985.0 - throughput: 1015.2284263959391 + inference_time: 977.0 + throughput: 1023.5414534288639 estimated_peak_memory_range: min: 618496 - max: 142978448 + max: 148303712 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 371 + layers_on_npu: 370 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 371 - job_id: jn5q8ze57 + total_layers: 370 + job_id: j1p3n6vx5 job_status: Passed diff --git a/qai_hub_models/models/detr_resnet101/perf.yaml b/qai_hub_models/models/detr_resnet101/perf.yaml index ab365fa3..737401f0 100644 --- a/qai_hub_models/models/detr_resnet101/perf.yaml +++ b/qai_hub_models/models/detr_resnet101/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,19 +32,19 @@ models: - name: DETR-ResNet101 performance_metrics: - torchscript_onnx_tflite: - inference_time: 563957.0 - throughput: 1.7731848350140171 + inference_time: 53317.0 + throughput: 18.755743946583642 estimated_peak_memory_range: - min: 102526976 - max: 112477944 - primary_compute_unit: CPU - precision: fp32 + min: 77824 + max: 8355272 + primary_compute_unit: NPU + precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 0 - layers_on_cpu: 957 + layers_on_npu: 954 + layers_on_gpu: 2 + layers_on_cpu: 1 total_layers: 957 - job_id: jmg9v8m57 + job_id: j1pvq707g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,7 +53,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:10:49.800332Z' + timestamp: '2024-04-02T15:52:17.373948Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -69,19 +70,19 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 489867.0 - throughput: 2.0413704127855112 + inference_time: 39536.0 + throughput: 25.29340348037232 estimated_peak_memory_range: - min: 109977600 - max: 266823568 - primary_compute_unit: CPU - precision: fp32 + min: 1413120 + max: 263608576 + primary_compute_unit: NPU + precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 0 - layers_on_cpu: 957 + layers_on_npu: 954 + layers_on_gpu: 2 + layers_on_cpu: 1 total_layers: 957 - job_id: jnp103n5q + job_id: j7gjdqz7g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,7 +91,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:10:49.800340Z' + timestamp: '2024-04-02T15:52:17.373962Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' diff --git a/qai_hub_models/models/detr_resnet101_dc5/perf.yaml b/qai_hub_models/models/detr_resnet101_dc5/perf.yaml index 07e3fdc8..643ad6c8 100644 --- a/qai_hub_models/models/detr_resnet101_dc5/perf.yaml +++ b/qai_hub_models/models/detr_resnet101_dc5/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,19 +32,19 @@ models: - name: DETR-ResNet101-DC5 performance_metrics: - torchscript_onnx_tflite: - inference_time: 976351.0 - throughput: 1.0242218218652923 + inference_time: 439506.0 + throughput: 2.2752817936501435 estimated_peak_memory_range: - min: 168345600 - max: 171158408 - primary_compute_unit: CPU - precision: fp32 + min: 8531968 + max: 17800792 + primary_compute_unit: NPU + precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 0 - layers_on_cpu: 958 + layers_on_npu: 955 + layers_on_gpu: 2 + layers_on_cpu: 1 total_layers: 958 - job_id: jep28v6p6 + job_id: jlpeoye7g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,7 +53,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-15T00:03:43.829001Z' + timestamp: '2024-04-02T15:53:12.170672Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -69,19 +70,19 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 777938.0 - throughput: 1.2854494831207628 + inference_time: 331401.0 + throughput: 3.017492403462874 estimated_peak_memory_range: - min: 175112192 - max: 339555616 - primary_compute_unit: CPU - precision: fp32 + min: 106496 + max: 457171760 + primary_compute_unit: NPU + precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 0 - layers_on_cpu: 958 + layers_on_npu: 955 + layers_on_gpu: 2 + layers_on_cpu: 1 total_layers: 958 - job_id: jqpye70gy + job_id: jygz2nozg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,7 +91,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-15T00:03:43.829010Z' + timestamp: '2024-04-02T15:53:12.170686Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' diff --git a/qai_hub_models/models/detr_resnet50/perf.yaml b/qai_hub_models/models/detr_resnet50/perf.yaml index d07318ba..9cafd95b 100644 --- a/qai_hub_models/models/detr_resnet50/perf.yaml +++ b/qai_hub_models/models/detr_resnet50/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,19 +32,19 @@ models: - name: DETR-ResNet50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 365312.0 - throughput: 2.737386124737211 + inference_time: 49534.0 + throughput: 20.188153591472524 estimated_peak_memory_range: - min: 109416448 - max: 444976064 - primary_compute_unit: CPU - precision: fp32 + min: 1585152 + max: 11362840 + primary_compute_unit: NPU + precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 0 - layers_on_cpu: 889 + layers_on_npu: 886 + layers_on_gpu: 2 + layers_on_cpu: 1 total_layers: 889 - job_id: j1p3k7x52 + job_id: jz5ww42z5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,7 +53,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-15T00:08:31.933833Z' + timestamp: '2024-04-02T15:30:45.384076Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -69,19 +70,19 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 287302.0 - throughput: 3.480657983585217 + inference_time: 36491.0 + throughput: 27.404017428955086 estimated_peak_memory_range: - min: 108204032 - max: 196940032 - primary_compute_unit: CPU - precision: fp32 + min: 135168 + max: 216736992 + primary_compute_unit: NPU + precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 0 - layers_on_cpu: 889 + layers_on_npu: 886 + layers_on_gpu: 2 + layers_on_cpu: 1 total_layers: 889 - job_id: jwgoyw458 + job_id: jmg90djqg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,7 +91,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-15T00:08:31.933846Z' + timestamp: '2024-04-02T15:30:45.384090Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' diff --git a/qai_hub_models/models/detr_resnet50_dc5/perf.yaml b/qai_hub_models/models/detr_resnet50_dc5/perf.yaml index 4d135c6c..e8af80a3 100644 --- a/qai_hub_models/models/detr_resnet50_dc5/perf.yaml +++ b/qai_hub_models/models/detr_resnet50_dc5/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,19 +32,19 @@ models: - name: DETR-ResNet50-DC5 performance_metrics: - torchscript_onnx_tflite: - inference_time: 692168.0 - throughput: 1.4447359600559402 + inference_time: 428409.0 + throughput: 2.3342180019560748 estimated_peak_memory_range: - min: 117583872 - max: 529905552 - primary_compute_unit: CPU - precision: fp32 + min: 6443008 + max: 14635248 + primary_compute_unit: NPU + precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 0 - layers_on_cpu: 890 + layers_on_npu: 887 + layers_on_gpu: 2 + layers_on_cpu: 1 total_layers: 890 - job_id: jqp4q2lgo + job_id: jnp126ykg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,7 +53,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:27:50.803823Z' + timestamp: '2024-04-02T15:36:20.165124Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -69,19 +70,19 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 521991.0 - throughput: 1.9157418422923 + inference_time: 326693.0 + throughput: 3.060977737508915 estimated_peak_memory_range: - min: 178831360 - max: 279734112 - primary_compute_unit: CPU - precision: fp32 + min: 147456 + max: 420422096 + primary_compute_unit: NPU + precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 0 - layers_on_cpu: 890 + layers_on_npu: 887 + layers_on_gpu: 2 + layers_on_cpu: 1 total_layers: 890 - job_id: j0pxvz9g7 + job_id: jvgdn2ek5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,7 +91,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:27:50.803834Z' + timestamp: '2024-04-02T15:36:20.165138Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' diff --git a/qai_hub_models/models/efficientnet_b0/perf.yaml b/qai_hub_models/models/efficientnet_b0/perf.yaml index bce0b849..81b9663d 100644 --- a/qai_hub_models/models/efficientnet_b0/perf.yaml +++ b/qai_hub_models/models/efficientnet_b0/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: EfficientNet-B0 performance_metrics: - torchscript_onnx_tflite: - inference_time: 2174.0 - throughput: 459.9816007359706 + inference_time: 1218.0 + throughput: 821.0180623973728 estimated_peak_memory_range: - min: 24576 - max: 2273464 + min: 16384 + max: 2283088 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: jlpe9l8gr + job_id: jz57290qp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:37:36.573638Z' + timestamp: '2024-04-02T15:50:13.918223Z' torchscript_onnx_qnn: - inference_time: 2173.0 - throughput: 460.1932811780948 + inference_time: 1223.0 + throughput: 817.6614881439084 estimated_peak_memory_range: - min: 16384 - max: 87349280 + min: 622592 + max: 7343432 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 242 + layers_on_npu: 241 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 242 - job_id: jz5wo14p1 + total_layers: 241 + job_id: j0px9xnjp job_status: Passed - torchscript_onnx_tflite: - inference_time: 1524.0 - throughput: 656.1679790026246 + inference_time: 907.0 + throughput: 1102.5358324145534 estimated_peak_memory_range: min: 12288 - max: 70874656 + max: 70459040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: jygze44g8 + job_id: jqp4n3kqg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:42:10.776325Z' + timestamp: '2024-04-02T15:52:57.259137Z' torchscript_onnx_qnn: - inference_time: 1508.0 - throughput: 663.1299734748011 + inference_time: 886.0 + throughput: 1128.6681715575621 estimated_peak_memory_range: - min: 618496 - max: 79231776 + min: 0 + max: 70990016 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 242 + layers_on_npu: 241 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 242 - job_id: jmg9vxm57 + total_layers: 241 + job_id: jo5me8qyp job_status: Passed diff --git a/qai_hub_models/models/esrgan/perf.yaml b/qai_hub_models/models/esrgan/perf.yaml index 1ecf1170..79181309 100644 --- a/qai_hub_models/models/esrgan/perf.yaml +++ b/qai_hub_models/models/esrgan/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: ESRGAN performance_metrics: - torchscript_onnx_tflite: - inference_time: 74047.0 - throughput: 13.504936054127784 + inference_time: 73806.0 + throughput: 13.54903395387909 estimated_peak_memory_range: - min: 12288 - max: 4695144 + min: 3256320 + max: 5857168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1024 - job_id: jnp10rl5q + job_id: jegn0kmv5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:41:56.326001Z' + timestamp: '2024-04-02T15:50:06.444234Z' torchscript_onnx_qnn: - inference_time: 65507.0 - throughput: 15.265544140320882 + inference_time: 69637.0 + throughput: 14.360182087108864 estimated_peak_memory_range: - min: 57344 - max: 55933800 + min: 143360 + max: 108258128 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 1027 + layers_on_npu: 1026 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 1027 - job_id: jz5woo6p1 + total_layers: 1026 + job_id: j2p04622g job_status: Passed - torchscript_onnx_tflite: - inference_time: 53553.0 - throughput: 18.673090209698803 + inference_time: 50712.0 + throughput: 19.71919861176842 estimated_peak_memory_range: - min: 3276800 - max: 574983152 + min: 77824 + max: 582298832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1024 - job_id: jvgdwjl5j + job_id: jqpyzmjrg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:46:30.960659Z' + timestamp: '2024-04-02T15:52:55.582636Z' torchscript_onnx_qnn: - inference_time: 50563.0 - throughput: 19.777307517354586 + inference_time: 49723.0 + throughput: 20.11141725157372 estimated_peak_memory_range: - min: 86016 - max: 240922112 + min: 1306624 + max: 256079456 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 1027 + layers_on_npu: 1026 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 1027 - job_id: jmg9vvl57 + total_layers: 1026 + job_id: j1p821mzp job_status: Passed diff --git a/qai_hub_models/models/facebook_denoiser/perf.yaml b/qai_hub_models/models/facebook_denoiser/perf.yaml index 425ac777..186e257d 100644 --- a/qai_hub_models/models/facebook_denoiser/perf.yaml +++ b/qai_hub_models/models/facebook_denoiser/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: Facebook-Denoiser performance_metrics: - torchscript_onnx_tflite: - inference_time: 711384.0 - throughput: 1.4057105585731475 + inference_time: 746968.0 + throughput: 1.338745434878067 estimated_peak_memory_range: - min: 236318720 - max: 349174920 + min: 379867136 + max: 382919144 primary_compute_unit: CPU precision: fp32 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 209 total_layers: 209 - job_id: j1p3kwm52 + job_id: jogkv8qyp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,7 +53,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:10:56.043154Z' + timestamp: '2024-04-02T15:25:02.878241Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -69,11 +70,11 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 670316.0 - throughput: 1.4918337023135357 + inference_time: 692152.0 + throughput: 1.4447693570198454 estimated_peak_memory_range: - min: 481374208 - max: 504692832 + min: 372510720 + max: 393584320 primary_compute_unit: CPU precision: fp32 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 209 total_layers: 209 - job_id: jwgoy4158 + job_id: jn5q0vr7p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,7 +91,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:10:56.043167Z' + timestamp: '2024-04-02T15:25:02.878255Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' diff --git a/qai_hub_models/models/fastsam_s/perf.yaml b/qai_hub_models/models/fastsam_s/perf.yaml index af668b3d..dc9b55d4 100644 --- a/qai_hub_models/models/fastsam_s/perf.yaml +++ b/qai_hub_models/models/fastsam_s/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: FastSam-S performance_metrics: - torchscript_onnx_tflite: - inference_time: 13114.0 - throughput: 76.25438462711605 + inference_time: 8735.0 + throughput: 114.48196908986834 estimated_peak_memory_range: - min: 7823360 - max: 25444440 + min: 7831552 + max: 10552872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 288 - job_id: jegn21vgo + job_id: jw562wzvg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,7 +53,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:39:15.450027Z' + timestamp: '2024-04-02T16:06:17.970106Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -69,11 +70,11 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 9234.0 - throughput: 108.29542993285683 + inference_time: 6461.0 + throughput: 154.7748026621266 estimated_peak_memory_range: - min: 6332416 - max: 79756208 + min: 6328320 + max: 76883760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 288 - job_id: joprkxv50 + job_id: j1p3n61x5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,7 +91,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:39:15.450036Z' + timestamp: '2024-04-02T16:06:17.970119Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' diff --git a/qai_hub_models/models/fastsam_x/perf.yaml b/qai_hub_models/models/fastsam_x/perf.yaml index f4f2b30b..c094b725 100644 --- a/qai_hub_models/models/fastsam_x/perf.yaml +++ b/qai_hub_models/models/fastsam_x/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: FastSam-X performance_metrics: - torchscript_onnx_tflite: - inference_time: 64155.0 - throughput: 15.587249629802821 + inference_time: 51073.0 + throughput: 19.579817124508057 estimated_peak_memory_range: - min: 9207808 - max: 14058240 + min: 9240576 + max: 13971912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 420 - job_id: jw566k75o + job_id: jwgoz8n4p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,7 +53,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:56:58.796143Z' + timestamp: '2024-04-02T15:25:26.476231Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -69,11 +70,11 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 47867.0 - throughput: 20.891219420477572 + inference_time: 36142.0 + throughput: 27.66864036301256 estimated_peak_memory_range: - min: 7962624 - max: 152777152 + min: 98304 + max: 142182032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 420 - job_id: j1p3kyz52 + job_id: j1pvq7r7g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,7 +91,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:56:58.796153Z' + timestamp: '2024-04-02T15:25:26.476245Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' diff --git a/qai_hub_models/models/fcn_resnet50/perf.yaml b/qai_hub_models/models/fcn_resnet50/perf.yaml index 477b5790..4020f79a 100644 --- a/qai_hub_models/models/fcn_resnet50/perf.yaml +++ b/qai_hub_models/models/fcn_resnet50/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: FCN_ResNet50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 8550.0 - throughput: 116.95906432748538 + inference_time: 8557.0 + throughput: 116.86338670094659 estimated_peak_memory_range: - min: 4263936 - max: 6443424 + min: 159744 + max: 7109192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jn5q8dm57 + job_id: jygz2njzg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:32:21.594233Z' + timestamp: '2024-04-02T15:38:36.923072Z' torchscript_onnx_qnn: - inference_time: 7881.0 - throughput: 126.8874508311128 + inference_time: 7883.0 + throughput: 126.85525815045034 estimated_peak_memory_range: min: 20480 - max: 13250472 + max: 10311800 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 126 + layers_on_npu: 125 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 126 - job_id: jw566075o + total_layers: 125 + job_id: jmg90dyqg job_status: Passed - torchscript_onnx_tflite: - inference_time: 6407.0 - throughput: 156.07928827844546 + inference_time: 6324.0 + throughput: 158.12776723592663 estimated_peak_memory_range: - min: 4251648 - max: 76376944 + min: 2187264 + max: 78458400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: j1glnqlpv + job_id: jz5ww43z5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:34:20.486125Z' + timestamp: '2024-04-02T15:41:20.209217Z' torchscript_onnx_qnn: - inference_time: 5846.0 - throughput: 171.05713308244952 + inference_time: 5820.0 + throughput: 171.82130584192439 estimated_peak_memory_range: - min: 638976 - max: 55934880 + min: 618496 + max: 59720272 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 126 + layers_on_npu: 125 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 126 - job_id: j1p3krz52 + total_layers: 125 + job_id: jnp126wkg job_status: Passed diff --git a/qai_hub_models/models/ffnet_122ns_lowres/perf.yaml b/qai_hub_models/models/ffnet_122ns_lowres/perf.yaml index 37ffb9dc..2fe96c9f 100644 --- a/qai_hub_models/models/ffnet_122ns_lowres/perf.yaml +++ b/qai_hub_models/models/ffnet_122ns_lowres/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: FFNet-122NS-LowRes performance_metrics: - torchscript_onnx_tflite: - inference_time: 10407.0 - throughput: 96.08917075045642 + inference_time: 9649.0 + throughput: 103.6376826614157 estimated_peak_memory_range: - min: 12288 - max: 2345904 + min: 647168 + max: 2901040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 216 - job_id: jmg9vel57 + job_id: jvgdn2qk5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:45:44.022843Z' + timestamp: '2024-04-02T15:16:46.197377Z' torchscript_onnx_qnn: - inference_time: 10785.0 - throughput: 92.7213722763097 + inference_time: 10810.0 + throughput: 92.50693802035153 estimated_peak_memory_range: - min: 6205440 - max: 39312144 + min: 6344704 + max: 40462128 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 349 + layers_on_npu: 348 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 349 - job_id: jvgdwle5j + total_layers: 348 + job_id: jqp4n3dqg job_status: Passed - torchscript_onnx_tflite: - inference_time: 7373.0 - throughput: 135.63000135630003 + inference_time: 6923.0 + throughput: 144.4460494005489 estimated_peak_memory_range: - min: 643072 - max: 58158976 + min: 405504 + max: 60494448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 216 - job_id: jnp10x25q + job_id: jz5729lqp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:47:44.631260Z' + timestamp: '2024-04-02T15:19:30.896664Z' torchscript_onnx_qnn: - inference_time: 7627.0 - throughput: 131.1131506490101 + inference_time: 7600.0 + throughput: 131.57894736842104 estimated_peak_memory_range: - min: 6311936 - max: 85982464 + min: 6307840 + max: 86339936 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 349 + layers_on_npu: 348 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 349 - job_id: jz57z3lp3 + total_layers: 348 + job_id: j0px9x6jp job_status: Passed diff --git a/qai_hub_models/models/ffnet_40s/perf.yaml b/qai_hub_models/models/ffnet_40s/perf.yaml index 038d58da..a4633790 100644 --- a/qai_hub_models/models/ffnet_40s/perf.yaml +++ b/qai_hub_models/models/ffnet_40s/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: FFNet-40S performance_metrics: - torchscript_onnx_tflite: - inference_time: 22513.0 - throughput: 44.41878026029405 + inference_time: 22812.0 + throughput: 43.836577240049095 estimated_peak_memory_range: - min: 2539520 - max: 5190832 + min: 2555904 + max: 5191296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 92 - job_id: jwgoyl458 + job_id: jo5me86yp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:12:39.279085Z' + timestamp: '2024-04-02T16:00:28.301836Z' torchscript_onnx_qnn: - inference_time: 17466.0 - throughput: 57.25409366769724 + inference_time: 17334.0 + throughput: 57.69008884273682 estimated_peak_memory_range: - min: 25210880 - max: 48310168 + min: 25214976 + max: 45212320 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 141 + layers_on_npu: 140 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 141 - job_id: j7gjxr7pd + total_layers: 140 + job_id: jep2xelxg job_status: Passed - torchscript_onnx_tflite: - inference_time: 16613.0 - throughput: 60.19382411364594 + inference_time: 16599.0 + throughput: 60.24459304777396 estimated_peak_memory_range: - min: 61440 - max: 100488656 + min: 16384 + max: 106444032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 92 - job_id: j1pv3l75x + job_id: jopr6wevp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:15:49.686166Z' + timestamp: '2024-04-02T16:02:52.508368Z' torchscript_onnx_qnn: - inference_time: 12681.0 - throughput: 78.85813421654444 + inference_time: 12563.0 + throughput: 79.59882193743533 estimated_peak_memory_range: - min: 25182208 - max: 82551136 + min: 25210880 + max: 86653840 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 141 + layers_on_npu: 140 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 141 - job_id: jlpe977gr + total_layers: 140 + job_id: jqpyzm6rg job_status: Passed diff --git a/qai_hub_models/models/ffnet_40s_quantized/README.md b/qai_hub_models/models/ffnet_40s_quantized/README.md index 18fb3a95..12eee901 100644 --- a/qai_hub_models/models/ffnet_40s_quantized/README.md +++ b/qai_hub_models/models/ffnet_40s_quantized/README.md @@ -16,6 +16,11 @@ a hosted Qualcomm® device. ## Example & Usage +Install the package via pip: +```bash +pip install "qai_hub_models[ffnet_40s_quantized]" +``` + Once installed, run the following simple CLI demo: diff --git a/qai_hub_models/models/ffnet_40s_quantized/perf.yaml b/qai_hub_models/models/ffnet_40s_quantized/perf.yaml index 6d795ec7..945100e3 100644 --- a/qai_hub_models/models/ffnet_40s_quantized/perf.yaml +++ b/qai_hub_models/models/ffnet_40s_quantized/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: FFNet-40S-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 6439.0 - throughput: 155.3036185743128 + inference_time: 6451.0 + throughput: 155.0147263990079 estimated_peak_memory_range: - min: 888832 - max: 2660784 + min: 872448 + max: 25600304 primary_compute_unit: NPU precision: int8 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 97 - job_id: jqp4q92go + job_id: jogkv83yp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,7 +53,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:35:32.125659Z' + timestamp: '2024-04-02T15:52:22.278215Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -69,11 +70,11 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 4671.0 - throughput: 214.08691928923142 + inference_time: 4634.0 + throughput: 215.79628830384118 estimated_peak_memory_range: - min: 16384 - max: 65022448 + min: 180224 + max: 67612432 primary_compute_unit: NPU precision: int8 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 97 - job_id: j0pxvd8g7 + job_id: jn5q0v37p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,7 +91,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:35:32.125673Z' + timestamp: '2024-04-02T15:52:22.278229Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' diff --git a/qai_hub_models/models/ffnet_40s_quantized/requirements.txt b/qai_hub_models/models/ffnet_40s_quantized/requirements.txt new file mode 100644 index 00000000..2470ac6c --- /dev/null +++ b/qai_hub_models/models/ffnet_40s_quantized/requirements.txt @@ -0,0 +1 @@ +scikit-image==0.21.0 diff --git a/qai_hub_models/models/ffnet_54s/perf.yaml b/qai_hub_models/models/ffnet_54s/perf.yaml index 9f1617aa..68e7c6ba 100644 --- a/qai_hub_models/models/ffnet_54s/perf.yaml +++ b/qai_hub_models/models/ffnet_54s/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: FFNet-54S performance_metrics: - torchscript_onnx_tflite: - inference_time: 24853.0 - throughput: 40.23659115599727 + inference_time: 25516.0 + throughput: 39.191095783038094 estimated_peak_memory_range: - min: 2572288 - max: 4947328 + min: 3219456 + max: 5162680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 113 - job_id: j0pxv38g7 + job_id: jw562wnvg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:47:57.765081Z' + timestamp: '2024-04-02T16:00:41.754790Z' torchscript_onnx_qnn: - inference_time: 19975.0 - throughput: 50.06257822277847 + inference_time: 20433.0 + throughput: 48.94043948514658 estimated_peak_memory_range: - min: 25214976 - max: 52299192 + min: 25186304 + max: 50574640 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 176 + layers_on_npu: 175 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 176 - job_id: joprkok50 + total_layers: 175 + job_id: jwgoz834p job_status: Passed - torchscript_onnx_tflite: - inference_time: 18421.0 - throughput: 54.28586938819825 + inference_time: 18562.0 + throughput: 53.87350501023597 estimated_peak_memory_range: - min: 462848 - max: 113159440 + min: 2244608 + max: 122307680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 113 - job_id: jo5mro7gk + job_id: j1p3n6ex5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:49:56.432155Z' + timestamp: '2024-04-02T16:03:03.931513Z' torchscript_onnx_qnn: - inference_time: 14570.0 - throughput: 68.63417982155113 + inference_time: 14524.0 + throughput: 68.85155604516662 estimated_peak_memory_range: - min: 154132480 - max: 217703424 + min: 231440384 + max: 301103936 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 176 + layers_on_npu: 175 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 176 - job_id: jep2846p6 + total_layers: 175 + job_id: j1pvq7v7g job_status: Passed diff --git a/qai_hub_models/models/ffnet_54s_quantized/README.md b/qai_hub_models/models/ffnet_54s_quantized/README.md index 346077ff..03001eb0 100644 --- a/qai_hub_models/models/ffnet_54s_quantized/README.md +++ b/qai_hub_models/models/ffnet_54s_quantized/README.md @@ -16,6 +16,11 @@ a hosted Qualcomm® device. ## Example & Usage +Install the package via pip: +```bash +pip install "qai_hub_models[ffnet_54s_quantized]" +``` + Once installed, run the following simple CLI demo: diff --git a/qai_hub_models/models/ffnet_54s_quantized/perf.yaml b/qai_hub_models/models/ffnet_54s_quantized/perf.yaml index 1f34553f..e21ac3ac 100644 --- a/qai_hub_models/models/ffnet_54s_quantized/perf.yaml +++ b/qai_hub_models/models/ffnet_54s_quantized/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: FFNet-54S-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 7127.0 - throughput: 140.31149151115477 + inference_time: 7122.0 + throughput: 140.40999719180004 estimated_peak_memory_range: - min: 712704 - max: 2530520 + min: 823296 + max: 9540112 primary_compute_unit: NPU precision: int8 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: j7gjxeepd + job_id: j7gjdqe7g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,7 +53,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:16:07.677264Z' + timestamp: '2024-04-02T15:24:42.915036Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -69,11 +70,11 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 5136.0 - throughput: 194.70404984423675 + inference_time: 5147.0 + throughput: 194.28793471925394 estimated_peak_memory_range: - min: 16384 - max: 71676704 + min: 233472 + max: 74819648 primary_compute_unit: NPU precision: int8 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jnp10e75q + job_id: jz5ww4qz5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,7 +91,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:16:07.677274Z' + timestamp: '2024-04-02T15:24:42.915050Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' diff --git a/qai_hub_models/models/ffnet_54s_quantized/requirements.txt b/qai_hub_models/models/ffnet_54s_quantized/requirements.txt new file mode 100644 index 00000000..2470ac6c --- /dev/null +++ b/qai_hub_models/models/ffnet_54s_quantized/requirements.txt @@ -0,0 +1 @@ +scikit-image==0.21.0 diff --git a/qai_hub_models/models/ffnet_78s/perf.yaml b/qai_hub_models/models/ffnet_78s/perf.yaml index 3190c99d..46f4ced1 100644 --- a/qai_hub_models/models/ffnet_78s/perf.yaml +++ b/qai_hub_models/models/ffnet_78s/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: FFNet-78S performance_metrics: - torchscript_onnx_tflite: - inference_time: 28993.0 - throughput: 34.49108405477184 + inference_time: 29260.0 + throughput: 34.17634996582365 estimated_peak_memory_range: - min: 2699264 - max: 4868664 + min: 2568192 + max: 5238920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: j0pxvq9g7 + job_id: jmg90dwqg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:58:52.803970Z' + timestamp: '2024-04-02T15:27:40.352259Z' torchscript_onnx_qnn: - inference_time: 23765.0 - throughput: 42.07868714496108 + inference_time: 23452.0 + throughput: 42.64028654272557 estimated_peak_memory_range: - min: 25214976 - max: 45434792 + min: 24997888 + max: 45509104 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 236 + layers_on_npu: 235 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 236 - job_id: joprkre50 + total_layers: 235 + job_id: jvgdn2ok5 job_status: Passed - torchscript_onnx_tflite: - inference_time: 21479.0 - throughput: 46.557102285953725 + inference_time: 21325.0 + throughput: 46.893317702227435 estimated_peak_memory_range: - min: 2478080 - max: 130875008 + min: 1220608 + max: 135944608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jegn24mgo + job_id: jnp126ekg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-15T00:00:58.594801Z' + timestamp: '2024-04-02T15:30:22.141498Z' torchscript_onnx_qnn: - inference_time: 17826.0 - throughput: 56.09783462358353 + inference_time: 17797.0 + throughput: 56.18924537843457 estimated_peak_memory_range: - min: 25219072 - max: 99798224 + min: 228487168 + max: 306923024 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 236 + layers_on_npu: 235 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 236 - job_id: jep281mp6 + total_layers: 235 + job_id: jz5ww4qj5 job_status: Passed diff --git a/qai_hub_models/models/ffnet_78s_lowres/perf.yaml b/qai_hub_models/models/ffnet_78s_lowres/perf.yaml index e55df80c..31b25804 100644 --- a/qai_hub_models/models/ffnet_78s_lowres/perf.yaml +++ b/qai_hub_models/models/ffnet_78s_lowres/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: FFNet-78S-LowRes performance_metrics: - torchscript_onnx_tflite: - inference_time: 10810.0 - throughput: 92.50693802035153 + inference_time: 10717.0 + throughput: 93.30969487729774 estimated_peak_memory_range: - min: 0 - max: 1890472 + min: 663552 + max: 2911376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jegn2dmgo + job_id: jmg90dwvg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:53:13.887710Z' + timestamp: '2024-04-02T15:16:39.163437Z' torchscript_onnx_qnn: - inference_time: 11408.0 - throughput: 87.6577840112202 + inference_time: 11424.0 + throughput: 87.53501400560224 estimated_peak_memory_range: - min: 16384 - max: 52414400 + min: 40960 + max: 53367328 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 237 + layers_on_npu: 236 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 237 - job_id: jep28qmp6 + total_layers: 236 + job_id: jvgdn2ol5 job_status: Passed - torchscript_onnx_tflite: - inference_time: 7768.0 - throughput: 128.73326467559218 + inference_time: 7571.0 + throughput: 132.0829480914014 estimated_peak_memory_range: - min: 540672 - max: 52237632 + min: 45056 + max: 50924912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: joprkme50 + job_id: jnp126elg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:57:44.327749Z' + timestamp: '2024-04-02T15:19:20.871561Z' torchscript_onnx_qnn: - inference_time: 8084.0 - throughput: 123.70113805047006 + inference_time: 7980.0 + throughput: 125.31328320802005 estimated_peak_memory_range: - min: 6328320 - max: 72586224 + min: 6307840 + max: 71292128 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 237 + layers_on_npu: 236 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 237 - job_id: jqpyek4gy + total_layers: 236 + job_id: jz5729drp job_status: Passed diff --git a/qai_hub_models/models/ffnet_78s_quantized/README.md b/qai_hub_models/models/ffnet_78s_quantized/README.md index e4cfbc65..6c65bf52 100644 --- a/qai_hub_models/models/ffnet_78s_quantized/README.md +++ b/qai_hub_models/models/ffnet_78s_quantized/README.md @@ -16,6 +16,11 @@ a hosted Qualcomm® device. ## Example & Usage +Install the package via pip: +```bash +pip install "qai_hub_models[ffnet_78s_quantized]" +``` + Once installed, run the following simple CLI demo: diff --git a/qai_hub_models/models/ffnet_78s_quantized/perf.yaml b/qai_hub_models/models/ffnet_78s_quantized/perf.yaml index 3201ba82..677fc421 100644 --- a/qai_hub_models/models/ffnet_78s_quantized/perf.yaml +++ b/qai_hub_models/models/ffnet_78s_quantized/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: FFNet-78S-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 8368.0 - throughput: 119.50286806883365 + inference_time: 8383.0 + throughput: 119.28903733746868 estimated_peak_memory_range: - min: 663552 - max: 2264096 + min: 692224 + max: 40285240 primary_compute_unit: NPU precision: int8 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 154 - job_id: jegn2jmgo + job_id: jqp4n3wlg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,7 +53,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:31:42.853131Z' + timestamp: '2024-04-02T15:19:19.002436Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -69,11 +70,11 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 6095.0 - throughput: 164.06890894175552 + inference_time: 5978.0 + throughput: 167.2800267648043 estimated_peak_memory_range: - min: 16384 - max: 84212448 + min: 28672 + max: 87145904 primary_compute_unit: NPU precision: int8 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 154 - job_id: jep282mp6 + job_id: j0px9x19p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,7 +91,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:31:42.853166Z' + timestamp: '2024-04-02T15:19:19.002449Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' diff --git a/qai_hub_models/models/ffnet_78s_quantized/requirements.txt b/qai_hub_models/models/ffnet_78s_quantized/requirements.txt new file mode 100644 index 00000000..2470ac6c --- /dev/null +++ b/qai_hub_models/models/ffnet_78s_quantized/requirements.txt @@ -0,0 +1 @@ +scikit-image==0.21.0 diff --git a/qai_hub_models/models/googlenet/perf.yaml b/qai_hub_models/models/googlenet/perf.yaml index 655972d7..ddbc731d 100644 --- a/qai_hub_models/models/googlenet/perf.yaml +++ b/qai_hub_models/models/googlenet/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: GoogLeNet performance_metrics: - torchscript_onnx_tflite: - inference_time: 1041.0 - throughput: 960.6147934678194 + inference_time: 1043.0 + throughput: 958.7727708533077 estimated_peak_memory_range: min: 12288 - max: 1836376 + max: 2222648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: joprq3950 + job_id: j2p046reg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:24:33.473846Z' + timestamp: '2024-04-02T15:44:36.230603Z' torchscript_onnx_qnn: - inference_time: 1083.0 - throughput: 923.3610341643582 + inference_time: 1085.0 + throughput: 921.6589861751152 estimated_peak_memory_range: - min: 32768 - max: 26497136 + min: 28672 + max: 26694664 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 144 + layers_on_npu: 143 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 144 - job_id: jqpyw37gy + total_layers: 143 + job_id: jogkv8yop job_status: Passed - torchscript_onnx_tflite: - inference_time: 647.0 - throughput: 1545.595054095827 + inference_time: 685.0 + throughput: 1459.85401459854 estimated_peak_memory_range: - min: 16384 - max: 45415536 + min: 12288 + max: 45701264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jep26y4g6 + job_id: j1p82178p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:28:59.659531Z' + timestamp: '2024-04-02T15:47:18.152147Z' torchscript_onnx_qnn: - inference_time: 682.0 - throughput: 1466.275659824047 + inference_time: 694.0 + throughput: 1440.922190201729 estimated_peak_memory_range: - min: 0 - max: 49977664 + min: 618496 + max: 54284752 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 144 + layers_on_npu: 143 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 144 - job_id: j2p0q065w + total_layers: 143 + job_id: jn5q0v2mp job_status: Passed diff --git a/qai_hub_models/models/googlenet_quantized/perf.yaml b/qai_hub_models/models/googlenet_quantized/perf.yaml index e9d21985..0b855f33 100644 --- a/qai_hub_models/models/googlenet_quantized/perf.yaml +++ b/qai_hub_models/models/googlenet_quantized/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,19 +32,19 @@ models: - name: GoogLeNetQuantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 331.0 - throughput: 3021.1480362537764 + inference_time: 290.0 + throughput: 3448.2758620689656 estimated_peak_memory_range: min: 12288 - max: 1926544 + max: 1574472 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 87 + layers_on_npu: 85 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 87 - job_id: jnp109l5q + total_layers: 85 + job_id: j1gl4lkl5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,36 +53,36 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:54:15.624495Z' + timestamp: '2024-04-02T16:10:55.550466Z' torchscript_onnx_qnn: - inference_time: 365.0 - throughput: 2739.72602739726 + inference_time: 337.0 + throughput: 2967.359050445104 estimated_peak_memory_range: - min: 638976 - max: 5546832 + min: 73728 + max: 4963272 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 89 + layers_on_npu: 86 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 89 - job_id: jqp4qzlgo + total_layers: 86 + job_id: jwgoz8vdp job_status: Passed - torchscript_onnx_tflite: - inference_time: 248.0 - throughput: 4032.2580645161293 + inference_time: 208.0 + throughput: 4807.692307692308 estimated_peak_memory_range: - min: 16384 - max: 32361600 + min: 12288 + max: 33584240 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 87 + layers_on_npu: 85 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 87 - job_id: jz57zqrp3 + total_layers: 85 + job_id: j1p3n6mz5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-15T00:00:04.109028Z' + timestamp: '2024-04-02T16:12:28.573393Z' torchscript_onnx_qnn: - inference_time: 258.0 - throughput: 3875.968992248062 + inference_time: 248.0 + throughput: 4032.2580645161293 estimated_peak_memory_range: - min: 618496 - max: 47357168 + min: 163840 + max: 44949040 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 89 + layers_on_npu: 86 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 89 - job_id: j0pxvw9g7 + total_layers: 86 + job_id: j1pvq7wmg job_status: Passed diff --git a/qai_hub_models/models/hrnet_pose/perf.yaml b/qai_hub_models/models/hrnet_pose/perf.yaml index e8ac7da2..2901041e 100644 --- a/qai_hub_models/models/hrnet_pose/perf.yaml +++ b/qai_hub_models/models/hrnet_pose/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,19 +32,19 @@ models: - name: HRNetPose performance_metrics: - torchscript_onnx_tflite: - inference_time: 2519.0 - throughput: 396.9829297340214 + inference_time: 2297.0 + throughput: 435.35045711798 estimated_peak_memory_range: - min: 24576 - max: 3015464 + min: 16384 + max: 2784976 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 515 + layers_on_npu: 514 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 515 - job_id: jep28oxp6 + total_layers: 514 + job_id: j7gjdql8g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,36 +53,36 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:57:53.421052Z' + timestamp: '2024-04-02T15:27:57.614569Z' torchscript_onnx_qnn: - inference_time: 2608.0 - throughput: 383.4355828220859 + inference_time: 2295.0 + throughput: 435.7298474945534 estimated_peak_memory_range: - min: 49152 - max: 58039344 + min: 12288 + max: 58975320 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 747 + layers_on_npu: 745 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 747 - job_id: j1p8ojzg9 + total_layers: 745 + job_id: jygz2n76g job_status: Passed - torchscript_onnx_tflite: - inference_time: 1878.0 - throughput: 532.4813631522896 + inference_time: 1723.0 + throughput: 580.3830528148578 estimated_peak_memory_range: min: 16384 - max: 103402912 + max: 106291456 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 515 + layers_on_npu: 514 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 515 - job_id: j2p0yo2gw + total_layers: 514 + job_id: jlpeoyv0g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:59:59.942614Z' + timestamp: '2024-04-02T15:30:43.560020Z' torchscript_onnx_qnn: - inference_time: 1922.0 - throughput: 520.2913631633714 + inference_time: 1715.0 + throughput: 583.0903790087464 estimated_peak_memory_range: min: 606208 - max: 178228720 + max: 177690672 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 747 + layers_on_npu: 745 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 747 - job_id: j1glnwepv + total_layers: 745 + job_id: jz5ww49j5 job_status: Passed diff --git a/qai_hub_models/models/hrnet_pose_quantized/README.md b/qai_hub_models/models/hrnet_pose_quantized/README.md deleted file mode 100644 index 4697d29f..00000000 --- a/qai_hub_models/models/hrnet_pose_quantized/README.md +++ /dev/null @@ -1,59 +0,0 @@ -[![Qualcomm® AI Hub Models](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/quic-logo.jpg)](../../README.md) - - -# [HRNetPoseQuantized: Perform accurate human pose estimation](https://aihub.qualcomm.com/models/hrnet_pose_quantized) - -HRNet performs pose estimation in high-resolution representations. - -This is based on the implementation of HRNetPoseQuantized found -[here](https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/hrnet_posenet). This repository contains scripts for optimized on-device -export suitable to run on Qualcomm® devices. More details on model performance -accross various devices, can be found [here](https://aihub.qualcomm.com/models/hrnet_pose_quantized). - -[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on -a hosted Qualcomm® device. - - -## Example & Usage - -Install the package via pip: -```bash -pip install "qai_hub_models[hrnet_pose_quantized]" -``` - - -Once installed, run the following simple CLI demo: - -```bash -python -m qai_hub_models.models.hrnet_pose_quantized.demo -``` -More details on the CLI tool can be found with the `--help` option. See -[demo.py](demo.py) for sample usage of the model including pre/post processing -scripts. Please refer to our [general instructions on using -models](../../../#getting-started) for more usage instructions. - -## Export for on-device deployment - -This repository contains export scripts that produce a model optimized for -on-device deployment. This can be run as follows: - -```bash -python -m qai_hub_models.models.hrnet_pose_quantized.export -``` -Additional options are documented with the `--help` option. Note that the above -script requires access to Deployment instructions for Qualcomm® AI Hub. - -## License -- The license for the original implementation of HRNetPoseQuantized can be found - [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) - -## References -* [Deep High-Resolution Representation Learning for Human Pose Estimation](https://arxiv.org/abs/1902.09212) -* [Source Model Implementation](https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/hrnet_posenet) - -## Community -* Join [our AI Hub Slack community](https://join.slack.com/t/qualcomm-ai-hub/shared_invite/zt-2dgf95loi-CXHTDRR1rvPgQWPO~ZZZJg) to collaborate, post questions and learn more about on-device AI. -* For questions or feedback please [reach out to us](mailto:ai-hub-support@qti.qualcomm.com). - - diff --git a/qai_hub_models/models/hrnet_pose_quantized/__init__.py b/qai_hub_models/models/hrnet_pose_quantized/__init__.py deleted file mode 100644 index 26dbe409..00000000 --- a/qai_hub_models/models/hrnet_pose_quantized/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# --------------------------------------------------------------------- -# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. -# SPDX-License-Identifier: BSD-3-Clause -# --------------------------------------------------------------------- -from qai_hub_models.models.hrnet_pose.app import HRNetPoseApp # noqa: F401 - -from .model import MODEL_ID # noqa: F401 -from .model import HRNetPoseQuantizable as Model # noqa: F401 diff --git a/qai_hub_models/models/hrnet_pose_quantized/conftest.py b/qai_hub_models/models/hrnet_pose_quantized/conftest.py deleted file mode 100644 index 04066f70..00000000 --- a/qai_hub_models/models/hrnet_pose_quantized/conftest.py +++ /dev/null @@ -1,26 +0,0 @@ -# --------------------------------------------------------------------- -# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. -# SPDX-License-Identifier: BSD-3-Clause -# --------------------------------------------------------------------- -# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. - -from unittest.mock import patch - -import pytest - -from qai_hub_models.models.hrnet_pose_quantized import Model -from qai_hub_models.utils.testing import skip_clone_repo_check - - -@pytest.fixture(autouse=True) -@skip_clone_repo_check -def mock_from_pretrained(): - """ - Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations - across all tests return the cached instance of the model. - """ - mock = patch( - "qai_hub_models.models.hrnet_pose_quantized.Model.from_pretrained", - return_value=Model.from_pretrained(), - ) - mock.start() diff --git a/qai_hub_models/models/hrnet_pose_quantized/demo.py b/qai_hub_models/models/hrnet_pose_quantized/demo.py deleted file mode 100644 index a5eca7ae..00000000 --- a/qai_hub_models/models/hrnet_pose_quantized/demo.py +++ /dev/null @@ -1,57 +0,0 @@ -# --------------------------------------------------------------------- -# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. -# SPDX-License-Identifier: BSD-3-Clause -# --------------------------------------------------------------------- -from qai_hub_models.models.hrnet_pose.app import HRNetPoseApp -from qai_hub_models.models.hrnet_pose_quantized.model import ( - MODEL_ASSET_VERSION, - MODEL_ID, - HRNetPoseQuantizable, -) -from qai_hub_models.utils.args import ( - demo_model_from_cli_args, - get_model_cli_parser, - get_on_device_demo_parser, - validate_on_device_demo_args, -) -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image -from qai_hub_models.utils.display import display_or_save_image - -IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, "hrnet_pose_demo.png" -) - - -# The demo will display a image with the predicted keypoints. -def main(is_test: bool = False): - # Demo parameters - parser = get_model_cli_parser(HRNetPoseQuantizable) - parser = get_on_device_demo_parser(parser, add_output_dir=True) - parser.add_argument( - "--image", - type=str, - default=IMAGE_ADDRESS, - help="image file path or URL", - ) - - args = parser.parse_args([] if is_test else None) - validate_on_device_demo_args(args, MODEL_ID) - - # Load image & model - model = demo_model_from_cli_args(HRNetPoseQuantizable, MODEL_ID, args) - image = load_image(args.image) - print("Model Loaded") - - app = HRNetPoseApp(model) - keypoints = app.predict_pose_keypoints(image)[0] - if not is_test: - display_or_save_image( - keypoints, - args.output_dir, - "hrnetpose_quantized_demo_output.png", - "keypoints", - ) - - -if __name__ == "__main__": - main() diff --git a/qai_hub_models/models/hrnet_pose_quantized/export.py b/qai_hub_models/models/hrnet_pose_quantized/export.py deleted file mode 100644 index ec61ebb0..00000000 --- a/qai_hub_models/models/hrnet_pose_quantized/export.py +++ /dev/null @@ -1,215 +0,0 @@ -# --------------------------------------------------------------------- -# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. -# SPDX-License-Identifier: BSD-3-Clause -# --------------------------------------------------------------------- -# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. - - -from __future__ import annotations - -import os -import warnings -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple, cast - -import qai_hub as hub - -from qai_hub_models.models.hrnet_pose_quantized import Model -from qai_hub_models.utils.args import ( - export_parser, - get_input_spec_kwargs, - get_model_kwargs, -) -from qai_hub_models.utils.base_model import TargetRuntime -from qai_hub_models.utils.compare import torch_inference -from qai_hub_models.utils.printing import ( - print_inference_metrics, - print_on_target_demo_cmd, - print_profile_metrics_from_job, -) -from qai_hub_models.utils.qai_hub_helpers import ( - can_access_qualcomm_ai_hub, - export_without_hub_access, - transpose_channel_first_to_last, - transpose_channel_last_to_first, -) -from qai_hub_models.utils.qnn_helpers import get_qnn_inputs - - -def export_model( - device: str = "Samsung Galaxy S23", - skip_profiling: bool = False, - skip_inferencing: bool = False, - skip_downloading: bool = False, - skip_summary: bool = False, - output_dir: Optional[str] = None, - target_runtime: TargetRuntime = TargetRuntime.TFLITE, - compile_options: str = "", - profile_options: str = "", - **additional_model_kwargs, -) -> Tuple[hub.CompileJob, Optional[hub.ProfileJob], Optional[hub.InferenceJob]] | List[ - str -]: - """ - This function accomplishes 6 main tasks: - - 1. Instantiates a PyTorch model and converts it to a traced TorchScript format. - 2. Compiles the model to an asset that can be run on device. - 3. Profiles the model performance on real devices. - 4. Inferences the model on sample inputs. - 5. Downloads the model asset to the local directory. - 6. Summarizes the results from profiling and inference. - - Each of the last four steps can be optionally skipped using the input options. - - Parameters: - device: Device for which to export the model. - Full list of available devices can be found by running `hub.get_devices()`. - Defaults to DEFAULT_DEVICE if not specified. - skip_profiling: If set, skips profiling of compiled model on real devices. - skip_inferencing: If set, skips computing on-device outputs from sample data. - skip_downloading: If set, skips downloading of compiled model. - skip_summary: If set, skips waiting for and summarizing results - from profiling and inference. - output_dir: Directory to store generated assets (e.g. compiled model). - Defaults to `/build/`. - target_runtime: Which on-device runtime to target. Default is TFLite. - compile_options: Additional options to pass when submitting the compile job. - profile_options: Additional options to pass when submitting the profile job. - **additional_model_kwargs: Additional optional kwargs used to customize - `model_cls.from_pretrained` and `model.get_input_spec` - - Returns: - A 3-tuple of: - * A CompileJob object containing metadata about the compile job submitted to hub. - * A ProfileJob containing metadata about the profile job (None if profiling skipped). - * An InferenceJob containing metadata about the inference job (None if inferencing skipped). - """ - model_name = "hrnet_pose_quantized" - output_path = Path(output_dir or Path.cwd() / "build" / model_name) - if not can_access_qualcomm_ai_hub(): - return export_without_hub_access( - "hrnet_pose_quantized", - "HRNetPoseQuantized", - device, - skip_profiling, - skip_inferencing, - skip_downloading, - skip_summary, - output_path, - target_runtime, - compile_options, - profile_options, - ) - - # 1. Initialize PyTorch model - model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) - input_spec = model.get_input_spec( - **get_input_spec_kwargs(model, additional_model_kwargs) - ) - - # Trace the model - source_model = model.convert_to_hub_source_model( - target_runtime, output_path, input_spec - ) - if target_runtime == TargetRuntime.TFLITE: - quant_calibration_data = None - else: - quant_calibration_data = model.get_calibration_data(target_runtime, input_spec) - - # 2. Compile the model to an on-device asset - model_compile_options = model.get_hub_compile_options( - target_runtime, - compile_options - + " --force_channel_last_input image" - + " --force_channel_last_output output_0", - ) - print(f"Optimizing model {model_name} to run on-device") - submitted_compile_job = hub.submit_compile_job( - model=source_model, - input_specs=input_spec, - device=hub.Device(device), - name=model_name, - calibration_data=quant_calibration_data, - options=model_compile_options, - ) - compile_job = cast(hub.client.CompileJob, submitted_compile_job) - - # 3. Profile the model asset on real devices - profile_job: Optional[hub.client.ProfileJob] = None - if not skip_profiling: - profile_options_all = model.get_hub_profile_options( - target_runtime, profile_options - ) - print(f"Profiling model {model_name} on a hosted device.") - submitted_profile_job = hub.submit_profile_job( - model=compile_job.get_target_model(), - device=hub.Device(device), - name=model_name, - options=profile_options_all, - ) - profile_job = cast(hub.client.ProfileJob, submitted_profile_job) - - # 4. Run inference on-device with sample inputs - inference_job: Optional[hub.client.InferenceJob] = None - if not skip_inferencing: - profile_options_all = model.get_hub_profile_options( - target_runtime, profile_options - ) - print( - f"Running inference for {model_name} on a hosted device with example inputs." - ) - sample_inputs = model.sample_inputs(input_spec) - hub_inputs = sample_inputs - if target_runtime == TargetRuntime.QNN: - hub_inputs = get_qnn_inputs(compile_job, sample_inputs) - # Convert inputs from channel first to channel last - hub_inputs = transpose_channel_first_to_last( - "image", sample_inputs, target_runtime - ) - submitted_inference_job = hub.submit_inference_job( - model=compile_job.get_target_model(), - inputs=hub_inputs, - device=hub.Device(device), - name=model_name, - options=profile_options_all, - ) - inference_job = cast(hub.client.InferenceJob, submitted_inference_job) - - # 5. Download the model asset to a local file - if not skip_downloading: - os.makedirs(output_path, exist_ok=True) - target_model: hub.Model = compile_job.get_target_model() # type: ignore - target_model.download(str(output_path / f"{model_name}.tflite")) - - # 6. Summarize the results from profiling and inference - if not skip_summary and not skip_profiling: - assert profile_job is not None and profile_job.wait().success - profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore - print_profile_metrics_from_job(profile_job, profile_data) - - if not skip_summary and not skip_inferencing: - torch_out = torch_inference(model, sample_inputs) - assert inference_job is not None and inference_job.wait().success - inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore - # Convert outputs from channel last to channel first - inference_result = transpose_channel_last_to_first( - "output_0", inference_result, target_runtime - ) - print_inference_metrics(inference_job, inference_result, torch_out) - - if not skip_summary: - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) - - return (compile_job, profile_job, inference_job) - - -def main(): - warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) - args = parser.parse_args() - export_model(**vars(args)) - - -if __name__ == "__main__": - main() diff --git a/qai_hub_models/models/hrnet_pose_quantized/info.yaml b/qai_hub_models/models/hrnet_pose_quantized/info.yaml deleted file mode 100644 index 539d9ab1..00000000 --- a/qai_hub_models/models/hrnet_pose_quantized/info.yaml +++ /dev/null @@ -1,36 +0,0 @@ -name: HRNetPoseQuantized -# id must match with the model dir name in qai_hub_models -id: hrnet_pose_quantized -status: public -headline: Perform accurate human pose estimation. -domain: Computer Vision -use_case: Pose Estimation -description: HRNet performs pose estimation in high-resolution representations. -tags: - - quantized -research_paper: https://arxiv.org/abs/1902.09212 -research_paper_title: Deep High-Resolution Representation Learning for Human Pose - Estimation -license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf -deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf -source_repo: - https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/hrnet_posenet -technical_details: - Model checkpoint: hrnet_posenet_FP32_state_dict - Input resolution: 192x256 - Number of parameters: 28.5M - Model size: 109 MB -applicable_scenarios: - - Injury prevention training - - Sports performance analysis - - Posture recognition -form_factors: - - Phone - - Tablet - - IoT -related_models: [litehrnet, hrnet_pose] -has_static_banner: yes -has_animated_banner: no -license_type: other -deploy_license_type: AI Model Hub License -dataset: [] diff --git a/qai_hub_models/models/hrnet_pose_quantized/model.py b/qai_hub_models/models/hrnet_pose_quantized/model.py deleted file mode 100644 index 00191c59..00000000 --- a/qai_hub_models/models/hrnet_pose_quantized/model.py +++ /dev/null @@ -1,78 +0,0 @@ -# --------------------------------------------------------------------- -# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. -# SPDX-License-Identifier: BSD-3-Clause -# --------------------------------------------------------------------- -from __future__ import annotations - -# isort: off -# This verifies aimet is installed, and this must be included first. -from qai_hub_models.utils.quantization_aimet import ( - AIMETQuantizableMixin, -) - -# isort: on - -import torch -from aimet_torch.cross_layer_equalization import equalize_model -from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim - -from qai_hub_models.models.hrnet_pose.model import HRNetPose -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset - -MODEL_ID = __name__.split(".")[-2] -MODEL_ASSET_VERSION = 1 -# Weights and config stored in S3 are sourced from -# https://github.com/quic/aimet-model-zoo/blob/develop/aimet_zoo_torch/hrnet_posenet/models/model_cards/hrnet_posenet_w8a8.json: -# https://github.com/quic/aimet-model-zoo/releases/download/phase_2_march_artifacts/hrnet_posenet_W8A8_state_dict.pth -# Encodings were generated with AIMET QuantSim export -QUANTIZED_WEIGHTS = "hrnet_posenet_W8A8_state_dict.pth" -AIMET_ENCODINGS = "hrnetpose_aimet_quantization_encodings.json" -AIMET_CONFIG = "default_config_per_channel.json" - - -class HRNetPoseQuantizable(AIMETQuantizableMixin, HRNetPose): - """HRNetPose with post training quantization suport - - Supports only 8 bit weights and activations, and only loads pre-quantized checkpoints. - Support for quantizing using your own weights & data will come at a later date.""" - - def __init__( - self, - hrnet_model: QuantizationSimModel, - ) -> None: - HRNetPose.__init__(self, hrnet_model.model) - AIMETQuantizableMixin.__init__( - self, hrnet_model, needs_onnx_direct_aimet_export=True - ) - - @classmethod - def from_pretrained(cls) -> HRNetPoseQuantizable: - model = HRNetPose.from_pretrained() - input_shape = HRNetPose.get_input_spec()["image"][0] - equalize_model(model, input_shape) - - weights = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, QUANTIZED_WEIGHTS - ).fetch() - aimet_config = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, AIMET_CONFIG - ).fetch() - aimet_encodings = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, AIMET_ENCODINGS - ).fetch() - - # Load the model weights and quantization parameters - state_dict = torch.load(weights, map_location=torch.device("cpu")) - new_state_dict = {"model." + key: value for key, value in state_dict.items()} - model.load_state_dict(new_state_dict) - sim = QuantizationSimModel( - model, - quant_scheme="tf_enhanced", - default_param_bw=8, - default_output_bw=8, - config_file=aimet_config, - dummy_input=torch.rand(input_shape), - ) - load_encodings_to_sim(sim, aimet_encodings) - - return cls(sim) diff --git a/qai_hub_models/models/hrnet_pose_quantized/perf.yaml b/qai_hub_models/models/hrnet_pose_quantized/perf.yaml deleted file mode 100644 index db64d4b2..00000000 --- a/qai_hub_models/models/hrnet_pose_quantized/perf.yaml +++ /dev/null @@ -1,108 +0,0 @@ -aggregated: - supported_oses: - - Android - supported_devices: - - Google Pixel 3 - - Google Pixel 3a - - Google Pixel 3a XL - - Google Pixel 4 - - Google Pixel 4a - - Google Pixel 5a 5G - - Samsung Galaxy S21 - - Samsung Galaxy S21 Ultra - - Samsung Galaxy S21+ - - Samsung Galaxy S22 5G - - Samsung Galaxy S22 Ultra 5G - - Samsung Galaxy S22+ 5G - - Samsung Galaxy S23 - - Samsung Galaxy S23 Ultra - - Samsung Galaxy S23+ - - Samsung Galaxy S24 - - Samsung Galaxy S24 Ultra - - Samsung Galaxy Tab S8 - - Xiaomi 12 - - Xiaomi 12 Pro - supported_chipsets: - - Snapdragon® 8 Gen 1 - - Snapdragon® 8 Gen 2 - - Snapdragon® 8 Gen 3 - - Snapdragon® 888 -models: -- name: HRNetPoseQuantized - performance_metrics: - - torchscript_onnx_tflite: - inference_time: 2539.0 - throughput: 393.8558487593541 - estimated_peak_memory_range: - min: 24576 - max: 4215600 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 515 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 515 - job_id: jo5mr2wgk - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:46:47.467700Z' - torchscript_onnx_qnn: - inference_time: 'null' - throughput: 'null' - estimated_peak_memory_range: - min: 0 - max: 0 - primary_compute_unit: 'null' - precision: 'null' - layer_info: - layers_on_npu: 0 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 0 - job_id: '' - job_status: Skipped - - torchscript_onnx_tflite: - inference_time: 1859.0 - throughput: 537.9236148466917 - estimated_peak_memory_range: - min: 16384 - max: 102354800 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 515 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 515 - job_id: jegn2yrgo - job_status: Passed - reference_device_info: - name: Samsung Galaxy S24 - os: '14' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:46:47.467710Z' - torchscript_onnx_qnn: - inference_time: 'null' - throughput: 'null' - estimated_peak_memory_range: - min: 0 - max: 0 - primary_compute_unit: 'null' - precision: 'null' - layer_info: - layers_on_npu: 0 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 0 - job_id: '' - job_status: Skipped diff --git a/qai_hub_models/models/hrnet_pose_quantized/requirements.txt b/qai_hub_models/models/hrnet_pose_quantized/requirements.txt deleted file mode 100644 index 11ca0687..00000000 --- a/qai_hub_models/models/hrnet_pose_quantized/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -yacs==0.1.8 -mmpose==1.2.0 -mmcv==2.1.0 -mmdet==3.2.0 diff --git a/qai_hub_models/models/hrnet_pose_quantized/test.py b/qai_hub_models/models/hrnet_pose_quantized/test.py deleted file mode 100644 index 343af7bc..00000000 --- a/qai_hub_models/models/hrnet_pose_quantized/test.py +++ /dev/null @@ -1,46 +0,0 @@ -# --------------------------------------------------------------------- -# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. -# SPDX-License-Identifier: BSD-3-Clause -# --------------------------------------------------------------------- -import numpy as np -import torch - -from qai_hub_models.models.hrnet_pose.app import HRNetPoseApp -from qai_hub_models.models.hrnet_pose.demo import IMAGE_ADDRESS -from qai_hub_models.models.hrnet_pose.demo import main as demo_main -from qai_hub_models.models.hrnet_pose_quantized.model import ( - MODEL_ASSET_VERSION, - MODEL_ID, - HRNetPoseQuantizable, -) -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image -from qai_hub_models.utils.testing import assert_most_close, skip_clone_repo_check - -OUTPUT_IMAGE_LOCAL_PATH = "hrnetpose_quantized_output.png" -OUTPUT_IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, OUTPUT_IMAGE_LOCAL_PATH -) - - -@skip_clone_repo_check -def test_task(): - # AIMET Quantization Simulator introduces randomness. Eliminate that for this test. - torch.manual_seed(0) - image = load_image(IMAGE_ADDRESS) - model = HRNetPoseQuantizable.from_pretrained() - app = HRNetPoseApp(model=model) - output = app.predict(image)[0] - - output_image = load_image(OUTPUT_IMAGE_ADDRESS) - assert_most_close( - np.asarray(output, dtype=np.float32) / 255, - np.asarray(output_image, dtype=np.float32) / 255, - 0.005, - rtol=0.02, - atol=0.2, - ) - - -@skip_clone_repo_check -def test_demo(): - demo_main(is_test=True) diff --git a/qai_hub_models/models/huggingface_wavlm_base_plus/model.py b/qai_hub_models/models/huggingface_wavlm_base_plus/model.py index cca89a2f..5074e78e 100644 --- a/qai_hub_models/models/huggingface_wavlm_base_plus/model.py +++ b/qai_hub_models/models/huggingface_wavlm_base_plus/model.py @@ -76,6 +76,14 @@ def get_input_spec( # the model input specification upon submitting a profile job. return {"input": ((batch_size, sample_length), "float32")} + def get_hub_profile_options( + self, target_runtime: TargetRuntime, other_profile_options: str = "" + ) -> str: + profile_options = super().get_hub_profile_options( + target_runtime, other_profile_options + ) + return profile_options + " --compute_unit cpu" + # Modules used to override Huggingface WavLM to be NPU friendly class SliceConv1d(torch.nn.Module): @@ -169,22 +177,6 @@ def forward(self, x): x = torch.concat(torch.unbind(x, axis=2), axis=-1) return x[:, :, :-1] - def get_hub_compile_options( - self, target_runtime: TargetRuntime, other_compile_options: str = "" - ) -> str: - compile_options = super().get_hub_compile_options( - target_runtime, other_compile_options - ) - return compile_options + " --compute_unit gpu" - - def get_hub_profile_options( - self, target_runtime: TargetRuntime, other_profile_options: str = "" - ) -> str: - profile_options = super().get_hub_profile_options( - target_runtime, other_profile_options - ) - return profile_options + " --compute_unit gpu" - def convert_to_wavlm_npu(model: WavLMModel): """ diff --git a/qai_hub_models/models/inception_v3/perf.yaml b/qai_hub_models/models/inception_v3/perf.yaml index e4c34f61..799efc89 100644 --- a/qai_hub_models/models/inception_v3/perf.yaml +++ b/qai_hub_models/models/inception_v3/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: Inception-v3 performance_metrics: - torchscript_onnx_tflite: - inference_time: 1435.0 - throughput: 696.8641114982578 + inference_time: 1428.0 + throughput: 700.2801120448179 estimated_peak_memory_range: - min: 20480 - max: 1921832 + min: 28672 + max: 2409000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 131 - job_id: jqpyeorgy + job_id: jqp4n3xlg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:30:27.461416Z' + timestamp: '2024-04-02T15:16:46.144805Z' torchscript_onnx_qnn: - inference_time: 1475.0 - throughput: 677.9661016949152 + inference_time: 1458.0 + throughput: 685.8710562414266 estimated_peak_memory_range: - min: 20480 - max: 148512392 + min: 622592 + max: 149278208 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 220 + layers_on_npu: 219 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 220 - job_id: j1p8oezg9 + total_layers: 219 + job_id: jo5me8wqp job_status: Passed - torchscript_onnx_tflite: - inference_time: 1069.0 - throughput: 935.4536950420954 + inference_time: 1047.0 + throughput: 955.1098376313277 estimated_peak_memory_range: min: 12288 - max: 50854560 + max: 51670896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 131 - job_id: j2p0ym2gw + job_id: j0px9x79p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:33:41.582505Z' + timestamp: '2024-04-02T15:19:30.685618Z' torchscript_onnx_qnn: - inference_time: 1082.0 - throughput: 924.2144177449168 + inference_time: 1083.0 + throughput: 923.3610341643582 estimated_peak_memory_range: min: 618496 - max: 68383952 + max: 66991296 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 220 + layers_on_npu: 219 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 220 - job_id: jogkz2ygd + total_layers: 219 + job_id: jegn0k9m5 job_status: Passed diff --git a/qai_hub_models/models/inception_v3_quantized/perf.yaml b/qai_hub_models/models/inception_v3_quantized/perf.yaml index b06c4a03..481f8b41 100644 --- a/qai_hub_models/models/inception_v3_quantized/perf.yaml +++ b/qai_hub_models/models/inception_v3_quantized/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: Inception-v3-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 615.0 - throughput: 1626.0162601626016 + inference_time: 633.0 + throughput: 1579.778830963665 estimated_peak_memory_range: - min: 36864 - max: 2508048 + min: 12288 + max: 1553272 primary_compute_unit: NPU precision: int8 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jz57zj9p3 + job_id: jopr6w4ep job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,7 +53,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:41:04.203939Z' + timestamp: '2024-04-02T15:19:51.517901Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -69,11 +70,11 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 487.0 - throughput: 2053.388090349076 + inference_time: 461.0 + throughput: 2169.1973969631235 estimated_peak_memory_range: - min: 0 - max: 63551712 + min: 12288 + max: 64115632 primary_compute_unit: NPU precision: int8 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: j0pxv7lg7 + job_id: jep2xe7mg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,7 +91,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:41:04.203947Z' + timestamp: '2024-04-02T15:19:51.517914Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' diff --git a/qai_hub_models/models/lama_dilated/perf.yaml b/qai_hub_models/models/lama_dilated/perf.yaml index af8a440d..c73b61c7 100644 --- a/qai_hub_models/models/lama_dilated/perf.yaml +++ b/qai_hub_models/models/lama_dilated/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: LaMa-Dilated performance_metrics: - torchscript_onnx_tflite: - inference_time: 88628.0 - throughput: 11.283115945299453 + inference_time: 87826.0 + throughput: 11.386149887277115 estimated_peak_memory_range: - min: 3252224 - max: 140731056 + min: 3280896 + max: 139026816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 346 - job_id: j2p0yv0gw + job_id: jqpyzm44g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:53:06.506039Z' + timestamp: '2024-04-02T15:27:34.825319Z' torchscript_onnx_qnn: - inference_time: 84164.0 - throughput: 11.881564564421843 + inference_time: 82023.0 + throughput: 12.191702327395975 estimated_peak_memory_range: - min: 4321280 - max: 33964280 + min: 667648 + max: 36691936 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 333 + layers_on_npu: 332 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 333 - job_id: j1gln12pv + total_layers: 332 + job_id: jogkv8lop job_status: Passed - torchscript_onnx_tflite: - inference_time: 62025.0 - throughput: 16.12253123740427 + inference_time: 61367.0 + throughput: 16.295403066794858 estimated_peak_memory_range: - min: 225280 - max: 245293744 + min: 36864 + max: 268387328 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 346 - job_id: jogkz9vgd + job_id: j1p82138p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:58:58.279247Z' + timestamp: '2024-04-02T15:30:17.633943Z' torchscript_onnx_qnn: - inference_time: 58950.0 - throughput: 16.963528413910094 + inference_time: 57731.0 + throughput: 17.321716235644626 estimated_peak_memory_range: - min: 78331904 - max: 243926976 + min: 155123712 + max: 340846160 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 333 + layers_on_npu: 332 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 333 - job_id: jw566dn5o + total_layers: 332 + job_id: jn5q0v7mp job_status: Passed diff --git a/qai_hub_models/models/litehrnet/perf.yaml b/qai_hub_models/models/litehrnet/perf.yaml index d1a63bd8..a4fdf14d 100644 --- a/qai_hub_models/models/litehrnet/perf.yaml +++ b/qai_hub_models/models/litehrnet/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: LiteHRNet performance_metrics: - torchscript_onnx_tflite: - inference_time: 15866.0 - throughput: 63.02785831337451 + inference_time: 15544.0 + throughput: 64.33350488934637 estimated_peak_memory_range: - min: 6811648 - max: 10391632 + min: 6557696 + max: 21204936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 10 total_layers: 1236 - job_id: jn5q83o57 + job_id: j1gl4l0l5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,7 +53,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:41:50.802497Z' + timestamp: '2024-04-02T15:47:29.705067Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -69,11 +70,11 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 10704.0 - throughput: 93.42301943198804 + inference_time: 10368.0 + throughput: 96.45061728395062 estimated_peak_memory_range: min: 20480 - max: 71674208 + max: 72953920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 10 total_layers: 1236 - job_id: j1glnkmpv + job_id: jw562w37g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,7 +91,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:41:50.802505Z' + timestamp: '2024-04-02T15:47:29.705081Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' diff --git a/qai_hub_models/models/mediapipe_face/perf.yaml b/qai_hub_models/models/mediapipe_face/perf.yaml index bf104ca8..cd78e1c7 100644 --- a/qai_hub_models/models/mediapipe_face/perf.yaml +++ b/qai_hub_models/models/mediapipe_face/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,19 +32,19 @@ models: - name: MediaPipeFaceDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 532.0 - throughput: 1879.6992481203008 + inference_time: 785.0 + throughput: 1273.8853503184714 estimated_peak_memory_range: min: 12288 - max: 1591696 + max: 1644744 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 111 + layers_on_npu: 112 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 111 - job_id: jn5q8nm57 + total_layers: 112 + job_id: j1p3n64z5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,36 +53,36 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:12:50.277943Z' + timestamp: '2024-04-02T15:54:57.036020Z' torchscript_onnx_qnn: - inference_time: 535.0 - throughput: 1869.1588785046729 + inference_time: 836.0 + throughput: 1196.1722488038276 estimated_peak_memory_range: - min: 16384 - max: 4401872 + min: 811008 + max: 7017760 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 111 + layers_on_npu: 148 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 111 - job_id: jwgoyxd58 + total_layers: 148 + job_id: jlpeoyr0g job_status: Passed - torchscript_onnx_tflite: - inference_time: 380.0 - throughput: 2631.5789473684213 + inference_time: 546.0 + throughput: 1831.5018315018315 estimated_peak_memory_range: - min: 12288 - max: 27416464 + min: 16384 + max: 29604672 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 111 + layers_on_npu: 112 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 111 - job_id: jw566x75o + total_layers: 112 + job_id: j1pvq71mg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,38 +91,38 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:15:59.596663Z' + timestamp: '2024-04-02T15:57:38.738298Z' torchscript_onnx_qnn: - inference_time: 381.0 - throughput: 2624.6719160104985 + inference_time: 594.0 + throughput: 1683.5016835016836 estimated_peak_memory_range: min: 12288 - max: 26948416 + max: 43350624 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 111 + layers_on_npu: 148 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 111 - job_id: j7gjx98pd + total_layers: 148 + job_id: jz5ww4dj5 job_status: Passed - name: MediaPipeFaceLandmarkDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 211.0 - throughput: 4739.336492890995 + inference_time: 309.0 + throughput: 3236.2459546925566 estimated_peak_memory_range: - min: 24576 - max: 1810232 + min: 12288 + max: 1598880 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 100 + layers_on_npu: 101 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 100 - job_id: j1glndlpv + total_layers: 101 + job_id: jwgoz81dp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -130,36 +131,36 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:24:30.398348Z' + timestamp: '2024-04-02T16:04:19.041124Z' torchscript_onnx_qnn: - inference_time: 210.0 - throughput: 4761.9047619047615 + inference_time: 394.0 + throughput: 2538.0710659898477 estimated_peak_memory_range: - min: 28672 - max: 1684984 + min: 471040 + max: 59853120 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 100 + layers_on_npu: 107 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 100 - job_id: j1pv38m5x + total_layers: 107 + job_id: jygz2nx6g job_status: Passed - torchscript_onnx_tflite: - inference_time: 159.0 - throughput: 6289.308176100629 + inference_time: 253.0 + throughput: 3952.5691699604745 estimated_peak_memory_range: min: 12288 - max: 24695408 + max: 25742560 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 100 + layers_on_npu: 101 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 100 - job_id: j1p3kdz52 + total_layers: 101 + job_id: j7gjdq08g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -168,19 +169,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:28:56.546828Z' + timestamp: '2024-04-02T16:06:04.371420Z' torchscript_onnx_qnn: - inference_time: 156.0 - throughput: 6410.25641025641 + inference_time: 284.0 + throughput: 3521.1267605633802 estimated_peak_memory_range: - min: 16384 - max: 24996560 + min: 12288 + max: 37168176 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 100 + layers_on_npu: 107 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 100 - job_id: jlpe9q0gr + total_layers: 107 + job_id: jmg90d3vg job_status: Passed diff --git a/qai_hub_models/models/mediapipe_hand/perf.yaml b/qai_hub_models/models/mediapipe_hand/perf.yaml index dad8f571..4a2806a8 100644 --- a/qai_hub_models/models/mediapipe_hand/perf.yaml +++ b/qai_hub_models/models/mediapipe_hand/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,19 +32,19 @@ models: - name: MediaPipeHandDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 765.0 - throughput: 1307.18954248366 + inference_time: 963.0 + throughput: 1038.4215991692627 estimated_peak_memory_range: - min: 12288 - max: 12061368 + min: 24576 + max: 3475384 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 151 + layers_on_npu: 152 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 151 - job_id: jep28dxp6 + total_layers: 152 + job_id: jnp126dlg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,36 +53,36 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:23:15.414918Z' + timestamp: '2024-04-02T15:16:50.064065Z' torchscript_onnx_qnn: - inference_time: 763.0 - throughput: 1310.615989515072 + inference_time: 1013.0 + throughput: 987.1668311944719 estimated_peak_memory_range: - min: 12288 - max: 1709784 + min: 806912 + max: 21114408 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 151 + layers_on_npu: 197 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 151 - job_id: jogkz0ygd + total_layers: 197 + job_id: j0px9xe9p job_status: Passed - torchscript_onnx_tflite: - inference_time: 571.0 - throughput: 1751.3134851138354 + inference_time: 679.0 + throughput: 1472.7540500736377 estimated_peak_memory_range: - min: 12288 - max: 51661744 + min: 16384 + max: 52478672 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 151 + layers_on_npu: 152 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 151 - job_id: j2p0y92gw + total_layers: 152 + job_id: jz5729vrp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,38 +91,38 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:27:44.926097Z' + timestamp: '2024-04-02T15:19:31.887081Z' torchscript_onnx_qnn: - inference_time: 547.0 - throughput: 1828.1535648994516 + inference_time: 722.0 + throughput: 1385.0415512465374 estimated_peak_memory_range: - min: 12288 - max: 52066480 + min: 802816 + max: 55474400 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 151 + layers_on_npu: 197 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 151 - job_id: j1gln8epv + total_layers: 197 + job_id: jegn0krm5 job_status: Passed - name: MediaPipeHandLandmarkDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 1047.0 - throughput: 955.1098376313277 + inference_time: 1204.0 + throughput: 830.5647840531561 estimated_peak_memory_range: - min: 28672 - max: 2017000 + min: 20480 + max: 2109720 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 158 + layers_on_npu: 159 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 158 - job_id: jqpye2rgy + total_layers: 159 + job_id: jvgdn2rl5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -130,36 +131,36 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:35:00.465711Z' + timestamp: '2024-04-02T15:27:36.652846Z' torchscript_onnx_qnn: - inference_time: 996.0 - throughput: 1004.0160642570281 + inference_time: 1286.0 + throughput: 777.6049766718507 estimated_peak_memory_range: - min: 24576 - max: 10650592 + min: 16384 + max: 10322344 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 158 + layers_on_npu: 210 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 158 - job_id: jn5q81757 + total_layers: 210 + job_id: jo5me8vqp job_status: Passed - torchscript_onnx_tflite: - inference_time: 749.0 - throughput: 1335.1134846461948 + inference_time: 892.0 + throughput: 1121.0762331838564 estimated_peak_memory_range: - min: 16384 - max: 54372320 + min: 12288 + max: 56429024 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 158 + layers_on_npu: 159 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 158 - job_id: j1p8orzg9 + total_layers: 159 + job_id: jqp4n3jlg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -168,19 +169,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:36:54.085694Z' + timestamp: '2024-04-02T15:30:18.255922Z' torchscript_onnx_qnn: - inference_time: 747.0 - throughput: 1338.6880856760374 + inference_time: 967.0 + throughput: 1034.126163391934 estimated_peak_memory_range: - min: 12288 - max: 53941536 + min: 802816 + max: 63039088 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 158 + layers_on_npu: 210 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 158 - job_id: jw566mv5o + total_layers: 210 + job_id: jopr6w1ep job_status: Passed diff --git a/qai_hub_models/models/mediapipe_pose/perf.yaml b/qai_hub_models/models/mediapipe_pose/perf.yaml index f642b95b..9a3dfc0e 100644 --- a/qai_hub_models/models/mediapipe_pose/perf.yaml +++ b/qai_hub_models/models/mediapipe_pose/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,19 +32,19 @@ models: - name: MediaPipePoseDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 806.0 - throughput: 1240.6947890818858 + inference_time: 832.0 + throughput: 1201.923076923077 estimated_peak_memory_range: - min: 24576 - max: 1736000 + min: 12288 + max: 4854072 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 106 + layers_on_npu: 107 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 106 - job_id: jygzelzg8 + total_layers: 107 + job_id: jep2xe3mg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,36 +53,36 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:54:08.518654Z' + timestamp: '2024-04-02T16:08:14.918160Z' torchscript_onnx_qnn: - inference_time: 808.0 - throughput: 1237.6237623762377 + inference_time: 888.0 + throughput: 1126.126126126126 estimated_peak_memory_range: - min: 28672 - max: 4909504 + min: 12288 + max: 15880848 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 106 + layers_on_npu: 140 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 106 - job_id: jvgdwdk5j + total_layers: 140 + job_id: jogkv8rop job_status: Passed - torchscript_onnx_tflite: - inference_time: 577.0 - throughput: 1733.102253032929 + inference_time: 595.0 + throughput: 1680.672268907563 estimated_peak_memory_range: - min: 65536 - max: 39641680 + min: 61440 + max: 40000256 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 106 + layers_on_npu: 107 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 106 - job_id: jmg9vzq57 + total_layers: 107 + job_id: j2p046eeg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,38 +91,38 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:58:30.195464Z' + timestamp: '2024-04-02T16:09:48.929940Z' torchscript_onnx_qnn: - inference_time: 577.0 - throughput: 1733.102253032929 + inference_time: 635.0 + throughput: 1574.8031496062993 estimated_peak_memory_range: - min: 61440 - max: 40004608 + min: 0 + max: 42314640 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 106 + layers_on_npu: 140 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 106 - job_id: jqp4qyqgo + total_layers: 140 + job_id: j1gl4lel5 job_status: Passed - name: MediaPipePoseLandmarkDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 1052.0 - throughput: 950.5703422053232 + inference_time: 1234.0 + throughput: 810.3727714748784 estimated_peak_memory_range: - min: 16384 - max: 2847296 + min: 24576 + max: 2060312 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 229 + layers_on_npu: 230 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 229 - job_id: jz5wolzp1 + total_layers: 230 + job_id: jqpyzmv4g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -130,36 +131,36 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-15T00:04:16.645350Z' + timestamp: '2024-04-02T16:14:29.934608Z' torchscript_onnx_qnn: - inference_time: 1063.0 - throughput: 940.7337723424271 + inference_time: 1299.0 + throughput: 769.8229407236336 estimated_peak_memory_range: - min: 12288 - max: 2768272 + min: 16384 + max: 16124312 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 229 + layers_on_npu: 306 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 229 - job_id: jz57zeqp3 + total_layers: 306 + job_id: jn5q0v9mp job_status: Passed - torchscript_onnx_tflite: - inference_time: 756.0 - throughput: 1322.7513227513227 + inference_time: 897.0 + throughput: 1114.8272017837235 estimated_peak_memory_range: - min: 12288 - max: 84633232 + min: 16384 + max: 87742400 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 229 + layers_on_npu: 230 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 229 - job_id: jnp10nk5q + total_layers: 230 + job_id: j1p821w8p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -168,19 +169,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-15T00:06:07.166564Z' + timestamp: '2024-04-02T16:16:04.740529Z' torchscript_onnx_qnn: - inference_time: 772.0 - throughput: 1295.3367875647668 + inference_time: 944.0 + throughput: 1059.322033898305 estimated_peak_memory_range: - min: 12288 - max: 84377840 + min: 815104 + max: 87180432 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 229 + layers_on_npu: 306 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 229 - job_id: j0pxvljg7 + total_layers: 306 + job_id: jw562wq7g job_status: Passed diff --git a/qai_hub_models/models/mediapipe_selfie/perf.yaml b/qai_hub_models/models/mediapipe_selfie/perf.yaml index 8e81b1b0..6234bae5 100644 --- a/qai_hub_models/models/mediapipe_selfie/perf.yaml +++ b/qai_hub_models/models/mediapipe_selfie/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: MediaPipe-Selfie-Segmentation performance_metrics: - torchscript_onnx_tflite: - inference_time: 821.0 - throughput: 1218.026796589525 + inference_time: 811.0 + throughput: 1233.0456226880394 estimated_peak_memory_range: min: 12288 - max: 2051880 + max: 1889216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: j1p3kox52 + job_id: j1p3n6qz5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-15T00:06:02.750038Z' + timestamp: '2024-04-02T16:00:20.583232Z' torchscript_onnx_qnn: - inference_time: 805.0 - throughput: 1242.2360248447205 + inference_time: 774.0 + throughput: 1291.9896640826873 estimated_peak_memory_range: - min: 815104 - max: 4449664 + min: 802816 + max: 90946416 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 139 + layers_on_npu: 138 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 139 - job_id: j1pv3275x + total_layers: 138 + job_id: j1pvq7zmg job_status: Passed - torchscript_onnx_tflite: - inference_time: 555.0 - throughput: 1801.8018018018017 + inference_time: 554.0 + throughput: 1805.0541516245487 estimated_peak_memory_range: min: 12288 - max: 22552848 + max: 22551568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jwgoyd458 + job_id: jwgoz8edp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-15T00:09:04.960914Z' + timestamp: '2024-04-02T16:02:48.415603Z' torchscript_onnx_qnn: - inference_time: 550.0 - throughput: 1818.1818181818182 + inference_time: 529.0 + throughput: 1890.359168241966 estimated_peak_memory_range: min: 176128 - max: 42597216 + max: 41833568 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 139 + layers_on_npu: 138 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 139 - job_id: jlpe967gr + total_layers: 138 + job_id: j7gjdqk8g job_status: Passed diff --git a/qai_hub_models/models/mnasnet05/perf.yaml b/qai_hub_models/models/mnasnet05/perf.yaml index 9536af51..f37b382f 100644 --- a/qai_hub_models/models/mnasnet05/perf.yaml +++ b/qai_hub_models/models/mnasnet05/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: MNASNet05 performance_metrics: - torchscript_onnx_tflite: - inference_time: 383.0 - throughput: 2610.9660574412533 + inference_time: 370.0 + throughput: 2702.7027027027025 estimated_peak_memory_range: min: 20480 - max: 1718480 + max: 2386016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: j1p8o1qg9 + job_id: jlpeoy40g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:31:16.872390Z' + timestamp: '2024-04-02T16:00:21.394093Z' torchscript_onnx_qnn: - inference_time: 358.0 - throughput: 2793.2960893854747 + inference_time: 362.0 + throughput: 2762.4309392265195 estimated_peak_memory_range: - min: 634880 - max: 4722696 + min: 12288 + max: 120863224 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 102 + layers_on_npu: 101 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 102 - job_id: jn5q8ve57 + total_layers: 101 + job_id: jz5ww4mj5 job_status: Passed - torchscript_onnx_tflite: - inference_time: 282.0 - throughput: 3546.099290780142 + inference_time: 277.0 + throughput: 3610.1083032490974 estimated_peak_memory_range: - min: 12288 - max: 44089552 + min: 24576 + max: 44606688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jogkz8vgd + job_id: jygz2nv6g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:34:31.653300Z' + timestamp: '2024-04-02T16:02:47.948402Z' torchscript_onnx_qnn: - inference_time: 260.0 - throughput: 3846.153846153846 + inference_time: 258.0 + throughput: 3875.968992248062 estimated_peak_memory_range: - min: 0 - max: 33635600 + min: 618496 + max: 37367488 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 102 + layers_on_npu: 101 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 102 - job_id: j1glnl2pv + total_layers: 101 + job_id: jmg90d9vg job_status: Passed diff --git a/qai_hub_models/models/mobilenet_v2/perf.yaml b/qai_hub_models/models/mobilenet_v2/perf.yaml index eaee0237..e96cc1f6 100644 --- a/qai_hub_models/models/mobilenet_v2/perf.yaml +++ b/qai_hub_models/models/mobilenet_v2/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: MobileNet-v2 performance_metrics: - torchscript_onnx_tflite: - inference_time: 540.0 - throughput: 1851.851851851852 + inference_time: 549.0 + throughput: 1821.4936247723133 estimated_peak_memory_range: min: 12288 - max: 1921936 + max: 1985288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jygzeyzg8 + job_id: jnp126qlg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:53:26.997975Z' + timestamp: '2024-04-02T15:43:39.892190Z' torchscript_onnx_qnn: - inference_time: 808.0 - throughput: 1237.6237623762377 + inference_time: 805.0 + throughput: 1242.2360248447205 estimated_peak_memory_range: - min: 622592 - max: 6011376 + min: 12288 + max: 197702240 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 104 + layers_on_npu: 103 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 104 - job_id: jmg9v2q57 + total_layers: 103 + job_id: jz57296rp job_status: Passed - torchscript_onnx_tflite: - inference_time: 393.0 - throughput: 2544.529262086514 + inference_time: 394.0 + throughput: 2538.0710659898477 estimated_peak_memory_range: min: 12288 - max: 55502880 + max: 56118336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jz5wozzp1 + job_id: jvgdn27l5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:57:53.996541Z' + timestamp: '2024-04-02T15:46:19.700460Z' torchscript_onnx_qnn: - inference_time: 537.0 - throughput: 1862.1973929236499 + inference_time: 535.0 + throughput: 1869.1588785046729 estimated_peak_memory_range: min: 618496 - max: 37101856 + max: 36988752 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 104 + layers_on_npu: 103 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 104 - job_id: jnp101k5q + total_layers: 103 + job_id: jqp4n38lg job_status: Passed diff --git a/qai_hub_models/models/mobilenet_v2_quantized/perf.yaml b/qai_hub_models/models/mobilenet_v2_quantized/perf.yaml index 55ab7eff..0fa1fea2 100644 --- a/qai_hub_models/models/mobilenet_v2_quantized/perf.yaml +++ b/qai_hub_models/models/mobilenet_v2_quantized/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: MobileNet-v2-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 237.0 - throughput: 4219.4092827004215 + inference_time: 234.0 + throughput: 4273.504273504273 estimated_peak_memory_range: min: 12288 - max: 1520264 + max: 1572504 primary_compute_unit: NPU precision: int8 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: j1p3klz52 + job_id: j0px9xm9p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:42:55.837359Z' + timestamp: '2024-04-02T16:00:56.088303Z' torchscript_onnx_qnn: - inference_time: 352.0 - throughput: 2840.909090909091 + inference_time: 349.0 + throughput: 2865.3295128939826 estimated_peak_memory_range: - min: 135168 - max: 94316568 + min: 167936 + max: 46798608 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 69 + layers_on_npu: 68 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 69 - job_id: j1pv3ym5x + total_layers: 68 + job_id: jegn0kxm5 job_status: Passed - torchscript_onnx_tflite: - inference_time: 168.0 - throughput: 5952.380952380952 + inference_time: 196.0 + throughput: 5102.040816326531 estimated_peak_memory_range: min: 12288 - max: 35960128 + max: 36730112 primary_compute_unit: NPU precision: int8 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jwgoy7d58 + job_id: jo5me84qp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:47:22.207861Z' + timestamp: '2024-04-02T16:03:12.952170Z' torchscript_onnx_qnn: - inference_time: 253.0 - throughput: 3952.5691699604745 + inference_time: 245.0 + throughput: 4081.6326530612246 estimated_peak_memory_range: min: 163840 - max: 35983856 + max: 32368720 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 69 + layers_on_npu: 68 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 69 - job_id: j7gjx68pd + total_layers: 68 + job_id: jopr6w9ep job_status: Passed diff --git a/qai_hub_models/models/mobilenet_v3_large/perf.yaml b/qai_hub_models/models/mobilenet_v3_large/perf.yaml index a03a69e5..59f65ffd 100644 --- a/qai_hub_models/models/mobilenet_v3_large/perf.yaml +++ b/qai_hub_models/models/mobilenet_v3_large/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: MobileNet-v3-Large performance_metrics: - torchscript_onnx_tflite: - inference_time: 603.0 - throughput: 1658.374792703151 + inference_time: 600.0 + throughput: 1666.6666666666667 estimated_peak_memory_range: min: 12288 - max: 2319320 + max: 1649368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 134 - job_id: jnp10025q + job_id: jep2xejmg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,7 +53,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:10:33.448407Z' + timestamp: '2024-04-02T16:02:57.989143Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -69,11 +70,11 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 433.0 - throughput: 2309.4688221709007 + inference_time: 424.0 + throughput: 2358.490566037736 estimated_peak_memory_range: - min: 12288 - max: 60000912 + min: 16384 + max: 60523168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 134 - job_id: jvgdwwe5j + job_id: jqpyzmn4g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,7 +91,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:10:33.448414Z' + timestamp: '2024-04-02T16:02:57.989157Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' diff --git a/qai_hub_models/models/mobilenet_v3_large_quantized/perf.yaml b/qai_hub_models/models/mobilenet_v3_large_quantized/perf.yaml index 724d7aa9..7ad36609 100644 --- a/qai_hub_models/models/mobilenet_v3_large_quantized/perf.yaml +++ b/qai_hub_models/models/mobilenet_v3_large_quantized/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,19 +32,19 @@ models: - name: MobileNet-v3-Large-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 2972.0 - throughput: 336.47375504710635 + inference_time: 2909.0 + throughput: 343.7607425232039 estimated_peak_memory_range: - min: 12288 - max: 3564432 + min: 1351680 + max: 5759640 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 136 + layers_on_npu: 134 layers_on_gpu: 0 layers_on_cpu: 15 - total_layers: 151 - job_id: j1pv3m75x + total_layers: 149 + job_id: j2p046keg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,7 +53,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:39:39.924043Z' + timestamp: '2024-04-02T15:35:49.786504Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -69,19 +70,19 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 2352.0 - throughput: 425.1700680272109 + inference_time: 2580.0 + throughput: 387.5968992248062 estimated_peak_memory_range: - min: 0 - max: 46180704 + min: 12288 + max: 45919040 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 136 + layers_on_npu: 134 layers_on_gpu: 0 layers_on_cpu: 15 - total_layers: 151 - job_id: jlpe9x7gr + total_layers: 149 + job_id: j1p82188p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,7 +91,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:39:39.924051Z' + timestamp: '2024-04-02T15:35:49.786517Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' diff --git a/qai_hub_models/models/mobilenet_v3_small/perf.yaml b/qai_hub_models/models/mobilenet_v3_small/perf.yaml index 20a2652a..e705083d 100644 --- a/qai_hub_models/models/mobilenet_v3_small/perf.yaml +++ b/qai_hub_models/models/mobilenet_v3_small/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: MobileNet-v3-Small performance_metrics: - torchscript_onnx_tflite: - inference_time: 424.0 - throughput: 2358.490566037736 + inference_time: 421.0 + throughput: 2375.296912114014 estimated_peak_memory_range: min: 36864 - max: 1921728 + max: 8536712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: jlpe900gr + job_id: jogkv8dop job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,7 +53,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:22:40.354876Z' + timestamp: '2024-04-02T15:24:45.951383Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -69,11 +70,11 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 288.0 - throughput: 3472.222222222222 + inference_time: 312.0 + throughput: 3205.128205128205 estimated_peak_memory_range: min: 12288 - max: 40067360 + max: 40933232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: jygzeq6g8 + job_id: jn5q0vwmp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,7 +91,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:22:40.354885Z' + timestamp: '2024-04-02T15:24:45.951396Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' diff --git a/qai_hub_models/models/openai_clip/perf.yaml b/qai_hub_models/models/openai_clip/perf.yaml index 2a6ddaed..a547260a 100644 --- a/qai_hub_models/models/openai_clip/perf.yaml +++ b/qai_hub_models/models/openai_clip/perf.yaml @@ -17,22 +17,26 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: CLIPTextEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 15528.0 - throughput: 64.39979392065945 + inference_time: 15437.0 + throughput: 64.77942605428515 estimated_peak_memory_range: - min: 40960 - max: 3106072 + min: 16384 + max: 3773072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,14 +44,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 576 - job_id: j2p0m2veg + job_id: j1gl4l7l5 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-04-02T15:38:20.337723Z' torchscript_onnx_qnn: - inference_time: 8149.0 - throughput: 122.71444348999877 + inference_time: 8102.0 + throughput: 123.42631449024933 estimated_peak_memory_range: - min: 40960 - max: 23728064 + min: 32768 + max: 20779640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -55,53 +67,121 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 377 - job_id: jogk2q9og + job_id: j1pvq74mg + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 11180.0 + throughput: 89.44543828264759 + estimated_peak_memory_range: + min: 16384 + max: 221118656 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 574 + layers_on_gpu: 0 + layers_on_cpu: 2 + total_layers: 576 + job_id: j1p3n68z5 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:25:08.294036Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-04-02T15:41:03.797053Z' + torchscript_onnx_qnn: + inference_time: 5698.0 + throughput: 175.5001755001755 + estimated_peak_memory_range: + min: 12288 + max: 143619840 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 377 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 377 + job_id: jlpeoy20g + job_status: Passed - name: CLIPImageEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 127729.0 - throughput: 7.829075621041424 + inference_time: 126791.0 + throughput: 7.886995133724002 estimated_peak_memory_range: - min: 159744 - max: 3867320 + min: 163840 + max: 4397144 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 575 + layers_on_npu: 576 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 575 - job_id: j1p8em48p + total_layers: 576 + job_id: jw562wv7g job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-04-02T15:49:16.432598Z' torchscript_onnx_qnn: - inference_time: 50903.0 - throughput: 19.645207551617784 + inference_time: 50465.0 + throughput: 19.815713861091847 estimated_peak_memory_range: - min: 86016 - max: 59741752 + min: 57344 + max: 62046320 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 370 + layers_on_npu: 371 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 370 - job_id: jn5qlrmmp + total_layers: 371 + job_id: j7gjdq18g + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 96475.0 + throughput: 10.365379632029024 + estimated_peak_memory_range: + min: 266240 + max: 867371232 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 576 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 576 + job_id: jwgoz8mdp job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:30:00.084732Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-04-02T15:52:00.458152Z' + torchscript_onnx_qnn: + inference_time: 38292.0 + throughput: 26.115115428810196 + estimated_peak_memory_range: + min: 643072 + max: 228425888 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 371 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 371 + job_id: jygz2nw6g + job_status: Passed diff --git a/qai_hub_models/models/openpose/perf.yaml b/qai_hub_models/models/openpose/perf.yaml index 6f677f3e..2dc7eb61 100644 --- a/qai_hub_models/models/openpose/perf.yaml +++ b/qai_hub_models/models/openpose/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: OpenPose performance_metrics: - torchscript_onnx_tflite: - inference_time: 11718.0 - throughput: 85.33879501621438 + inference_time: 11734.0 + throughput: 85.22243054371911 estimated_peak_memory_range: - min: 229376 - max: 2888976 + min: 237568 + max: 2523304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: j1pvokj5x + job_id: jw562wm6g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:20:33.773079Z' + timestamp: '2024-04-02T15:49:44.284847Z' torchscript_onnx_qnn: - inference_time: 11832.0 - throughput: 84.51656524678837 + inference_time: 11894.0 + throughput: 84.07600470825626 estimated_peak_memory_range: - min: 643072 - max: 242325320 + min: 618496 + max: 231521112 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 187 + layers_on_npu: 186 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 187 - job_id: jlpe1m15r + total_layers: 186 + job_id: jwgoz8wqp job_status: Passed - torchscript_onnx_tflite: - inference_time: 8755.0 - throughput: 114.22044545973729 + inference_time: 8768.0 + throughput: 114.05109489051095 estimated_peak_memory_range: - min: 192512 - max: 33307600 + min: 217088 + max: 33805040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: j7gjmnxgd + job_id: j1p3n6735 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:22:29.557459Z' + timestamp: '2024-04-02T15:52:25.633149Z' torchscript_onnx_qnn: - inference_time: 8772.0 - throughput: 113.99908800729594 + inference_time: 8773.0 + throughput: 113.98609369656901 estimated_peak_memory_range: min: 618496 - max: 53437584 + max: 53251968 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 187 + layers_on_npu: 186 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 187 - job_id: jygz9dk58 + total_layers: 186 + job_id: j1pvq7nkg job_status: Passed diff --git a/qai_hub_models/models/protocols.py b/qai_hub_models/models/protocols.py index 86b53dc9..adfdcf5e 100644 --- a/qai_hub_models/models/protocols.py +++ b/qai_hub_models/models/protocols.py @@ -80,10 +80,10 @@ def quantize( self, data: _DataLoader, num_samples: int | None = None, - evaluator: BaseEvaluator | None = None, device: str = "cpu", requantize_model_weights=False, - ) -> float | None: + data_has_gt=False, + ) -> None: """ Compute quantization encodings for this model with the given dataset and model evaluator. @@ -106,18 +106,15 @@ def quantize( Number of samples to use for evaluation. One sample is one iteration from iter(data). If none, defaults to the number of samples in the dataset. - evaluator: BaseModelEvaluator | None - Evaluator to populate while quantizing the data. - If not provided, an evaluator is not used. - device: str Name of device on which inference should be run. requantize_model_weights: bool If a weight is quantized, recompute its quantization parameters. - Returns: - If an evaluator is provided, returns its accuracy score. No return value otherwise. + data_has_gt: bool + Set to true if the data loader passed in also provides ground truth data. + The ground truth data will be discarded for quantization. """ ... @@ -180,7 +177,7 @@ def from_pretrained( ... -class PretrainedHubModelProtocol(HubModelProtocol, FromPretrainedProtocol): +class PretrainedHubModelProtocol(HubModelProtocol, FromPretrainedProtocol, Protocol): """ All pretrained AI Hub Models must, at minimum, implement this interface. """ diff --git a/qai_hub_models/models/quicksrnetlarge/perf.yaml b/qai_hub_models/models/quicksrnetlarge/perf.yaml index d7559b5d..9ae38b52 100644 --- a/qai_hub_models/models/quicksrnetlarge/perf.yaml +++ b/qai_hub_models/models/quicksrnetlarge/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: QuickSRNetLarge performance_metrics: - torchscript_onnx_tflite: - inference_time: 2500.0 - throughput: 400.0 + inference_time: 2479.0 + throughput: 403.3884630899556 estimated_peak_memory_range: min: 16384 - max: 1492864 + max: 2082976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jn5q8l757 + job_id: jz5ww4xj5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-15T00:06:24.010143Z' + timestamp: '2024-04-02T15:38:35.151948Z' torchscript_onnx_qnn: - inference_time: 2109.0 - throughput: 474.158368895211 + inference_time: 2103.0 + throughput: 475.51117451260103 estimated_peak_memory_range: - min: 16384 - max: 5120280 + min: 212992 + max: 70526200 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 32 + layers_on_npu: 31 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 32 - job_id: jw5668v5o + total_layers: 31 + job_id: jnp1263lg job_status: Passed - torchscript_onnx_tflite: - inference_time: 1780.0 - throughput: 561.7977528089888 + inference_time: 1724.0 + throughput: 580.046403712297 estimated_peak_memory_range: - min: 20480 - max: 27633264 + min: 16384 + max: 28078224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: j1glnyepv + job_id: jmg90d8vg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-15T00:10:42.992618Z' + timestamp: '2024-04-02T15:41:14.086310Z' torchscript_onnx_qnn: - inference_time: 1506.0 - throughput: 664.0106241699867 + inference_time: 1489.0 + throughput: 671.591672263264 estimated_peak_memory_range: - min: 208896 - max: 18546960 + min: 204800 + max: 18038672 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 32 + layers_on_npu: 31 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 32 - job_id: j1p3kzx52 + total_layers: 31 + job_id: jvgdn20l5 job_status: Passed diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/perf.yaml b/qai_hub_models/models/quicksrnetlarge_quantized/perf.yaml index 174979a6..190f1b2f 100644 --- a/qai_hub_models/models/quicksrnetlarge_quantized/perf.yaml +++ b/qai_hub_models/models/quicksrnetlarge_quantized/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,19 +32,19 @@ models: - name: QuickSRNetLarge-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1868.0 - throughput: 535.3319057815846 + inference_time: 1505.0 + throughput: 664.4518272425249 estimated_peak_memory_range: - min: 12288 - max: 1533296 + min: 20480 + max: 1674904 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 30 + layers_on_npu: 28 layers_on_gpu: 0 layers_on_cpu: 3 - total_layers: 33 - job_id: jygze66g8 + total_layers: 31 + job_id: jz5ww4865 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,7 +53,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:21:00.166706Z' + timestamp: '2024-04-02T15:46:59.414680Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -69,19 +70,19 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 1484.0 - throughput: 673.8544474393531 + inference_time: 1194.0 + throughput: 837.5209380234506 estimated_peak_memory_range: - min: 20480 - max: 25007104 + min: 12288 + max: 25612592 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 30 + layers_on_npu: 28 layers_on_gpu: 0 layers_on_cpu: 3 - total_layers: 33 - job_id: jz5wokjp1 + total_layers: 31 + job_id: jmg90dklg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,7 +91,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:21:00.166728Z' + timestamp: '2024-04-02T15:46:59.414694Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' diff --git a/qai_hub_models/models/quicksrnetmedium/perf.yaml b/qai_hub_models/models/quicksrnetmedium/perf.yaml index f1316110..48845da9 100644 --- a/qai_hub_models/models/quicksrnetmedium/perf.yaml +++ b/qai_hub_models/models/quicksrnetmedium/perf.yaml @@ -2,11 +2,11 @@ models: - name: QuickSRNetMedium performance_metrics: - torchscript_onnx_tflite: - inference_time: 1398.0 - throughput: 715.307582260372 + inference_time: 1386.0 + throughput: 721.5007215007215 estimated_peak_memory_range: - min: 16384 - max: 8236496 + min: 24576 + max: 8284584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -14,7 +14,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jwgoy9d58 + job_id: jnp12672g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -23,28 +23,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:12:36.328807Z' + timestamp: '2024-04-02T15:43:54.390470Z' torchscript_onnx_qnn: - inference_time: 989.0 - throughput: 1011.1223458038422 + inference_time: 998.0 + throughput: 1002.0040080160321 estimated_peak_memory_range: min: 212992 - max: 7267624 + max: 7518912 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 18 + layers_on_npu: 17 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 18 - job_id: j7gjx88pd + total_layers: 17 + job_id: jz5729klp job_status: Passed - torchscript_onnx_tflite: - inference_time: 935.0 - throughput: 1069.51871657754 + inference_time: 899.0 + throughput: 1112.3470522803113 estimated_peak_memory_range: min: 16384 - max: 19630352 + max: 19609168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -52,7 +52,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: j1pv3nm5x + job_id: jvgdn28e5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -61,21 +61,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:15:44.251341Z' + timestamp: '2024-04-02T15:46:31.914821Z' torchscript_onnx_qnn: - inference_time: 648.0 - throughput: 1543.20987654321 + inference_time: 651.0 + throughput: 1536.0983102918588 estimated_peak_memory_range: min: 208896 - max: 14213744 + max: 15417120 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 18 + layers_on_npu: 17 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 18 - job_id: jlpe9n0gr + total_layers: 17 + job_id: jqp4n3mvg job_status: Passed aggregated: supported_oses: @@ -98,6 +98,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/perf.yaml b/qai_hub_models/models/quicksrnetmedium_quantized/perf.yaml index 2843af27..acb0f171 100644 --- a/qai_hub_models/models/quicksrnetmedium_quantized/perf.yaml +++ b/qai_hub_models/models/quicksrnetmedium_quantized/perf.yaml @@ -2,19 +2,19 @@ models: - name: QuickSRNetMedium-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1411.0 - throughput: 708.7172218284904 + inference_time: 1054.0 + throughput: 948.7666034155598 estimated_peak_memory_range: - min: 28672 - max: 1545320 + min: 12288 + max: 1550760 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 16 + layers_on_npu: 14 layers_on_gpu: 0 layers_on_cpu: 3 - total_layers: 19 - job_id: joprkj950 + total_layers: 17 + job_id: j0px9x31p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -23,7 +23,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:24:06.170051Z' + timestamp: '2024-04-02T15:22:27.279987Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -40,19 +40,19 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 1149.0 - throughput: 870.3220191470845 + inference_time: 854.0 + throughput: 1170.96018735363 estimated_peak_memory_range: - min: 20480 - max: 20002352 + min: 12288 + max: 19670544 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 16 + layers_on_npu: 14 layers_on_gpu: 0 layers_on_cpu: 3 - total_layers: 19 - job_id: jep28n4p6 + total_layers: 17 + job_id: jo5me8owp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -61,7 +61,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:24:06.170059Z' + timestamp: '2024-04-02T15:22:27.280002Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -98,6 +98,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro diff --git a/qai_hub_models/models/quicksrnetsmall/perf.yaml b/qai_hub_models/models/quicksrnetsmall/perf.yaml index d0815180..ae0aa28c 100644 --- a/qai_hub_models/models/quicksrnetsmall/perf.yaml +++ b/qai_hub_models/models/quicksrnetsmall/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: QuickSRNetSmall performance_metrics: - torchscript_onnx_tflite: - inference_time: 1338.0 - throughput: 747.3841554559043 + inference_time: 1324.0 + throughput: 755.2870090634441 estimated_peak_memory_range: - min: 24576 - max: 1376064 + min: 16384 + max: 15227496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jygzezzg8 + job_id: jegn0kor5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-15T00:09:24.184304Z' + timestamp: '2024-04-02T15:38:53.058069Z' torchscript_onnx_qnn: - inference_time: 1025.0 - throughput: 975.609756097561 + inference_time: 1010.0 + throughput: 990.0990099009902 estimated_peak_memory_range: - min: 212992 - max: 37245776 + min: 225280 + max: 8292184 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 12 + layers_on_npu: 11 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 12 - job_id: jnp10ok5q + total_layers: 11 + job_id: jep2xe44g job_status: Passed - torchscript_onnx_tflite: - inference_time: 839.0 - throughput: 1191.8951132300358 + inference_time: 939.0 + throughput: 1064.9627263045793 estimated_peak_memory_range: min: 16384 - max: 17771072 + max: 18004576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jmg9voq57 + job_id: jopr6wo9p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-15T00:13:39.690790Z' + timestamp: '2024-04-02T15:41:32.292172Z' torchscript_onnx_qnn: - inference_time: 616.0 - throughput: 1623.3766233766235 + inference_time: 621.0 + throughput: 1610.3059581320451 estimated_peak_memory_range: - min: 212992 - max: 14001568 + min: 229376 + max: 14179680 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 12 + layers_on_npu: 11 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 12 - job_id: jz57zoqp3 + total_layers: 11 + job_id: jqpyzmq7g job_status: Passed diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/perf.yaml b/qai_hub_models/models/quicksrnetsmall_quantized/perf.yaml index 8766199c..257316bd 100644 --- a/qai_hub_models/models/quicksrnetsmall_quantized/perf.yaml +++ b/qai_hub_models/models/quicksrnetsmall_quantized/perf.yaml @@ -2,19 +2,19 @@ models: - name: QuickSRNetSmall-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1355.0 - throughput: 738.0073800738007 + inference_time: 992.0 + throughput: 1008.0645161290323 estimated_peak_memory_range: min: 20480 - max: 2224928 + max: 3845112 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 10 + layers_on_npu: 8 layers_on_gpu: 0 layers_on_cpu: 3 - total_layers: 13 - job_id: jz57zknp3 + total_layers: 11 + job_id: j2p046d6g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -23,7 +23,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:22:40.346377Z' + timestamp: '2024-04-02T15:30:25.941729Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -40,19 +40,19 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 1099.0 - throughput: 909.9181073703367 + inference_time: 806.0 + throughput: 1240.6947890818858 estimated_peak_memory_range: - min: 20480 - max: 20205264 + min: 12288 + max: 18429184 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 10 + layers_on_npu: 8 layers_on_gpu: 0 layers_on_cpu: 3 - total_layers: 13 - job_id: jqp4qm2go + total_layers: 11 + job_id: j1p8216xp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -61,7 +61,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:22:40.346384Z' + timestamp: '2024-04-02T15:30:25.941742Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -98,6 +98,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro diff --git a/qai_hub_models/models/real_esrgan_general_x4v3/perf.yaml b/qai_hub_models/models/real_esrgan_general_x4v3/perf.yaml index c79aa05c..c9e1c58a 100644 --- a/qai_hub_models/models/real_esrgan_general_x4v3/perf.yaml +++ b/qai_hub_models/models/real_esrgan_general_x4v3/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: Real-ESRGAN-General-x4v3 performance_metrics: - torchscript_onnx_tflite: - inference_time: 7285.0 - throughput: 137.26835964310226 + inference_time: 7135.0 + throughput: 140.1541695865452 estimated_peak_memory_range: - min: 15745024 - max: 20241416 + min: 15785984 + max: 26631064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 72 - job_id: j1glno2pv + job_id: jn5q0vz4p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:12:36.720476Z' + timestamp: '2024-04-02T15:40:09.101637Z' torchscript_onnx_qnn: - inference_time: 6983.0 - throughput: 143.20492624946297 + inference_time: 7001.0 + throughput: 142.836737608913 estimated_peak_memory_range: - min: 12288 - max: 10852600 + min: 57344 + max: 12072040 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 73 + layers_on_npu: 72 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 73 - job_id: j1p3kxm52 + total_layers: 72 + job_id: jw562wr0g job_status: Passed - torchscript_onnx_tflite: - inference_time: 5660.0 - throughput: 176.67844522968198 + inference_time: 5501.0 + throughput: 181.78512997636793 estimated_peak_memory_range: - min: 57344 - max: 53042192 + min: 20480 + max: 55158288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 72 - job_id: jw566rn5o + job_id: j1gl4lo85 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:17:08.081378Z' + timestamp: '2024-04-02T15:42:50.185115Z' torchscript_onnx_qnn: - inference_time: 4939.0 - throughput: 202.47013565499088 + inference_time: 4936.0 + throughput: 202.5931928687196 estimated_peak_memory_range: - min: 208896 - max: 32676160 + min: 12288 + max: 31063632 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 73 + layers_on_npu: 72 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 73 - job_id: jwgoyo158 + total_layers: 72 + job_id: j1p3n6xl5 job_status: Passed diff --git a/qai_hub_models/models/real_esrgan_x4plus/perf.yaml b/qai_hub_models/models/real_esrgan_x4plus/perf.yaml index 3bc0350c..898e3e2c 100644 --- a/qai_hub_models/models/real_esrgan_x4plus/perf.yaml +++ b/qai_hub_models/models/real_esrgan_x4plus/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -52,13 +53,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:12:52.767646Z' + timestamp: '2024-04-02T16:05:08.717805Z' torchscript_onnx_qnn: - inference_time: 66635.0 - throughput: 15.007128385983343 + inference_time: 66817.0 + throughput: 14.966251103761019 estimated_peak_memory_range: - min: 94208 - max: 104137800 + min: 139264 + max: 108335072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -66,7 +67,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1031 - job_id: jz57zzlp3 + job_id: j1pvq7ejg job_status: Passed - torchscript_onnx_tflite: inference_time: 'null' @@ -90,13 +91,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:16:11.670851Z' + timestamp: '2024-04-02T16:06:57.492677Z' torchscript_onnx_qnn: - inference_time: 50978.0 - throughput: 19.61630507277649 + inference_time: 51292.0 + throughput: 19.49621773375965 estimated_peak_memory_range: - min: 90112 - max: 248878432 + min: 73728 + max: 258670240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -104,5 +105,5 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1031 - job_id: jqp4qqvgo + job_id: jygz2n8kg job_status: Passed diff --git a/qai_hub_models/models/regnet/perf.yaml b/qai_hub_models/models/regnet/perf.yaml index f5814c03..87d5ea45 100644 --- a/qai_hub_models/models/regnet/perf.yaml +++ b/qai_hub_models/models/regnet/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: RegNet performance_metrics: - torchscript_onnx_tflite: - inference_time: 1974.0 - throughput: 506.5856129685917 + inference_time: 1895.0 + throughput: 527.7044854881267 estimated_peak_memory_range: - min: 32768 - max: 1789416 + min: 180224 + max: 45308848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: jqpyey4gy + job_id: jmg90dxlg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-15T00:08:16.252038Z' + timestamp: '2024-04-02T15:33:39.883573Z' torchscript_onnx_qnn: - inference_time: 1675.0 - throughput: 597.0149253731344 + inference_time: 1662.0 + throughput: 601.6847172081829 estimated_peak_memory_range: - min: 241664 - max: 59486296 + min: 622592 + max: 60403520 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 187 + layers_on_npu: 186 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 187 - job_id: j1p8ok8g9 + total_layers: 186 + job_id: jvgdn2ze5 job_status: Passed - torchscript_onnx_tflite: - inference_time: 1359.0 - throughput: 735.8351729212657 + inference_time: 1348.0 + throughput: 741.839762611276 estimated_peak_memory_range: - min: 16384 - max: 131931280 + min: 12288 + max: 134684864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: j2p0yxegw + job_id: jnp126v2g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-15T00:11:23.127753Z' + timestamp: '2024-04-02T15:36:21.856849Z' torchscript_onnx_qnn: - inference_time: 1197.0 - throughput: 835.421888053467 + inference_time: 1192.0 + throughput: 838.9261744966443 estimated_peak_memory_range: min: 618496 - max: 68520544 + max: 71874880 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 187 + layers_on_npu: 186 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 187 - job_id: jogkzkogd + total_layers: 186 + job_id: j0px9xd1p job_status: Passed diff --git a/qai_hub_models/models/resnet101/perf.yaml b/qai_hub_models/models/resnet101/perf.yaml index cd12a3b8..4aa9bd34 100644 --- a/qai_hub_models/models/resnet101/perf.yaml +++ b/qai_hub_models/models/resnet101/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: ResNet101 performance_metrics: - torchscript_onnx_tflite: - inference_time: 2993.0 - throughput: 334.1129301703976 + inference_time: 2981.0 + throughput: 335.4579000335458 estimated_peak_memory_range: - min: 28672 - max: 1903408 + min: 20480 + max: 2256968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: j7gjxmxpd + job_id: j2p046v6g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:51:59.720577Z' + timestamp: '2024-04-02T15:33:00.839758Z' torchscript_onnx_qnn: - inference_time: 2921.0 - throughput: 342.3485107839781 + inference_time: 2909.0 + throughput: 343.7607425232039 estimated_peak_memory_range: - min: 622592 - max: 226849752 + min: 626688 + max: 228487056 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 244 + layers_on_npu: 243 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 244 - job_id: jygze9kg8 + total_layers: 243 + job_id: jogkv892p job_status: Passed - torchscript_onnx_tflite: - inference_time: 2221.0 - throughput: 450.24763619990995 + inference_time: 2205.0 + throughput: 453.51473922902494 estimated_peak_memory_range: - min: 16384 - max: 103000720 + min: 12288 + max: 105917360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jlpe911gr + job_id: j1p8214xp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:54:07.329383Z' + timestamp: '2024-04-02T15:35:41.997410Z' torchscript_onnx_qnn: - inference_time: 2126.0 - throughput: 470.36688617121354 + inference_time: 2129.0 + throughput: 469.7040864255519 estimated_peak_memory_range: min: 618496 - max: 71779728 + max: 73551600 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 244 + layers_on_npu: 243 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 244 - job_id: jz5won6p1 + total_layers: 243 + job_id: jn5q0vm4p job_status: Passed diff --git a/qai_hub_models/models/resnet101_quantized/perf.yaml b/qai_hub_models/models/resnet101_quantized/perf.yaml index f5bd3459..600692d7 100644 --- a/qai_hub_models/models/resnet101_quantized/perf.yaml +++ b/qai_hub_models/models/resnet101_quantized/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: ResNet101Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1122.0 - throughput: 891.2655971479501 + inference_time: 1103.0 + throughput: 906.6183136899365 estimated_peak_memory_range: - min: 12288 - max: 2141424 + min: 40960 + max: 1823040 primary_compute_unit: NPU precision: int8 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jvgdw7z5j + job_id: j1gl4l185 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-15T00:05:31.212967Z' + timestamp: '2024-04-02T15:59:57.701225Z' torchscript_onnx_qnn: - inference_time: 1101.0 - throughput: 908.2652134423251 + inference_time: 1097.0 + throughput: 911.5770282588878 estimated_peak_memory_range: - min: 12288 - max: 196790880 + min: 20480 + max: 197523472 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 144 + layers_on_npu: 143 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 144 - job_id: jvgdw765j + total_layers: 143 + job_id: jwgoz84xp job_status: Passed - torchscript_onnx_tflite: - inference_time: 839.0 - throughput: 1191.8951132300358 + inference_time: 863.0 + throughput: 1158.7485515643104 estimated_peak_memory_range: - min: 12288 - max: 91234848 + min: 20480 + max: 92174016 primary_compute_unit: NPU precision: int8 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jmg9v9m57 + job_id: jw562wd0g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-15T00:07:20.101134Z' + timestamp: '2024-04-02T16:02:25.700326Z' torchscript_onnx_qnn: - inference_time: 830.0 - throughput: 1204.8192771084337 + inference_time: 817.0 + throughput: 1223.9902080783354 estimated_peak_memory_range: min: 167936 - max: 53969312 + max: 53912720 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 144 + layers_on_npu: 143 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 144 - job_id: jo5mrv7gk + total_layers: 143 + job_id: j1pvq79jg job_status: Passed diff --git a/qai_hub_models/models/resnet18/perf.yaml b/qai_hub_models/models/resnet18/perf.yaml index 0bc59fb1..df22cd5c 100644 --- a/qai_hub_models/models/resnet18/perf.yaml +++ b/qai_hub_models/models/resnet18/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: ResNet18 performance_metrics: - torchscript_onnx_tflite: - inference_time: 1053.0 - throughput: 949.667616334283 + inference_time: 1038.0 + throughput: 963.3911368015414 estimated_peak_memory_range: - min: 32768 - max: 2028832 + min: 16384 + max: 1682008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 36 - job_id: j2p0y8egw + job_id: j7gjdqwxg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:41:20.341762Z' + timestamp: '2024-04-02T15:32:51.123052Z' torchscript_onnx_qnn: - inference_time: 989.0 - throughput: 1011.1223458038422 + inference_time: 985.0 + throughput: 1015.2284263959391 estimated_peak_memory_range: - min: 12288 - max: 84688848 + min: 16384 + max: 95517680 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 52 + layers_on_npu: 51 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 52 - job_id: jogkzwogd + total_layers: 51 + job_id: jygz2n4kg job_status: Passed - torchscript_onnx_tflite: - inference_time: 771.0 - throughput: 1297.0168612191958 + inference_time: 772.0 + throughput: 1295.3367875647668 estimated_peak_memory_range: min: 12288 - max: 23627952 + max: 23648144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 36 - job_id: j1p8od8g9 + job_id: jlpeoyl1g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:45:48.504221Z' + timestamp: '2024-04-02T15:35:30.038080Z' torchscript_onnx_qnn: - inference_time: 717.0 - throughput: 1394.700139470014 + inference_time: 716.0 + throughput: 1396.6480446927374 estimated_peak_memory_range: - min: 630784 - max: 25268288 + min: 622592 + max: 26671488 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 52 + layers_on_npu: 51 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 52 - job_id: jn5q8xm57 + total_layers: 51 + job_id: jz5ww4465 job_status: Passed diff --git a/qai_hub_models/models/resnet18_quantized/perf.yaml b/qai_hub_models/models/resnet18_quantized/perf.yaml index df4b298b..28c184e4 100644 --- a/qai_hub_models/models/resnet18_quantized/perf.yaml +++ b/qai_hub_models/models/resnet18_quantized/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: ResNet18Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 356.0 - throughput: 2808.9887640449438 + inference_time: 355.0 + throughput: 2816.9014084507044 estimated_peak_memory_range: min: 12288 - max: 1529808 + max: 1475416 primary_compute_unit: NPU precision: int8 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 37 - job_id: j1p3k8m52 + job_id: jmg90ddlg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:59:49.885782Z' + timestamp: '2024-04-02T15:27:39.960726Z' torchscript_onnx_qnn: - inference_time: 354.0 - throughput: 2824.858757062147 + inference_time: 368.0 + throughput: 2717.391304347826 estimated_peak_memory_range: - min: 20480 - max: 62738248 + min: 0 + max: 206530616 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 35 + layers_on_npu: 34 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 35 - job_id: j1pv34z5x + total_layers: 34 + job_id: jvgdn22e5 job_status: Passed - torchscript_onnx_tflite: - inference_time: 301.0 - throughput: 3322.2591362126245 + inference_time: 294.0 + throughput: 3401.360544217687 estimated_peak_memory_range: min: 12288 - max: 23414560 + max: 23142320 primary_compute_unit: NPU precision: int8 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 37 - job_id: jwgoym158 + job_id: jnp12662g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-15T00:03:01.789875Z' + timestamp: '2024-04-02T15:30:23.722899Z' torchscript_onnx_qnn: - inference_time: 282.0 - throughput: 3546.099290780142 + inference_time: 287.0 + throughput: 3484.320557491289 estimated_peak_memory_range: min: 12288 - max: 21538672 + max: 22112096 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 35 + layers_on_npu: 34 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 35 - job_id: j7gjx11pd + total_layers: 34 + job_id: jz57299lp job_status: Passed diff --git a/qai_hub_models/models/resnet50/perf.yaml b/qai_hub_models/models/resnet50/perf.yaml index 0037092b..0069d307 100644 --- a/qai_hub_models/models/resnet50/perf.yaml +++ b/qai_hub_models/models/resnet50/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: ResNet50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 1898.0 - throughput: 526.8703898840885 + inference_time: 1893.0 + throughput: 528.2620179609086 estimated_peak_memory_range: - min: 36864 - max: 2234848 + min: 24576 + max: 2236144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 77 - job_id: j2p0yk0gw + job_id: jqp4n33vg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:27:02.086108Z' + timestamp: '2024-04-02T16:00:02.772259Z' torchscript_onnx_qnn: - inference_time: 1790.0 - throughput: 558.659217877095 + inference_time: 1782.0 + throughput: 561.1672278338945 estimated_peak_memory_range: - min: 626688 - max: 186659664 + min: 618496 + max: 186769968 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 125 + layers_on_npu: 124 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 125 - job_id: jogkzdvgd + total_layers: 124 + job_id: jo5me88wp job_status: Passed - torchscript_onnx_tflite: - inference_time: 1392.0 - throughput: 718.3908045977012 + inference_time: 1410.0 + throughput: 709.2198581560284 estimated_peak_memory_range: - min: 16384 - max: 68731008 + min: 0 + max: 68342016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 77 - job_id: j1p8o8qg9 + job_id: j0px9xx1p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:28:58.275338Z' + timestamp: '2024-04-02T16:02:27.391913Z' torchscript_onnx_qnn: - inference_time: 1307.0 - throughput: 765.1109410864575 + inference_time: 1303.0 + throughput: 767.4597083653108 estimated_peak_memory_range: - min: 0 - max: 45987408 + min: 618496 + max: 49585360 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 125 + layers_on_npu: 124 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 125 - job_id: jn5q8we57 + total_layers: 124 + job_id: jegn0kkr5 job_status: Passed diff --git a/qai_hub_models/models/resnext101/perf.yaml b/qai_hub_models/models/resnext101/perf.yaml index d8d493ff..55ab55e5 100644 --- a/qai_hub_models/models/resnext101/perf.yaml +++ b/qai_hub_models/models/resnext101/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: ResNeXt101 performance_metrics: - torchscript_onnx_tflite: - inference_time: 6315.0 - throughput: 158.3531274742676 + inference_time: 6465.0 + throughput: 154.67904098994586 estimated_peak_memory_range: - min: 28672 - max: 2570472 + min: 32768 + max: 2912504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: j2p0yrngw + job_id: j1pvq77jg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:40:16.043830Z' + timestamp: '2024-04-02T15:51:13.215900Z' torchscript_onnx_qnn: - inference_time: 6079.0 - throughput: 164.50074025333114 + inference_time: 6084.0 + throughput: 164.3655489809336 estimated_peak_memory_range: min: 16384 - max: 34444952 + max: 36270640 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 244 + layers_on_npu: 243 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 244 - job_id: jogkzyngd + total_layers: 243 + job_id: jlpeoyy1g job_status: Passed - torchscript_onnx_tflite: - inference_time: 4552.0 - throughput: 219.6836555360281 + inference_time: 4520.0 + throughput: 221.23893805309734 estimated_peak_memory_range: min: 20480 - max: 357156576 + max: 364641136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: j1p8o7og9 + job_id: j7gjdqqxg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:42:21.825443Z' + timestamp: '2024-04-02T15:53:55.219027Z' torchscript_onnx_qnn: - inference_time: 4377.0 - throughput: 228.4669865204478 + inference_time: 4424.0 + throughput: 226.03978300180833 estimated_peak_memory_range: min: 618496 - max: 123852368 + max: 130132064 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 244 + layers_on_npu: 243 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 244 - job_id: jn5q82o57 + total_layers: 243 + job_id: jygz2nnkg job_status: Passed diff --git a/qai_hub_models/models/resnext101_quantized/perf.yaml b/qai_hub_models/models/resnext101_quantized/perf.yaml index ddc514fb..5dd59834 100644 --- a/qai_hub_models/models/resnext101_quantized/perf.yaml +++ b/qai_hub_models/models/resnext101_quantized/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: ResNeXt101Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 2842.0 - throughput: 351.8648838845883 + inference_time: 2844.0 + throughput: 351.6174402250352 estimated_peak_memory_range: - min: 16384 - max: 1739432 + min: 0 + max: 2204768 primary_compute_unit: NPU precision: int8 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jygzekkg8 + job_id: jz5ww4765 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,7 +53,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:42:18.013006Z' + timestamp: '2024-04-02T15:41:28.974979Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -69,11 +70,11 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 2088.0 - throughput: 478.9272030651341 + inference_time: 2070.0 + throughput: 483.09178743961354 estimated_peak_memory_range: - min: 36864 - max: 251955536 + min: 12288 + max: 261887168 primary_compute_unit: NPU precision: int8 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jnp1lz25q + job_id: jmg90dmlg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,7 +91,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:42:18.013015Z' + timestamp: '2024-04-02T15:41:28.974992Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' diff --git a/qai_hub_models/models/resnext50/perf.yaml b/qai_hub_models/models/resnext50/perf.yaml index 806199a4..88ae002a 100644 --- a/qai_hub_models/models/resnext50/perf.yaml +++ b/qai_hub_models/models/resnext50/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: ResNeXt50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 2118.0 - throughput: 472.14353163361665 + inference_time: 2186.0 + throughput: 457.45654162854527 estimated_peak_memory_range: - min: 16384 - max: 2846256 + min: 20480 + max: 2564264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 77 - job_id: j7gjxq1pd + job_id: jnp126j2g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:21:42.740361Z' + timestamp: '2024-04-02T16:00:40.598047Z' torchscript_onnx_qnn: - inference_time: 2081.0 - throughput: 480.5382027871216 + inference_time: 2074.0 + throughput: 482.1600771456123 estimated_peak_memory_range: - min: 12288 - max: 67945728 + min: 622592 + max: 68996704 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 125 + layers_on_npu: 124 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 125 - job_id: jygzen4g8 + total_layers: 124 + job_id: jz57294lp job_status: Passed - torchscript_onnx_tflite: - inference_time: 1551.0 - throughput: 644.7453255963894 + inference_time: 1561.0 + throughput: 640.6149903907751 estimated_peak_memory_range: - min: 16384 - max: 161276560 + min: 12288 + max: 164341728 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 77 - job_id: jlpe9y8gr + job_id: jvgdn23e5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:23:42.732818Z' + timestamp: '2024-04-02T16:03:00.522367Z' torchscript_onnx_qnn: inference_time: 1518.0 throughput: 658.7615283267457 estimated_peak_memory_range: min: 618496 - max: 57881488 + max: 60133216 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 125 + layers_on_npu: 124 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 125 - job_id: jz5wo44p1 + total_layers: 124 + job_id: jqp4n31vg job_status: Passed diff --git a/qai_hub_models/models/resnext50_quantized/perf.yaml b/qai_hub_models/models/resnext50_quantized/perf.yaml index 67cbf162..baff69e3 100644 --- a/qai_hub_models/models/resnext50_quantized/perf.yaml +++ b/qai_hub_models/models/resnext50_quantized/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: ResNeXt50Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 874.0 - throughput: 1144.1647597254005 + inference_time: 879.0 + throughput: 1137.6564277588168 estimated_peak_memory_range: min: 12288 - max: 1920376 + max: 1573712 primary_compute_unit: NPU precision: int8 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: jegn27jgo + job_id: j0px9x41p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,7 +53,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:10:31.822073Z' + timestamp: '2024-04-02T15:13:52.082055Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -69,11 +70,11 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 656.0 - throughput: 1524.3902439024391 + inference_time: 683.0 + throughput: 1464.1288433382138 estimated_peak_memory_range: min: 12288 - max: 96222112 + max: 98876096 primary_compute_unit: NPU precision: int8 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: joprknk50 + job_id: jo5me8mwp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,7 +91,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:10:31.822087Z' + timestamp: '2024-04-02T15:13:52.082068Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' diff --git a/qai_hub_models/models/sam/perf.yaml b/qai_hub_models/models/sam/perf.yaml index 6ea06c1e..41f476b9 100644 --- a/qai_hub_models/models/sam/perf.yaml +++ b/qai_hub_models/models/sam/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,19 +32,19 @@ models: - name: SAMDecoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 16761.0 - throughput: 59.66231131794046 + inference_time: 57777.0 + throughput: 17.30792529899441 estimated_peak_memory_range: - min: 42115072 - max: 92806968 - primary_compute_unit: GPU + min: 5091328 + max: 14286200 + primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 356 - layers_on_cpu: 9 + layers_on_npu: 364 + layers_on_gpu: 1 + layers_on_cpu: 0 total_layers: 365 - job_id: jmg9vkm57 + job_id: jegn0knr5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,7 +53,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:10:36.748428Z' + timestamp: '2024-04-02T15:41:53.968079Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -69,19 +70,19 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 13794.0 - throughput: 72.4952878062926 + inference_time: 39989.0 + throughput: 25.006876891145065 estimated_peak_memory_range: - min: 41951232 - max: 94062064 - primary_compute_unit: GPU + min: 16384 + max: 209265456 + primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 356 - layers_on_cpu: 9 + layers_on_npu: 364 + layers_on_gpu: 1 + layers_on_cpu: 0 total_layers: 365 - job_id: jnp107n5q + job_id: jopr6w09p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,7 +91,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:10:36.748439Z' + timestamp: '2024-04-02T15:41:53.968093Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' diff --git a/qai_hub_models/models/sesr_m5/perf.yaml b/qai_hub_models/models/sesr_m5/perf.yaml index dc0d7cac..710b92ab 100644 --- a/qai_hub_models/models/sesr_m5/perf.yaml +++ b/qai_hub_models/models/sesr_m5/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: SESR-M5 performance_metrics: - torchscript_onnx_tflite: - inference_time: 2245.0 - throughput: 445.43429844097994 + inference_time: 2254.0 + throughput: 443.6557231588287 estimated_peak_memory_range: - min: 28672 - max: 9857128 + min: 24576 + max: 8961568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: jwgoyjd58 + job_id: jep2xew4g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-15T00:02:55.130462Z' + timestamp: '2024-04-02T15:43:53.479475Z' torchscript_onnx_qnn: - inference_time: 2136.0 - throughput: 468.1647940074906 + inference_time: 2137.0 + throughput: 467.94571829667757 estimated_peak_memory_range: - min: 221184 - max: 3873216 + min: 24576 + max: 6179792 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 32 + layers_on_npu: 31 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 32 - job_id: j7gjxj8pd + total_layers: 31 + job_id: j2p046j6g job_status: Passed - torchscript_onnx_tflite: - inference_time: 1533.0 - throughput: 652.3157208088714 + inference_time: 1589.0 + throughput: 629.3266205160478 estimated_peak_memory_range: min: 16384 - max: 23601872 + max: 24462352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: j1pv3jm5x + job_id: jqpyzmx7g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-15T00:04:43.299283Z' + timestamp: '2024-04-02T15:46:31.476167Z' torchscript_onnx_qnn: - inference_time: 1462.0 - throughput: 683.9945280437756 + inference_time: 1456.0 + throughput: 686.8131868131868 estimated_peak_memory_range: - min: 208896 - max: 20706112 + min: 212992 + max: 22416672 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 32 + layers_on_npu: 31 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 32 - job_id: jlpe9j0gr + total_layers: 31 + job_id: j1p821xxp job_status: Passed diff --git a/qai_hub_models/models/sesr_m5_quantized/perf.yaml b/qai_hub_models/models/sesr_m5_quantized/perf.yaml index ba9de102..e57234b7 100644 --- a/qai_hub_models/models/sesr_m5_quantized/perf.yaml +++ b/qai_hub_models/models/sesr_m5_quantized/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,19 +32,19 @@ models: - name: SESR-M5-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1749.0 - throughput: 571.7552887364208 + inference_time: 1357.0 + throughput: 736.9196757553427 estimated_peak_memory_range: - min: 28672 - max: 6325016 + min: 12288 + max: 3744312 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 13 + layers_on_npu: 11 layers_on_gpu: 0 layers_on_cpu: 3 - total_layers: 16 - job_id: joprk1k50 + total_layers: 14 + job_id: jogkv842p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,7 +53,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:59:28.460705Z' + timestamp: '2024-04-02T15:13:49.881073Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -69,19 +70,19 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 1403.0 - throughput: 712.7583749109052 + inference_time: 1112.0 + throughput: 899.2805755395683 estimated_peak_memory_range: - min: 20480 - max: 21054176 + min: 12288 + max: 22134720 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 13 + layers_on_npu: 11 layers_on_gpu: 0 layers_on_cpu: 3 - total_layers: 16 - job_id: jep2836p6 + total_layers: 14 + job_id: jn5q0vy4p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,7 +91,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:59:28.460714Z' + timestamp: '2024-04-02T15:13:49.881086Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' diff --git a/qai_hub_models/models/shufflenet_v2/perf.yaml b/qai_hub_models/models/shufflenet_v2/perf.yaml index 525e8689..83d6b8a1 100644 --- a/qai_hub_models/models/shufflenet_v2/perf.yaml +++ b/qai_hub_models/models/shufflenet_v2/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: Shufflenet-v2 performance_metrics: - torchscript_onnx_tflite: - inference_time: 919.0 - throughput: 1088.139281828074 + inference_time: 917.0 + throughput: 1090.5125408942204 estimated_peak_memory_range: min: 12288 - max: 2065312 + max: 6661376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 202 - job_id: j2p0y1ngw + job_id: j1gl4lx85 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-15T00:00:07.928895Z' + timestamp: '2024-04-02T15:55:19.973155Z' torchscript_onnx_qnn: - inference_time: 322.0 - throughput: 3105.590062111801 + inference_time: 310.0 + throughput: 3225.8064516129034 estimated_peak_memory_range: - min: 626688 - max: 3731328 + min: 12288 + max: 87860112 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 157 + layers_on_npu: 156 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 157 - job_id: jogkzlngd + total_layers: 156 + job_id: j1p3n69l5 job_status: Passed - torchscript_onnx_tflite: - inference_time: 586.0 - throughput: 1706.4846416382252 + inference_time: 583.0 + throughput: 1715.2658662092624 estimated_peak_memory_range: - min: 16384 - max: 32832960 + min: 12288 + max: 33769008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 202 - job_id: j1p8o3og9 + job_id: jw562w70g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-15T00:03:17.860163Z' + timestamp: '2024-04-02T15:58:01.979025Z' torchscript_onnx_qnn: - inference_time: 225.0 - throughput: 4444.444444444444 + inference_time: 223.0 + throughput: 4484.304932735426 estimated_peak_memory_range: min: 12288 - max: 48449136 + max: 48783840 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 157 + layers_on_npu: 156 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 157 - job_id: jn5q87o57 + total_layers: 156 + job_id: jwgoz8rxp job_status: Passed diff --git a/qai_hub_models/models/shufflenet_v2_quantized/perf.yaml b/qai_hub_models/models/shufflenet_v2_quantized/perf.yaml index 6459f84c..1771e749 100644 --- a/qai_hub_models/models/shufflenet_v2_quantized/perf.yaml +++ b/qai_hub_models/models/shufflenet_v2_quantized/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: Shufflenet-v2Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 579.0 - throughput: 1727.1157167530225 + inference_time: 557.0 + throughput: 1795.3321364452424 estimated_peak_memory_range: - min: 16384 - max: 4558296 + min: 12288 + max: 1902352 primary_compute_unit: NPU precision: int8 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 203 - job_id: j1p89yxg9 + job_id: j1pvq7djg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:32:39.995361Z' + timestamp: '2024-04-02T15:16:43.037345Z' torchscript_onnx_qnn: - inference_time: 279.0 - throughput: 3584.2293906810037 + inference_time: 274.0 + throughput: 3649.6350364963505 estimated_peak_memory_range: - min: 0 - max: 75494608 + min: 12288 + max: 69529408 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 120 + layers_on_npu: 119 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 120 - job_id: j1glzm8pv + total_layers: 119 + job_id: jlpeoyz1g job_status: Passed - torchscript_onnx_tflite: - inference_time: 385.0 - throughput: 2597.4025974025976 + inference_time: 404.0 + throughput: 2475.2475247524753 estimated_peak_memory_range: min: 12288 - max: 21664192 + max: 22095680 primary_compute_unit: NPU precision: int8 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 203 - job_id: jn5qkq457 + job_id: j7gjdq7xg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:34:41.782968Z' + timestamp: '2024-04-02T15:19:24.193795Z' torchscript_onnx_qnn: - inference_time: 204.0 - throughput: 4901.9607843137255 + inference_time: 194.0 + throughput: 5154.639175257732 estimated_peak_memory_range: min: 163840 - max: 41738848 + max: 40609808 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 120 + layers_on_npu: 119 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 120 - job_id: jw56j40po + total_layers: 119 + job_id: jygz2nmkg job_status: Passed diff --git a/qai_hub_models/models/sinet/perf.yaml b/qai_hub_models/models/sinet/perf.yaml index 914dbe7c..2b578457 100644 --- a/qai_hub_models/models/sinet/perf.yaml +++ b/qai_hub_models/models/sinet/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: SINet performance_metrics: - torchscript_onnx_tflite: - inference_time: 1809.0 - throughput: 552.791597567717 + inference_time: 1813.0 + throughput: 551.5719801434087 estimated_peak_memory_range: - min: 20480 - max: 2244048 + min: 28672 + max: 2078752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 240 - job_id: jw566wn5o + job_id: jz5ww4l65 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:17:54.436410Z' + timestamp: '2024-04-02T15:16:44.533757Z' torchscript_onnx_qnn: - inference_time: 1193.0 - throughput: 838.2229673093043 + inference_time: 1195.0 + throughput: 836.8200836820083 estimated_peak_memory_range: - min: 20480 - max: 25094232 + min: 622592 + max: 58073808 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 187 + layers_on_npu: 186 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 187 - job_id: jwgoy8158 + total_layers: 186 + job_id: jnp126n2g job_status: Passed - torchscript_onnx_tflite: - inference_time: 1170.0 - throughput: 854.7008547008547 + inference_time: 1197.0 + throughput: 835.421888053467 estimated_peak_memory_range: min: 12288 - max: 24922736 + max: 25406400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 240 - job_id: j1p3k6m52 + job_id: jmg90dzlg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:22:20.419307Z' + timestamp: '2024-04-02T15:19:24.421326Z' torchscript_onnx_qnn: - inference_time: 802.0 - throughput: 1246.8827930174564 + inference_time: 798.0 + throughput: 1253.1328320802006 estimated_peak_memory_range: - min: 12288 - max: 65545232 + min: 0 + max: 67803232 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 187 + layers_on_npu: 186 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 187 - job_id: j1pv37z5x + total_layers: 186 + job_id: jvgdn2de5 job_status: Passed diff --git a/qai_hub_models/models/squeezenet1_1/perf.yaml b/qai_hub_models/models/squeezenet1_1/perf.yaml index 217d17b8..371fb3a7 100644 --- a/qai_hub_models/models/squeezenet1_1/perf.yaml +++ b/qai_hub_models/models/squeezenet1_1/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: SqueezeNet-1_1 performance_metrics: - torchscript_onnx_tflite: - inference_time: 225.0 - throughput: 4444.444444444444 + inference_time: 223.0 + throughput: 4484.304932735426 estimated_peak_memory_range: min: 24576 - max: 1431872 + max: 1673088 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 39 - job_id: j1p8ol8g9 + job_id: jz5ww4l35 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:32:29.373813Z' + timestamp: '2024-04-02T15:16:32.269958Z' torchscript_onnx_qnn: - inference_time: 278.0 - throughput: 3597.122302158273 + inference_time: 274.0 + throughput: 3649.6350364963505 estimated_peak_memory_range: - min: 20480 - max: 53223728 + min: 16384 + max: 8208760 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 69 + layers_on_npu: 68 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 69 - job_id: jn5q8jm57 + total_layers: 68 + job_id: jnp126n8g job_status: Passed - torchscript_onnx_tflite: - inference_time: 181.0 - throughput: 5524.861878453039 + inference_time: 182.0 + throughput: 5494.505494505494 estimated_peak_memory_range: min: 12288 - max: 21672448 + max: 21808400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 39 - job_id: jogkzjogd + job_id: jmg90dzwg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:34:27.758337Z' + timestamp: '2024-04-02T15:19:11.667370Z' torchscript_onnx_qnn: inference_time: 199.0 throughput: 5025.125628140703 estimated_peak_memory_range: min: 618496 - max: 28404384 + max: 28004672 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 69 + layers_on_npu: 68 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 69 - job_id: j1glnjlpv + total_layers: 68 + job_id: jvgdn2dr5 job_status: Passed diff --git a/qai_hub_models/models/squeezenet1_1_quantized/perf.yaml b/qai_hub_models/models/squeezenet1_1_quantized/perf.yaml index fcf427fe..4288b3c8 100644 --- a/qai_hub_models/models/squeezenet1_1_quantized/perf.yaml +++ b/qai_hub_models/models/squeezenet1_1_quantized/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: SqueezeNet-1_1Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 176.0 - throughput: 5681.818181818182 + inference_time: 150.0 + throughput: 6666.666666666667 estimated_peak_memory_range: min: 12288 - max: 2498992 + max: 1450328 primary_compute_unit: NPU precision: int8 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 39 - job_id: jnp10jk5q + job_id: jz5729evp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:48:25.505884Z' + timestamp: '2024-04-02T16:00:15.491720Z' torchscript_onnx_qnn: - inference_time: 185.0 - throughput: 5405.405405405405 + inference_time: 179.0 + throughput: 5586.592178770949 estimated_peak_memory_range: - min: 172032 - max: 55116856 + min: 12288 + max: 73066960 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 43 + layers_on_npu: 42 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 43 - job_id: jqp4q1qgo + total_layers: 42 + job_id: j0px9xl3p job_status: Passed - torchscript_onnx_tflite: - inference_time: 135.0 - throughput: 7407.407407407408 + inference_time: 130.0 + throughput: 7692.307692307692 estimated_peak_memory_range: min: 12288 - max: 21511824 + max: 21769728 primary_compute_unit: NPU precision: int8 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 39 - job_id: jz57z4qp3 + job_id: jqp4n3y8g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:51:35.601938Z' + timestamp: '2024-04-02T16:02:39.628312Z' torchscript_onnx_qnn: - inference_time: 146.0 - throughput: 6849.315068493151 + inference_time: 155.0 + throughput: 6451.612903225807 estimated_peak_memory_range: - min: 159744 - max: 18650384 + min: 184320 + max: 20075776 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 43 + layers_on_npu: 42 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 43 - job_id: jo5mrmygk + total_layers: 42 + job_id: jo5me80dp job_status: Passed diff --git a/qai_hub_models/models/stylegan2/perf.yaml b/qai_hub_models/models/stylegan2/perf.yaml index 856842a6..f3512bff 100644 --- a/qai_hub_models/models/stylegan2/perf.yaml +++ b/qai_hub_models/models/stylegan2/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,19 +32,19 @@ models: - name: StyleGAN2 performance_metrics: - torchscript_onnx_tflite: - inference_time: 1218362.0 - throughput: 0.8207741213202644 + inference_time: 1245465.0 + throughput: 0.8029129682488066 estimated_peak_memory_range: - min: 1358295040 - max: 1361471248 + min: 1583226880 + max: 1586523400 primary_compute_unit: CPU precision: fp32 layer_info: layers_on_npu: 0 layers_on_gpu: 89 - layers_on_cpu: 492 - total_layers: 581 - job_id: jlpe988gr + layers_on_cpu: 478 + total_layers: 567 + job_id: jegn0kzk5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,7 +53,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-15T00:03:19.171321Z' + timestamp: '2024-04-02T15:46:54.066866Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -69,19 +70,19 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 980347.0 - throughput: 1.0200469833640537 + inference_time: 1030564.0 + throughput: 0.970342453258604 estimated_peak_memory_range: - min: 1110478848 - max: 1142166720 + min: 897953792 + max: 928847488 primary_compute_unit: CPU precision: fp32 layer_info: layers_on_npu: 0 layers_on_gpu: 89 - layers_on_cpu: 492 - total_layers: 581 - job_id: jz5wo84p1 + layers_on_cpu: 478 + total_layers: 567 + job_id: jopr6wl0p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,7 +91,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-15T00:03:19.171331Z' + timestamp: '2024-04-02T15:46:54.066880Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' diff --git a/qai_hub_models/models/swin_base/perf.yaml b/qai_hub_models/models/swin_base/perf.yaml index d28fc1c7..e8b290f4 100644 --- a/qai_hub_models/models/swin_base/perf.yaml +++ b/qai_hub_models/models/swin_base/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: Swin-Base performance_metrics: - torchscript_onnx_tflite: - inference_time: 66948.0 - throughput: 14.936966003465376 + inference_time: 66984.0 + throughput: 14.928938253911381 estimated_peak_memory_range: - min: 28672 - max: 6112608 + min: 118784 + max: 4254288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1614 - job_id: jogkzm2gd + job_id: jep2xerrg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,7 +53,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:23:06.160602Z' + timestamp: '2024-04-02T15:14:19.208239Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -69,11 +70,11 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 43458.0 - throughput: 23.010722996916563 + inference_time: 43260.0 + throughput: 23.11604253351826 estimated_peak_memory_range: - min: 69632 - max: 472671520 + min: 90112 + max: 512533392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1614 - job_id: jn5q8o457 + job_id: jqpyzmo8g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,7 +91,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:23:06.160610Z' + timestamp: '2024-04-02T15:14:19.208253Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' diff --git a/qai_hub_models/models/swin_small/perf.yaml b/qai_hub_models/models/swin_small/perf.yaml index 2e615a93..b8fb40bd 100644 --- a/qai_hub_models/models/swin_small/perf.yaml +++ b/qai_hub_models/models/swin_small/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: Swin-Small performance_metrics: - torchscript_onnx_tflite: - inference_time: 50143.0 - throughput: 19.94296312546118 + inference_time: 50305.0 + throughput: 19.87873968790379 estimated_peak_memory_range: - min: 90112 - max: 3612056 + min: 114688 + max: 3114440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1609 - job_id: jo5mr9ygk + job_id: j2p046m9g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,7 +53,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:53:07.887698Z' + timestamp: '2024-04-02T15:59:03.699659Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -69,11 +70,11 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 33054.0 - throughput: 30.2535245356084 + inference_time: 32975.0 + throughput: 30.32600454890068 estimated_peak_memory_range: min: 45056 - max: 454274336 + max: 479723312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1609 - job_id: jegn2qvgo + job_id: j1p821ekp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,7 +91,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:53:07.887705Z' + timestamp: '2024-04-02T15:59:03.699672Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' diff --git a/qai_hub_models/models/swin_tiny/perf.yaml b/qai_hub_models/models/swin_tiny/perf.yaml index 6e71c1a1..8d0ce7dc 100644 --- a/qai_hub_models/models/swin_tiny/perf.yaml +++ b/qai_hub_models/models/swin_tiny/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: Swin-Tiny performance_metrics: - torchscript_onnx_tflite: - inference_time: 31313.0 - throughput: 31.935617794526234 + inference_time: 31126.0 + throughput: 32.12748184797275 estimated_peak_memory_range: - min: 81920 - max: 3482152 + min: 53248 + max: 3289744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 859 - job_id: j0pxvv1g7 + job_id: jogkv82wp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,7 +53,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:34:33.080588Z' + timestamp: '2024-04-02T15:46:54.220773Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -69,11 +70,11 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 20716.0 - throughput: 48.27186715582159 + inference_time: 20461.0 + throughput: 48.87346659498558 estimated_peak_memory_range: - min: 49152 - max: 274521296 + min: 45056 + max: 293868864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 859 - job_id: jo5mrrwgk + job_id: jn5q0vlnp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,7 +91,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:34:33.080597Z' + timestamp: '2024-04-02T15:46:54.220786Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' diff --git a/qai_hub_models/models/trocr/perf.yaml b/qai_hub_models/models/trocr/perf.yaml index df5315f1..9bbfefcc 100644 --- a/qai_hub_models/models/trocr/perf.yaml +++ b/qai_hub_models/models/trocr/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,19 +32,19 @@ models: - name: TrOCREncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 243976.0 - throughput: 4.098763812834049 + inference_time: 243112.0 + throughput: 4.1133304814241995 estimated_peak_memory_range: - min: 7221248 - max: 10173368 + min: 7290880 + max: 10682856 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 627 + layers_on_npu: 628 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 627 - job_id: j7gjxxxpd + total_layers: 628 + job_id: j1gl4lyj5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,7 +53,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:46:04.016709Z' + timestamp: '2024-04-02T15:25:12.572315Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -69,19 +70,19 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 182193.0 - throughput: 5.48868507571641 + inference_time: 182195.0 + throughput: 5.488624825050084 estimated_peak_memory_range: - min: 20480 - max: 305620528 + min: 6701056 + max: 331600272 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 627 + layers_on_npu: 628 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 627 - job_id: jygzeekg8 + total_layers: 628 + job_id: j1p3n6z35 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,7 +91,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:46:04.016721Z' + timestamp: '2024-04-02T15:25:12.572329Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -109,11 +110,11 @@ models: - name: TrOCRDecoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 2810.0 - throughput: 355.87188612099646 + inference_time: 2781.0 + throughput: 359.5828838547285 estimated_peak_memory_range: - min: 12288 - max: 2353880 + min: 28672 + max: 2706376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,7 +122,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 394 - job_id: jlpe991gr + job_id: jw562w86g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -130,7 +131,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:51:23.352323Z' + timestamp: '2024-04-02T15:30:38.723405Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -147,11 +148,11 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 2018.0 - throughput: 495.5401387512388 + inference_time: 1988.0 + throughput: 503.01810865191146 estimated_peak_memory_range: min: 12288 - max: 193404384 + max: 194199920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,7 +160,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 394 - job_id: jz5wov6p1 + job_id: jwgoz8lqp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -168,7 +169,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:51:23.352351Z' + timestamp: '2024-04-02T15:30:38.723420Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' diff --git a/qai_hub_models/models/unet_segmentation/perf.yaml b/qai_hub_models/models/unet_segmentation/perf.yaml index 8ff61006..adf3048f 100644 --- a/qai_hub_models/models/unet_segmentation/perf.yaml +++ b/qai_hub_models/models/unet_segmentation/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: Unet-Segmentation performance_metrics: - torchscript_onnx_tflite: - inference_time: 160694.0 - throughput: 6.223007704083538 + inference_time: 159721.0 + throughput: 6.260917474846764 estimated_peak_memory_range: min: 6688768 - max: 229291048 + max: 230831992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jlpe9rvgr + job_id: j1pvq72kg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-15T00:14:31.434457Z' + timestamp: '2024-04-02T16:00:44.075564Z' torchscript_onnx_qnn: - inference_time: 146509.0 - throughput: 6.825519251377049 + inference_time: 143885.0 + throughput: 6.949994787503909 estimated_peak_memory_range: - min: 10952704 - max: 44981480 + min: 9871360 + max: 36840448 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 52 + layers_on_npu: 51 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 52 - job_id: jmg9v3857 + total_layers: 51 + job_id: jlpeoy6og job_status: Passed - torchscript_onnx_tflite: - inference_time: 118830.0 - throughput: 8.415383320710259 + inference_time: 113226.0 + throughput: 8.831893734654585 estimated_peak_memory_range: - min: 6234112 - max: 344093584 + min: 4681728 + max: 361817664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jz5wodmp1 + job_id: j7gjdq3vg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-15T00:16:26.988161Z' + timestamp: '2024-04-02T16:03:02.410135Z' torchscript_onnx_qnn: - inference_time: 110459.0 - throughput: 9.053132836618111 + inference_time: 110489.0 + throughput: 9.050674727800958 estimated_peak_memory_range: - min: 328994816 - max: 420473984 + min: 9871360 + max: 113176352 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 52 + layers_on_npu: 51 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 52 - job_id: jnp10d75q + total_layers: 51 + job_id: jygz2nzog job_status: Passed diff --git a/qai_hub_models/models/vit/perf.yaml b/qai_hub_models/models/vit/perf.yaml index 6a9f6f8d..a8690001 100644 --- a/qai_hub_models/models/vit/perf.yaml +++ b/qai_hub_models/models/vit/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: VIT performance_metrics: - torchscript_onnx_tflite: - inference_time: 136110.0 - throughput: 7.346998751010212 + inference_time: 135551.0 + throughput: 7.3772971058863455 estimated_peak_memory_range: - min: 86016 - max: 3893632 + min: 167936 + max: 4072768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 557 - job_id: j1gln9lpv + job_id: jz5ww4y35 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,7 +53,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:40:01.517909Z' + timestamp: '2024-04-02T15:59:30.586343Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -69,11 +70,11 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 100287.0 - throughput: 9.971382133277494 + inference_time: 100385.0 + throughput: 9.96164765652239 estimated_peak_memory_range: - min: 163840 - max: 401162112 + min: 172032 + max: 414376288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 557 - job_id: jw566975o + job_id: jmg90dowg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,7 +91,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:40:01.517918Z' + timestamp: '2024-04-02T15:59:30.586357Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' diff --git a/qai_hub_models/models/whisper_base_en/perf.yaml b/qai_hub_models/models/whisper_base_en/perf.yaml index 5831fb37..f9493249 100644 --- a/qai_hub_models/models/whisper_base_en/perf.yaml +++ b/qai_hub_models/models/whisper_base_en/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: WhisperEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 154406.0 - throughput: 6.476432262994962 + inference_time: 154210.0 + throughput: 6.484663770183516 estimated_peak_memory_range: - min: 36892672 - max: 232224176 + min: 11546624 + max: 113388704 primary_compute_unit: GPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 315 layers_on_cpu: 0 total_layers: 315 - job_id: jqp4q0vgo + job_id: jnp126o8g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,7 +53,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:10:43.748935Z' + timestamp: '2024-04-02T15:35:51.877964Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -69,11 +70,11 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 120437.0 - throughput: 8.303096224582147 + inference_time: 124136.0 + throughput: 8.055680866146806 estimated_peak_memory_range: - min: 36777984 - max: 66087104 + min: 35241984 + max: 63999568 primary_compute_unit: GPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 315 layers_on_cpu: 0 total_layers: 315 - job_id: jo5mrywgk + job_id: jz5729ovp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,7 +91,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:10:43.748943Z' + timestamp: '2024-04-02T15:35:51.877979Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -109,19 +110,19 @@ models: - name: WhisperDecoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 14139.0 - throughput: 70.72635971426551 + inference_time: 14069.0 + throughput: 71.0782571611344 estimated_peak_memory_range: - min: 3051520 - max: 5712920 + min: 5812224 + max: 8998208 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 433 + layers_on_npu: 459 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 433 - job_id: j0pxv21g7 + layers_on_cpu: 2 + total_layers: 461 + job_id: jvgdn26r5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -130,7 +131,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:16:05.499826Z' + timestamp: '2024-04-02T15:41:26.044149Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -147,19 +148,19 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 10614.0 - throughput: 94.21518748822311 + inference_time: 10562.0 + throughput: 94.6790380609733 estimated_peak_memory_range: - min: 2019328 - max: 96045024 + min: 4571136 + max: 110220208 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 433 + layers_on_npu: 459 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 433 - job_id: jegn28rgo + layers_on_cpu: 2 + total_layers: 461 + job_id: jqp4n3e8g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -168,7 +169,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:16:05.499836Z' + timestamp: '2024-04-02T15:41:26.044164Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' diff --git a/qai_hub_models/models/whisper_small_en/perf.yaml b/qai_hub_models/models/whisper_small_en/perf.yaml index 549cec62..3db7b156 100644 --- a/qai_hub_models/models/whisper_small_en/perf.yaml +++ b/qai_hub_models/models/whisper_small_en/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: WhisperEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 602022.0 - throughput: 1.6610688645929883 + inference_time: 598326.0 + throughput: 1.6713296764640013 estimated_peak_memory_range: - min: 12288 - max: 448965896 + min: 95817728 + max: 535321856 primary_compute_unit: GPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 609 layers_on_cpu: 0 total_layers: 609 - job_id: jvgdw4k5j + job_id: j0px9x03p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,7 +53,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:29:10.773412Z' + timestamp: '2024-04-02T15:36:02.927472Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -69,11 +70,11 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 461601.0 - throughput: 2.1663731231084853 + inference_time: 469347.0 + throughput: 2.1306197759866365 estimated_peak_memory_range: - min: 14163968 - max: 46674320 + min: 28250112 + max: 60097088 primary_compute_unit: GPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 609 layers_on_cpu: 0 total_layers: 609 - job_id: jnp101l5q + job_id: jegn0k1k5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,7 +91,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:29:10.773421Z' + timestamp: '2024-04-02T15:36:02.927486Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -109,19 +110,19 @@ models: - name: WhisperDecoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 45479.0 - throughput: 21.988170364343983 + inference_time: 46381.0 + throughput: 21.560552812574116 estimated_peak_memory_range: - min: 8577024 - max: 12019040 + min: 16228352 + max: 19790512 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 853 + layers_on_npu: 903 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 853 - job_id: jz5wozjp1 + layers_on_cpu: 2 + total_layers: 905 + job_id: jo5me89dp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -130,7 +131,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:33:07.115194Z' + timestamp: '2024-04-02T15:41:42.406080Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -147,20 +148,20 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 'null' - throughput: 'null' + inference_time: 34412.0 + throughput: 29.059630361501803 estimated_peak_memory_range: - min: 0 - max: 0 - primary_compute_unit: 'null' - precision: 'null' + min: 20180992 + max: 1716349552 + primary_compute_unit: NPU + precision: fp16 layer_info: - layers_on_npu: 0 + layers_on_npu: 903 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 0 - job_id: jvgdw4l5j - job_status: Failed + layers_on_cpu: 2 + total_layers: 905 + job_id: jopr6wx0p + job_status: Passed reference_device_info: name: Samsung Galaxy S24 os: '14' @@ -168,7 +169,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:33:07.115203Z' + timestamp: '2024-04-02T15:41:42.406095Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' diff --git a/qai_hub_models/models/whisper_tiny_en/perf.yaml b/qai_hub_models/models/whisper_tiny_en/perf.yaml index a00f22a5..af454619 100644 --- a/qai_hub_models/models/whisper_tiny_en/perf.yaml +++ b/qai_hub_models/models/whisper_tiny_en/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: WhisperEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 67350.0 - throughput: 14.847809948032666 + inference_time: 69083.0 + throughput: 14.475341256170115 estimated_peak_memory_range: - min: 11608064 - max: 57976544 + min: 2011136 + max: 68110808 primary_compute_unit: GPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 217 layers_on_cpu: 0 total_layers: 217 - job_id: jz57zx9p3 + job_id: jep2xeorg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,7 +53,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-15T00:03:16.946141Z' + timestamp: '2024-04-02T15:14:01.654956Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -69,11 +70,11 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 53449.0 - throughput: 18.709423936836984 + inference_time: 53036.0 + throughput: 18.855117278829475 estimated_peak_memory_range: min: 0 - max: 27656928 + max: 25669392 primary_compute_unit: GPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 217 layers_on_cpu: 0 total_layers: 217 - job_id: jegn23qgo + job_id: j2p046o9g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,7 +91,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-15T00:03:16.946150Z' + timestamp: '2024-04-02T15:14:01.654971Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -109,19 +110,19 @@ models: - name: WhisperDecoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 7423.0 - throughput: 134.71642193183348 + inference_time: 7365.0 + throughput: 135.77732518669382 estimated_peak_memory_range: - min: 1634304 - max: 4170776 + min: 3002368 + max: 5408984 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 293 + layers_on_npu: 311 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 293 - job_id: jqp4qv1go + layers_on_cpu: 2 + total_layers: 313 + job_id: jqpyzm88g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -130,7 +131,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-15T00:09:31.853789Z' + timestamp: '2024-04-02T15:19:28.253886Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -147,19 +148,19 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 5570.0 - throughput: 179.53321364452424 + inference_time: 5492.0 + throughput: 182.0830298616169 estimated_peak_memory_range: - min: 466944 - max: 230273920 + min: 20480 + max: 233538800 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 293 + layers_on_npu: 311 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 293 - job_id: joprke750 + layers_on_cpu: 2 + total_layers: 313 + job_id: j1p821jkp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -168,7 +169,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-15T00:09:31.853814Z' + timestamp: '2024-04-02T15:19:28.253901Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' diff --git a/qai_hub_models/models/wideresnet50/perf.yaml b/qai_hub_models/models/wideresnet50/perf.yaml index 6dbe8a6c..0b48ca6f 100644 --- a/qai_hub_models/models/wideresnet50/perf.yaml +++ b/qai_hub_models/models/wideresnet50/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: WideResNet50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 4401.0 - throughput: 227.22108611679164 + inference_time: 4786.0 + throughput: 208.94274968658587 estimated_peak_memory_range: - min: 20480 - max: 2132848 + min: 28672 + max: 170961592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 77 - job_id: jwgoyr458 + job_id: jogkv86wp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:12:38.602998Z' + timestamp: '2024-04-02T15:27:33.204984Z' torchscript_onnx_qnn: - inference_time: 4580.0 - throughput: 218.34061135371178 + inference_time: 4614.0 + throughput: 216.7316861725184 estimated_peak_memory_range: - min: 618496 - max: 323904968 + min: 643072 + max: 314223792 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 125 + layers_on_npu: 124 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 125 - job_id: j7gjx77pd + total_layers: 124 + job_id: j1gl4lwj5 job_status: Passed - torchscript_onnx_tflite: - inference_time: 3306.0 - throughput: 302.48033877797945 + inference_time: 3605.0 + throughput: 277.39251040221916 estimated_peak_memory_range: - min: 16384 - max: 94385296 + min: 20480 + max: 96019088 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 77 - job_id: j1pv3d75x + job_id: jn5q0v4np job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:15:48.204812Z' + timestamp: '2024-04-02T15:30:15.130717Z' torchscript_onnx_qnn: - inference_time: 3413.0 - throughput: 292.99736302373276 + inference_time: 3410.0 + throughput: 293.2551319648094 estimated_peak_memory_range: min: 618496 - max: 52379088 + max: 53769616 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 125 + layers_on_npu: 124 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 125 - job_id: jlpe9z7gr + total_layers: 124 + job_id: jw562wo6g job_status: Passed diff --git a/qai_hub_models/models/wideresnet50_quantized/perf.yaml b/qai_hub_models/models/wideresnet50_quantized/perf.yaml index b597a1f7..922c65c0 100644 --- a/qai_hub_models/models/wideresnet50_quantized/perf.yaml +++ b/qai_hub_models/models/wideresnet50_quantized/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: WideResNet50-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1767.0 - throughput: 565.9309564233164 + inference_time: 1771.0 + throughput: 564.652738565782 estimated_peak_memory_range: - min: 24576 - max: 1759936 + min: 12288 + max: 2057600 primary_compute_unit: NPU precision: int8 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: jz5wo4zp1 + job_id: jwgoz8dqp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:12:41.207435Z' + timestamp: '2024-04-02T15:50:01.927438Z' torchscript_onnx_qnn: - inference_time: 1707.0 - throughput: 585.8230814294083 + inference_time: 1722.0 + throughput: 580.7200929152149 estimated_peak_memory_range: - min: 28672 - max: 479496224 + min: 16384 + max: 480044216 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 76 + layers_on_npu: 75 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 76 - job_id: j0pxvxjg7 + total_layers: 75 + job_id: j7gjdqyvg job_status: Passed - torchscript_onnx_tflite: - inference_time: 1322.0 - throughput: 756.4296520423601 + inference_time: 1346.0 + throughput: 742.9420505200594 estimated_peak_memory_range: - min: 16384 - max: 54559456 + min: 12288 + max: 55534992 primary_compute_unit: NPU precision: int8 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: jvgdw2k5j + job_id: j1pvq7mkg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:17:15.136644Z' + timestamp: '2024-04-02T15:52:43.890803Z' torchscript_onnx_qnn: - inference_time: 1291.0 - throughput: 774.5933384972889 + inference_time: 1290.0 + throughput: 775.1937984496124 estimated_peak_memory_range: min: 167936 - max: 41865680 + max: 41464352 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 76 + layers_on_npu: 75 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 76 - job_id: jogkz4ygd + total_layers: 75 + job_id: jlpeoyxog job_status: Passed diff --git a/qai_hub_models/models/xlsr/perf.yaml b/qai_hub_models/models/xlsr/perf.yaml index 3f267734..6c1f73cb 100644 --- a/qai_hub_models/models/xlsr/perf.yaml +++ b/qai_hub_models/models/xlsr/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: XLSR performance_metrics: - torchscript_onnx_tflite: - inference_time: 2508.0 - throughput: 398.72408293460927 + inference_time: 2520.0 + throughput: 396.8253968253968 estimated_peak_memory_range: - min: 16384 - max: 9569248 + min: 28672 + max: 1367248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 16 - job_id: jz57z6np3 + job_id: jygz2nyog job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-15T00:14:58.320277Z' + timestamp: '2024-04-02T15:55:01.326907Z' torchscript_onnx_qnn: - inference_time: 987.0 - throughput: 1013.1712259371834 + inference_time: 971.0 + throughput: 1029.8661174047375 estimated_peak_memory_range: - min: 2121728 - max: 10203592 + min: 217088 + max: 67726144 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 22 + layers_on_npu: 21 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 22 - job_id: j0pxvm8g7 + total_layers: 21 + job_id: jmg90d2wg job_status: Passed - torchscript_onnx_tflite: - inference_time: 1996.0 - throughput: 501.00200400801606 + inference_time: 1798.0 + throughput: 556.1735261401557 estimated_peak_memory_range: min: 16384 - max: 19879696 + max: 19705360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 16 - job_id: jqp4q82go + job_id: jz5ww4z35 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-15T00:16:54.078428Z' + timestamp: '2024-04-02T15:57:38.953477Z' torchscript_onnx_qnn: - inference_time: 631.0 - throughput: 1584.7860538827258 + inference_time: 628.0 + throughput: 1592.3566878980891 estimated_peak_memory_range: - min: 225280 - max: 18045792 + min: 208896 + max: 18068960 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 22 + layers_on_npu: 21 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 22 - job_id: jegn2xjgo + total_layers: 21 + job_id: jnp12618g job_status: Passed diff --git a/qai_hub_models/models/xlsr_quantized/perf.yaml b/qai_hub_models/models/xlsr_quantized/perf.yaml index 9bf56e2f..e659b98b 100644 --- a/qai_hub_models/models/xlsr_quantized/perf.yaml +++ b/qai_hub_models/models/xlsr_quantized/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,19 +32,19 @@ models: - name: XLSR-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1349.0 - throughput: 741.2898443291327 + inference_time: 1152.0 + throughput: 868.0555555555555 estimated_peak_memory_range: - min: 28672 - max: 1726904 + min: 77824 + max: 1569664 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 16 + layers_on_npu: 14 layers_on_gpu: 0 layers_on_cpu: 3 - total_layers: 19 - job_id: j1p3k3l52 + total_layers: 17 + job_id: jvgdn24r5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,7 +53,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:58:09.460010Z' + timestamp: '2024-04-02T15:52:42.432436Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -69,19 +70,19 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 1084.0 - throughput: 922.509225092251 + inference_time: 927.0 + throughput: 1078.7486515641856 estimated_peak_memory_range: - min: 20480 - max: 21010912 + min: 16384 + max: 20315072 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 16 + layers_on_npu: 14 layers_on_gpu: 0 layers_on_cpu: 3 - total_layers: 19 - job_id: jwgoy0x58 + total_layers: 17 + job_id: jz5729nvp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,7 +91,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:58:09.460020Z' + timestamp: '2024-04-02T15:52:42.432450Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' diff --git a/qai_hub_models/models/yolov6/model.py b/qai_hub_models/models/yolov6/model.py index cf836d44..c30783cd 100644 --- a/qai_hub_models/models/yolov6/model.py +++ b/qai_hub_models/models/yolov6/model.py @@ -31,19 +31,22 @@ class YoloV6(BaseModel): """Exportable YoloV6 bounding box detector, end-to-end.""" - def __init__(self, model: nn.Module) -> None: + def __init__(self, model: nn.Module, include_postprocessing: bool = True) -> None: super().__init__() self.model = model + self.include_postprocessing = include_postprocessing # All image input spatial dimensions should be a multiple of this stride. STRIDE_MULTIPLE = 32 @classmethod - def from_pretrained(cls, ckpt_name: str = DEFAULT_WEIGHTS): + def from_pretrained( + cls, ckpt_name: str = DEFAULT_WEIGHTS, include_postprocessing: bool = True + ): model_url = f"{WEIGHTS_PATH}{ckpt_name}" asset = CachedWebModelAsset(model_url, MODEL_ID, MODEL_ASSET_VERSION, ckpt_name) model = _load_yolov6_source_model_from_weights(asset) - return cls(model) + return cls(model, include_postprocessing) def forward(self, image: torch.Tensor): """ @@ -55,11 +58,23 @@ def forward(self, image: torch.Tensor): 3-channel Color Space: RGB Returns: - boxes: Shape [batch, num preds, 4] where 4 == (center_x, center_y, w, h) - class scores multiplied by confidence: Shape [batch, num_preds, # of classes (typically 80)] + If self.include_postprocessing: + boxes: Shape [batch, num preds, 4] where 4 == (center_x, center_y, w, h) + classes: class scores multiplied by confidence: Shape [batch, num_preds, # of classes (typically 80)] + + Otherwise: + detector_output: torch.Tensor + Shape is [batch, num_preds, k] + where, k = # of classes + 5 + k is structured as follows [box_coordinates (4) , conf (1) , # of classes] + and box_coordinates are [x_center, y_center, w, h] """ predictions = self.model(image) - return detect_postprocess(predictions) + return ( + detect_postprocess(predictions) + if self.include_postprocessing + else predictions + ) @staticmethod def get_input_spec( diff --git a/qai_hub_models/models/yolov6/perf.yaml b/qai_hub_models/models/yolov6/perf.yaml index 93dc9a2a..07a1bfa8 100644 --- a/qai_hub_models/models/yolov6/perf.yaml +++ b/qai_hub_models/models/yolov6/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: Yolo-v6 performance_metrics: - torchscript_onnx_tflite: - inference_time: 8480.0 - throughput: 117.9245283018868 + inference_time: 7224.0 + throughput: 138.42746400885935 estimated_peak_memory_range: - min: 24576 - max: 3130456 + min: 53248 + max: 8291064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 182 - job_id: jz5wo0jp1 + job_id: jqp4n348g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:29:36.439969Z' + timestamp: '2024-04-02T15:38:46.956110Z' torchscript_onnx_qnn: - inference_time: 7275.0 - throughput: 137.4570446735395 + inference_time: 6898.0 + throughput: 144.96955639315743 estimated_peak_memory_range: - min: 4939776 - max: 18286232 + min: 5578752 + max: 19291464 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 230 + layers_on_npu: 229 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 230 - job_id: jnp10kl5q + total_layers: 229 + job_id: jo5me8kdp job_status: Passed - torchscript_onnx_tflite: - inference_time: 6051.0 - throughput: 165.26194017517767 + inference_time: 5152.0 + throughput: 194.09937888198758 estimated_peak_memory_range: - min: 16384 - max: 74357488 + min: 36864 + max: 82013648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 182 - job_id: jmg9v7v57 + job_id: j0px9xr3p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:32:50.147901Z' + timestamp: '2024-04-02T15:41:29.144196Z' torchscript_onnx_qnn: - inference_time: 5175.0 - throughput: 193.23671497584542 + inference_time: 4871.0 + throughput: 205.29665366454526 estimated_peak_memory_range: - min: 4931584 - max: 94425040 + min: 4947968 + max: 93426784 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 230 + layers_on_npu: 229 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 230 - job_id: jz57zmrp3 + total_layers: 229 + job_id: jegn0kqk5 job_status: Passed diff --git a/qai_hub_models/models/yolov7/model.py b/qai_hub_models/models/yolov7/model.py index f46bed59..7c7330ea 100644 --- a/qai_hub_models/models/yolov7/model.py +++ b/qai_hub_models/models/yolov7/model.py @@ -32,10 +32,12 @@ def __init__( self, yolov7_feature_extractor: torch.nn.Module, yolov7_detector: torch.nn.Module, + include_postprocessing: bool = True, ) -> None: super().__init__() self.yolov7_feature_extractor = yolov7_feature_extractor self.yolov7_detector = yolov7_detector + self.include_postprocessing = include_postprocessing # All image input spatial dimensions should be a multiple of this stride. STRIDE_MULTIPLE = 32 @@ -44,6 +46,7 @@ def __init__( def from_pretrained( cls, weights_name: Optional[str] = DEFAULT_WEIGHTS, + include_postprocessing: bool = True, ): """Load YoloV7 from a weightfile created by the source YoloV7 repository.""" # Load PyTorch model from disk @@ -66,10 +69,7 @@ def from_pretrained( ].i # Index in sequential model yolov7_detect = _YoloV7Detector.from_yolov7_state_dict(detector_head_state_dict) - return cls( - yolov7_model, - yolov7_detect, - ) + return cls(yolov7_model, yolov7_detect, include_postprocessing) def forward(self, image: torch.Tensor): """ @@ -81,14 +81,26 @@ def forward(self, image: torch.Tensor): 3-channel Color Space: BGR Returns: - boxes: Shape [batch, num preds, 4] where 4 == (center_x, center_y, w, h) - class scores multiplied by confidence: Shape [batch, num_preds, # of classes (typically 80)] + If self.include_postprocessing: + boxes: Shape [batch, num preds, 4] where 4 == (center_x, center_y, w, h) + classes: class scores multiplied by confidence: Shape [batch, num_preds, # of classes (typically 80)] + + Otherwise: + detector_output: torch.Tensor + Shape is [batch, num_preds, k] + where, k = # of classes + 5 + k is structured as follows [box_coordinates (4) , conf (1) , # of classes] + and box_coordinates are [x_center, y_center, w, h] """ feature_extraction_output = ( *self.yolov7_feature_extractor(image), ) # Convert output list to Tuple, for exportability prediction = self.yolov7_detector(feature_extraction_output) - return detect_postprocess(prediction) + return ( + detect_postprocess(prediction) + if self.include_postprocessing + else prediction + ) @staticmethod def get_input_spec( diff --git a/qai_hub_models/models/yolov7/perf.yaml b/qai_hub_models/models/yolov7/perf.yaml index d9dc602f..17e74ab7 100644 --- a/qai_hub_models/models/yolov7/perf.yaml +++ b/qai_hub_models/models/yolov7/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: Yolo-v7 performance_metrics: - torchscript_onnx_tflite: - inference_time: 24023.0 - throughput: 41.626774341256294 + inference_time: 25218.0 + throughput: 39.65421524308034 estimated_peak_memory_range: - min: 9568256 - max: 12076232 + min: 9555968 + max: 43269368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 21 total_layers: 307 - job_id: jqpye94gy + job_id: jopr6wd0p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,7 +53,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:22:25.772406Z' + timestamp: '2024-04-02T15:35:57.654637Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -69,11 +70,11 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 17674.0 - throughput: 56.580287427860135 + inference_time: 19396.0 + throughput: 51.557022066405445 estimated_peak_memory_range: - min: 327680 - max: 113867968 + min: 12288 + max: 131497776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 21 total_layers: 307 - job_id: j2p0ynegw + job_id: jep2xedrg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,7 +91,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:22:25.772414Z' + timestamp: '2024-04-02T15:35:57.654650Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' diff --git a/qai_hub_models/models/yolov8_det/app.py b/qai_hub_models/models/yolov8_det/app.py index f642e153..0d45b8ab 100644 --- a/qai_hub_models/models/yolov8_det/app.py +++ b/qai_hub_models/models/yolov8_det/app.py @@ -4,9 +4,12 @@ # --------------------------------------------------------------------- from __future__ import annotations +from typing import Tuple + import torch from qai_hub_models.models._shared.yolo.app import YoloObjectDetectionApp +from qai_hub_models.models.yolov8_det.model import yolov8_detect_postprocess class YoloV8DetectionApp(YoloObjectDetectionApp): @@ -15,3 +18,8 @@ def check_image_size(self, pixel_values: torch.Tensor) -> None: YoloV8 does not check for spatial dim shapes for input image """ pass + + def pre_nms_postprocess( + self, prediction: torch.Tensor + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + return yolov8_detect_postprocess(prediction) diff --git a/qai_hub_models/models/yolov8_det/model.py b/qai_hub_models/models/yolov8_det/model.py index 7d7a1833..66aaad78 100644 --- a/qai_hub_models/models/yolov8_det/model.py +++ b/qai_hub_models/models/yolov8_det/model.py @@ -31,15 +31,18 @@ class YoloV8Detector(BaseModel): """Exportable YoloV8 bounding box detector, end-to-end.""" - def __init__(self, model: nn.Module) -> None: + def __init__(self, model: nn.Module, include_postprocessing: bool = True) -> None: super().__init__() self.model = model + self.include_postprocessing = include_postprocessing @classmethod - def from_pretrained(cls, ckpt_name: str = DEFAULT_WEIGHTS): + def from_pretrained( + cls, ckpt_name: str = DEFAULT_WEIGHTS, include_postprocessing: bool = True + ): model = ultralytics_YOLO(ckpt_name).model model.eval() - return cls(model) + return cls(model, include_postprocessing) def forward(self, image: torch.Tensor): """ @@ -51,10 +54,26 @@ def forward(self, image: torch.Tensor): 3-channel Color Space: RGB Returns: - boxes: Shape [batch, num preds, 4] where 4 == (center_x, center_y, w, h) - class scores multiplied by confidence: Shape [batch, num_preds, # of classes (typically 80)] + If self.include_postprocessing: + boxes: torch.Tensor + Bounding box locations. Shape is [batch, num preds, 4] where 4 == (x1, y1, x2, y2) + scores: torch.Tensor + class scores multiplied by confidence: Shape is [batch, num_preds] + class_idx: torch.tensor + Shape is [batch, num_preds] where the last dim is the index of the most probable class of the prediction. + + Otherwise: + predictions: torch.Tensor + Shape is [batch, k, num_preds] + Where, k = # of classes + 4 + The array dimension k is structured as follows: + [box coordintes, # of classes] + where box coordinates are [x_center, y_center, w, h] """ predictions, *_ = self.model(image) + if not self.include_postprocessing: + return predictions + boxes, scores, classes = yolov8_detect_postprocess(predictions) return boxes, scores, classes @@ -81,9 +100,10 @@ def yolov8_detect_postprocess(detector_output: torch.Tensor): detector_output: torch.Tensor The output of Yolo Detection model Shape is [batch, k, num_preds] - where, k = # of classes + 4 - k is structured as follows [boxes (4) : # of classes] - and boxes are co-ordinates [x_center, y_center, w, h] + Where, k = # of classes + 4 + The array dimension k is structured as follows: + [box coordintes, # of classes] + where box coordinates are [x_center, y_center, w, h] Returns: boxes: torch.Tensor diff --git a/qai_hub_models/models/yolov8_det/perf.yaml b/qai_hub_models/models/yolov8_det/perf.yaml index ecd1b9a2..526f1b82 100644 --- a/qai_hub_models/models/yolov8_det/perf.yaml +++ b/qai_hub_models/models/yolov8_det/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: YOLOv8-Detection performance_metrics: - torchscript_onnx_tflite: - inference_time: 9217.0 - throughput: 108.49517196484756 + inference_time: 6113.0 + throughput: 163.5858007524947 estimated_peak_memory_range: - min: 262144 - max: 19308896 + min: 233472 + max: 8968336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 300 - job_id: jo5mrw9gk + job_id: jqpyzm28g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,28 +53,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:28:16.047386Z' + timestamp: '2024-04-02T15:27:43.907101Z' torchscript_onnx_qnn: - inference_time: 7039.0 - throughput: 142.06563432305725 + inference_time: 5316.0 + throughput: 188.11136192626034 estimated_peak_memory_range: - min: 4984832 - max: 18803744 + min: 4935680 + max: 19108344 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 294 + layers_on_npu: 293 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 294 - job_id: joprk4750 + total_layers: 293 + job_id: j1p821rkp job_status: Passed - torchscript_onnx_tflite: - inference_time: 6502.0 - throughput: 153.79883112888342 + inference_time: 4320.0 + throughput: 231.4814814814815 estimated_peak_memory_range: - min: 24576 - max: 83870080 + min: 73728 + max: 88723920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 300 - job_id: jegn29qgo + job_id: j2p04699g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,19 +91,19 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:30:19.085764Z' + timestamp: '2024-04-02T15:30:24.719256Z' torchscript_onnx_qnn: - inference_time: 4840.0 - throughput: 206.61157024793388 + inference_time: 3677.0 + throughput: 271.9608376393799 estimated_peak_memory_range: - min: 4947968 - max: 123420640 + min: 4931584 + max: 110753456 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 294 + layers_on_npu: 293 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 294 - job_id: jep287qp6 + total_layers: 293 + job_id: jogkv80wp job_status: Passed diff --git a/qai_hub_models/models/yolov8_seg/perf.yaml b/qai_hub_models/models/yolov8_seg/perf.yaml index 1f87df00..90e771d1 100644 --- a/qai_hub_models/models/yolov8_seg/perf.yaml +++ b/qai_hub_models/models/yolov8_seg/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,11 +32,11 @@ models: - name: YOLOv8-Segmentation performance_metrics: - torchscript_onnx_tflite: - inference_time: 10665.0 - throughput: 93.76465072667604 + inference_time: 7056.0 + throughput: 141.7233560090703 estimated_peak_memory_range: - min: 4616192 - max: 6990768 + min: 4612096 + max: 14526392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -43,7 +44,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 337 - job_id: j1glnxepv + job_id: jn5q0v1np job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,7 +53,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-15T00:08:48.972058Z' + timestamp: '2024-04-02T15:25:01.845719Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -69,11 +70,11 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 7417.0 - throughput: 134.8254011055683 + inference_time: 5151.0 + throughput: 194.1370607649 estimated_peak_memory_range: - min: 53248 - max: 91611328 + min: 16384 + max: 98489488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,7 +82,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 337 - job_id: jw5667v5o + job_id: j1gl4l8j5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,7 +91,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-15T00:08:48.972071Z' + timestamp: '2024-04-02T15:25:01.845733Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' diff --git a/qai_hub_models/utils/args.py b/qai_hub_models/utils/args.py index 688961f4..dca6ae98 100644 --- a/qai_hub_models/utils/args.py +++ b/qai_hub_models/utils/args.py @@ -167,20 +167,53 @@ def get_model_cli_parser( for name, param in from_pretrained_sig.parameters.items(): if name == "cls": continue + + help = ( + f"For documentation, see {cls.__name__}::from_pretrained::parameter {name}." + ) + # Determining type from param.annotation is non-trivial (it can be a # strings like "Optional[str]" or "bool | None"). + bool_action = None + arg_name = f"--{name.replace('_', '-')}" if param.default is not None: type_ = type(param.default) + + if type_ == bool: + if param.default: + bool_action = "store_false" + # If the default is true, and the arg name does not start with no_, + # then add the no- to the argument (as it should be passed as --no-enable-flag, not --enable-flag) + if name.startswith("no_"): + arg_name = f"--{name[3:].replace('_', '-')}" + else: + arg_name = f"--no-{name.replace('_', '-')}" + help = ( + f"{help} Setting this flag will set parameter {name} to False." + ) + else: + bool_action = "store_true" + # If the default is false, and the arg name starts with no_, + # then remove the no- from the argument (as it should be passed as --enable-flag, not --no-enable-flag) + arg_name = f"--{name.replace('_', '-')}" + help = ( + f"{help} Setting this flag will set parameter {name} to True." + ) elif param.annotation == "bool": type_ = bool else: type_ = str - parser.add_argument( - f"--{name.replace('_', '-')}", - type=type_, - default=param.default, - help=f"For documentation, see {cls.__name__}::from_pretrained.", - ) + + if bool_action: + parser.add_argument(arg_name, dest=name, action=bool_action, help=help) + else: + parser.add_argument( + arg_name, + dest=name, + type=type_, + default=param.default, + help=help, + ) return parser @@ -248,6 +281,7 @@ def demo_model_from_cli_args( skip_downloading=True, skip_summary=True, target_runtime=cli_args.target_runtime, + **get_model_kwargs(model_cls, vars(cli_args)), ) if len(export_output) == 0 or isinstance(export_output[0], str): diff --git a/qai_hub_models/utils/model_adapters.py b/qai_hub_models/utils/model_adapters.py index 73cbb30d..44e94b16 100644 --- a/qai_hub_models/utils/model_adapters.py +++ b/qai_hub_models/utils/model_adapters.py @@ -31,7 +31,13 @@ def __init__(self, base_model: torch.jit.ScriptModule | torch.nn.Module): self.base_model = base_model def __call__(self, *args) -> Tuple[np.ndarray, ...]: - input_data = tuple(torch.from_numpy(t) for t in args) + inp = [] + for t in args: + if not isinstance(t, np.ndarray): + inp.append(t) + else: + inp.append(torch.from_numpy(t)) + input_data = tuple(inp) res = self.base_model(*input_data) if isinstance(res, torch.Tensor): output = res.detach().numpy() diff --git a/qai_hub_models/utils/quantization_aimet.py b/qai_hub_models/utils/quantization_aimet.py index 665b3261..11cbd9f6 100644 --- a/qai_hub_models/utils/quantization_aimet.py +++ b/qai_hub_models/utils/quantization_aimet.py @@ -37,15 +37,10 @@ import torch from qai_hub.client import DatasetEntries -from qai_hub_models.evaluators.base_evaluators import ( - BaseEvaluator, - _DataLoader, - _for_each_batch, -) +from qai_hub_models.evaluators.base_evaluators import _DataLoader, _for_each_batch from qai_hub_models.models._shared.common import apply_module_function_recursively from qai_hub_models.models.common import SourceModelFormat, TargetRuntime from qai_hub_models.models.protocols import ( - EvalModelProtocol, PretrainedHubModelProtocol, QuantizableModelProtocol, ) @@ -122,10 +117,10 @@ def quantize( self, data: _DataLoader, num_samples: int | None = None, - evaluator: BaseEvaluator | None = None, device: str = "cpu", requantize_model_weights=False, - ) -> float | None: + data_has_gt=False, + ) -> None: """ Compute quantization encodings for this model with the given dataset and model evaluator. @@ -148,21 +143,16 @@ def quantize( Number of samples to use for evaluation. One sample is one iteration from iter(data). If none, defaults to the number of samples in the dataset. - evaluator: BaseModelEvaluator | None - Evaluator to populate while quantizing the data. - If not provided, an evaluator is not used. - device: str Name of device on which inference should be run. requantize_model_weights: bool If a weight is quantized, recompute its quantization parameters. - Returns: - If an evaluator is provided, returns its accuracy score. No return value otherwise. + data_has_gt: bool + Set to true if the data loader passed in also provides ground truth data. + The ground truth data will be discarded for quantization. """ - if not evaluator and isinstance(self, EvalModelProtocol): - evaluator = self.get_evaluator() # Enable or disable quantization for model parameters (model weights). # Activations are always re-quantized. @@ -179,25 +169,15 @@ def quantize( # Un-freeze the quantizer. param_quantizer._is_encoding_frozen = False - # Reset evaluator if applicable - if evaluator: - evaluator.reset() - # Define evaluator function for this model. - def evaluator_func(model: torch.nn.Module, args): + def batched_forward(model: torch.nn.Module, args): # This function is defined because AIMET does not unwrap # the arguments you pass to `compute_encodings`. - return ( - evaluator.add_from_dataset(model, *args) - if evaluator - else _for_each_batch(model, *args) - ) + data, num_samples, device = args + _for_each_batch(model, data, num_samples, device, data_has_gt=data_has_gt) # Compute the new encodings. - self.quant_sim.compute_encodings(evaluator_func, [data, num_samples, device]) - - # Return accuracy score if applicable - return evaluator.get_accuracy_score() if evaluator else None + self.quant_sim.compute_encodings(batched_forward, [data, num_samples, device]) def convert_to_torchscript_and_aimet_encodings( self, @@ -316,7 +296,7 @@ def get_calibration_data( def get_hub_compile_options( self, target_runtime: TargetRuntime, other_compile_options: str = "" ) -> str: - compile_options = super().get_hub_compile_options( + compile_options = super().get_hub_compile_options( # type: ignore target_runtime, other_compile_options ) return compile_options + " --quantize_full_type int8 --quantize_io" diff --git a/scripts/examples/quantize_deeplabv3.py b/scripts/examples/quantize_deeplabv3.py index 8d3d62ca..d811811d 100644 --- a/scripts/examples/quantize_deeplabv3.py +++ b/scripts/examples/quantize_deeplabv3.py @@ -48,7 +48,7 @@ m.quant_sim.set_and_freeze_param_encodings(weight_encodings.fetch()) # Quantize activations - m.quantize(train_loader, args.num_iter, m.get_evaluator()) + m.quantize(train_loader, args.num_iter) # Export encodings m.convert_to_torchscript_and_aimet_encodings(os.getcwd(), model_name=MODEL_ID) diff --git a/scripts/examples/quantize_ffnet.py b/scripts/examples/quantize_ffnet.py index e0786b03..beb54b5b 100644 --- a/scripts/examples/quantize_ffnet.py +++ b/scripts/examples/quantize_ffnet.py @@ -74,7 +74,12 @@ model = FFNetQuantizable_cls.from_pretrained(aimet_encodings=None) # Quantize weights and activations - model.quantize(loader, num_samples=args.num_iter, requantize_model_weights=True) + model.quantize( + loader, + num_samples=args.num_iter, + requantize_model_weights=True, + data_has_gt=True, + ) output_path = args.output_dir or str(Path() / "build") output_name = args.output_name or f"{args.variant}_quantized_encodings" diff --git a/scripts/examples/quantize_imagenet_classifier.py b/scripts/examples/quantize_imagenet_classifier.py index 79d1e063..c5ec4c11 100644 --- a/scripts/examples/quantize_imagenet_classifier.py +++ b/scripts/examples/quantize_imagenet_classifier.py @@ -91,8 +91,20 @@ model = ImageNetClassifier_cls.from_pretrained(aimet_encodings=None) - accuracy = model.quantize(dataloader, args.num_iter, model.get_evaluator()) - print(f"Accuracy: {accuracy * 100:.3g}%") + evaluator = model.get_evaluator() + + evaluator.reset() + evaluator.add_from_dataset(model, dataloader, args.num_iter) + accuracy_fp32 = evaluator.get_accuracy_score() + + model.quantize(dataloader, args.num_iter, data_has_gt=True) + + evaluator.reset() + evaluator.add_from_dataset(model, dataloader, args.num_iter) + accuracy_int8 = evaluator.get_accuracy_score() + + print(f"FP32 Accuracy: {accuracy_fp32 * 100:.3g}%") + print(f"INT8 Accuracy: {accuracy_int8 * 100:.3g}%") output_path = args.output_dir or str(Path() / "build") output_name = args.output_name or f"{args.model}_quantized_encodings" diff --git a/scripts/examples/quantize_superresolution.py b/scripts/examples/quantize_superresolution.py index 02f9caf2..0a354e54 100644 --- a/scripts/examples/quantize_superresolution.py +++ b/scripts/examples/quantize_superresolution.py @@ -60,9 +60,21 @@ model = module.Model.from_pretrained(aimet_encodings=None) assert isinstance(model, AIMETQuantizableMixin) - # Quantize activations - accuracy = model.quantize(dataloader, args.num_iter, model.get_evaluator()) - print(f"PSNR: {accuracy}") + evaluator = model.get_evaluator() + + evaluator.reset() + evaluator.add_from_dataset(model, dataloader, args.num_iter) + accuracy_fp32 = evaluator.get_accuracy_score() + + # Quantize + model.quantize(dataloader, args.num_iter, data_has_gt=True) + + evaluator.reset() + evaluator.add_from_dataset(model, dataloader, args.num_iter) + accuracy_int8 = evaluator.get_accuracy_score() + + print(f"FP32 PSNR: {accuracy_fp32} dB") + print(f"INT8 PSNR: {accuracy_int8} dB") # Export encodings model.quant_sim.save_encodings_to_json(Path() / "build", module.MODEL_ID) diff --git a/scripts/examples/test_numerics_imagenet_classifier_quantized.py b/scripts/examples/test_numerics_imagenet_classifier_quantized.py index 3416382a..4560c6e8 100644 --- a/scripts/examples/test_numerics_imagenet_classifier_quantized.py +++ b/scripts/examples/test_numerics_imagenet_classifier_quantized.py @@ -91,18 +91,18 @@ def test_dataloader_is_deterministic(data_loaders): @pytest.fixture( scope="module", params=[ - # Class, Calibration accuracy, AIMET accuracy - (MobileNetV2Quantizable, 0.8021, 0.8100), - (MobileNetV3LargeQuantizable, 0.8438, 0.8550), - (ResNet18Quantizable, 0.8021, 0.8010), - (ResNet50Quantizable, 0.8229, 0.8520), - (ResNet101Quantizable, 0.8125, 0.8530), - (ResNeXt50Quantizable, 0.8333, 0.8880), - (ResNeXt101Quantizable, 0.8542, 0.9250), - (SqueezeNetQuantizable, 0.6042, 0.6410), - (RegNetQuantizable, 0.8229, 0.8750), - (WideResNet50Quantizable, 0.8958, 0.9190), - (ShufflenetV2Quantizable, 0.7083, 0.6740), + # Class, AIMET accuracy + (MobileNetV2Quantizable, 0.8100), + (MobileNetV3LargeQuantizable, 0.8550), + (ResNet18Quantizable, 0.8010), + (ResNet50Quantizable, 0.8520), + (ResNet101Quantizable, 0.8530), + (ResNeXt50Quantizable, 0.8880), + (ResNeXt101Quantizable, 0.9250), + (SqueezeNetQuantizable, 0.6410), + (RegNetQuantizable, 0.8750), + (WideResNet50Quantizable, 0.9190), + (ShufflenetV2Quantizable, 0.6740), ], ) def quantized_model(request, data_loaders, test_data): @@ -112,16 +112,12 @@ def quantized_model(request, data_loaders, test_data): """ img_test, label_test, hub_dataset = test_data calib_loader, test_loader = data_loaders - model_cls, target_calib_acc, target_sim_acc = request.param + model_cls, target_sim_acc = request.param model = model_cls.from_pretrained(aimet_encodings=None) # Calibration in quantization num_calib_batches = 3 - calib_accuracy = model.quantize( - calib_loader, num_calib_batches, evaluator=model.get_evaluator() - ) - print(f"{model_cls=}, {calib_accuracy=}") - np.testing.assert_allclose(target_calib_acc, calib_accuracy, atol=0.01) + model.quantize(calib_loader, num_calib_batches, data_has_gt=True) # QuantSim evaluation on eval set evaluator = model.get_evaluator() @@ -167,11 +163,11 @@ def test_make_encoding_w8a8_accuracy( expected_size_mb_and_acc = { (SourceModelFormat.ONNX, TargetRuntime.TFLITE, MobileNetV2Quantizable): ( 3.64, - 0.784, + 0.801, ), (SourceModelFormat.ONNX, TargetRuntime.QNN, MobileNetV2Quantizable): ( 4.02, - 0.790, + 0.801, ), (SourceModelFormat.ONNX, TargetRuntime.TFLITE, MobileNetV3LargeQuantizable): ( 5.79,