Commit

v0.7.0
See https://github.com/quic/ai-hub-models/releases/v0.7.0 for changelog.

Signed-off-by: QAIHM Team <[email protected]>
qaihm-bot committed May 29, 2024
1 parent 4c5f865 commit c546965
Showing 293 changed files with 12,766 additions and 6,347 deletions.
10 changes: 5 additions & 5 deletions README.md
@@ -271,8 +271,8 @@ Qualcomm® AI Hub Models is licensed under BSD-3. See the [LICENSE file](../LICE
| | | | |
| **Image Classification**
| [ConvNext-Tiny](https://aihub.qualcomm.com/models/convnext_tiny) | [qai_hub_models.models.convnext_tiny](qai_hub_models/models/convnext_tiny/README.md) | ✔️ | ✔️ | ✔️
| [ConvNext-Tiny-w8a16-Quantized](qai_hub_models/models/convnext_tiny_w8a16_quantized/README.md) | [qai_hub_models.models.convnext_tiny_w8a16_quantized](qai_hub_models/models/convnext_tiny_w8a16_quantized/README.md) | ✔️ | ✔️ | ✔️
| [ConvNext-Tiny-w8a8-Quantized](qai_hub_models/models/convnext_tiny_w8a8_quantized/README.md) | [qai_hub_models.models.convnext_tiny_w8a8_quantized](qai_hub_models/models/convnext_tiny_w8a8_quantized/README.md) | ✔️ | ✔️ | ✔️
| [ConvNext-Tiny-w8a16-Quantized](https://aihub.qualcomm.com/models/convnext_tiny_w8a16_quantized) | [qai_hub_models.models.convnext_tiny_w8a16_quantized](qai_hub_models/models/convnext_tiny_w8a16_quantized/README.md) | ✔️ | ✔️ | ✔️
| [ConvNext-Tiny-w8a8-Quantized](https://aihub.qualcomm.com/models/convnext_tiny_w8a8_quantized) | [qai_hub_models.models.convnext_tiny_w8a8_quantized](qai_hub_models/models/convnext_tiny_w8a8_quantized/README.md) | ✔️ | ✔️ | ✔️
| [DenseNet-121](https://aihub.qualcomm.com/models/densenet121) | [qai_hub_models.models.densenet121](qai_hub_models/models/densenet121/README.md) | ✔️ | ✔️ | ✔️
| [EfficientNet-B0](https://aihub.qualcomm.com/models/efficientnet_b0) | [qai_hub_models.models.efficientnet_b0](qai_hub_models/models/efficientnet_b0/README.md) | ✔️ | ✔️ | ✔️
| [GoogLeNet](https://aihub.qualcomm.com/models/googlenet) | [qai_hub_models.models.googlenet](qai_hub_models/models/googlenet/README.md) | ✔️ | ✔️ | ✔️
@@ -371,10 +371,10 @@ Qualcomm® AI Hub Models is licensed under BSD-3. See the [LICENSE file](../LICE
| [LiteHRNet](https://aihub.qualcomm.com/models/litehrnet) | [qai_hub_models.models.litehrnet](qai_hub_models/models/litehrnet/README.md) | ✔️ | ✔️ | ✔️
| [MediaPipe-Pose-Estimation](https://aihub.qualcomm.com/models/mediapipe_pose) | [qai_hub_models.models.mediapipe_pose](qai_hub_models/models/mediapipe_pose/README.md) | ✔️ | ✔️ | ✔️
| [OpenPose](https://aihub.qualcomm.com/models/openpose) | [qai_hub_models.models.openpose](qai_hub_models/models/openpose/README.md) | ✔️ | ✔️ | ✔️
| [Posenet-Mobilenet](qai_hub_models/models/posenet_mobilenet/README.md) | [qai_hub_models.models.posenet_mobilenet](qai_hub_models/models/posenet_mobilenet/README.md) | ✔️ | ✔️ | ✔️
| [Posenet-Mobilenet](https://aihub.qualcomm.com/models/posenet_mobilenet) | [qai_hub_models.models.posenet_mobilenet](qai_hub_models/models/posenet_mobilenet/README.md) | ✔️ | ✔️ | ✔️
| | | | |
| **Depth Estimation**
| [Midas-V2](qai_hub_models/models/midas/README.md) | [qai_hub_models.models.midas](qai_hub_models/models/midas/README.md) | ✔️ | ✔️ | ✔️
| [Midas-V2](https://aihub.qualcomm.com/models/midas) | [qai_hub_models.models.midas](qai_hub_models/models/midas/README.md) | ✔️ | ✔️ | ✔️

### Audio

@@ -395,8 +395,8 @@ Qualcomm® AI Hub Models is licensed under BSD-3. See the [LICENSE file](../LICE
| Model | README | Torch App | Device Export | CLI Demo
| -- | -- | -- | -- | --
| | | | |
| [OpenAI-Clip](https://aihub.qualcomm.com/models/openai_clip) | [qai_hub_models.models.openai_clip](qai_hub_models/models/openai_clip/README.md) | ✔️ | ✔️ | ✔️
| [TrOCR](https://aihub.qualcomm.com/models/trocr) | [qai_hub_models.models.trocr](qai_hub_models/models/trocr/README.md) | ✔️ | ✔️ | ✔️
| [OpenAI-Clip](https://aihub.qualcomm.com/models/openai_clip) | [qai_hub_models.models.openai_clip](qai_hub_models/models/openai_clip/README.md) | ✔️ | ✔️ | ✔️

### Generative Ai

2 changes: 1 addition & 1 deletion qai_hub_models/_version.py
@@ -2,4 +2,4 @@
# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# ---------------------------------------------------------------------
__version__ = "0.6.0"
__version__ = "0.7.0"
5 changes: 2 additions & 3 deletions qai_hub_models/global_requirements.txt
@@ -1,11 +1,11 @@
# If you:
# - Install requirements.txt
# - Run the aimet installation script
# - Then install this requirements file
# That should create an environment that works for every single model.

Deprecated==1.2.11
PySoundFile; sys_platform == 'win32'
aimet-torch==1.31.2; sys_platform == "linux"
albumentations==0.5.2
av==10.0.0
basicsr==1.4.2
@@ -22,7 +22,6 @@ imageio[ffmpeg]==2.31.5
imagesize==1.4.1
kornia==0.5.0
librosa==0.10.1
matplotlib==3.7.4
mmcv==2.1.0
mmdet==3.2.0
mmpose==1.2.0
@@ -45,7 +44,7 @@ thop==0.1.1.post2209072238
timm==0.9.11
tensorboard==2.13.0
torchaudio==0.13.1
transformers==4.27.4
transformers==4.41.1
treelib==1.6.1
tucker-conv==1.0.1
ultralytics==8.0.193
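The platform-conditional lines in this file (`PySoundFile; sys_platform == 'win32'`, `aimet-torch==1.31.2; sys_platform == "linux"`) use PEP 508 environment markers: pip installs the package only when the marker evaluates true on the host. A quick way to see how a marker resolves locally is sketched below; it assumes the `packaging` package from PyPI is installed.

```python
# Evaluate the PEP 508 environment markers used in global_requirements.txt.
# Assumes `pip install packaging`; the marker strings are copied from the diff.
from packaging.markers import Marker

for spec in ("sys_platform == 'win32'", "sys_platform == 'linux'"):
    # evaluate() substitutes values from the running interpreter's environment.
    print(spec, "->", Marker(spec).evaluate())
```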
6 changes: 3 additions & 3 deletions qai_hub_models/models/_shared/ffnet/model.py
@@ -6,6 +6,7 @@

import os
from importlib import reload
from pathlib import Path
from typing import Type, TypeVar

import torch
@@ -76,12 +77,11 @@ def from_pretrained(cls: Type[FFNetType], variant_name: str) -> FFNetType:
def _load_ffnet_source_model(variant_name) -> torch.nn.Module:
    subpath, src_name, dst_name = FFNET_SUBPATH_NAME_LOOKUP[variant_name]

    weights_url = os.path.join(FFNET_WEIGHTS_URL_ROOT, src_name)
    weights_path = CachedWebModelAsset(
        weights_url,
        f"{FFNET_WEIGHTS_URL_ROOT.rstrip('/')}/{src_name.lstrip('/')}",
        MODEL_ID,
        MODEL_ASSET_VERSION,
        os.path.join(subpath, dst_name),
        Path(subpath) / dst_name,
    ).fetch()
    root_weights_path = os.path.dirname(os.path.dirname(weights_path))

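The edit above splits what used to be two `os.path.join` calls by purpose. `os.path.join` uses the host separator, so on Windows it would glue URL segments together with a backslash and yield an invalid URL; the new code joins URL parts with an explicit `/` and moves the local cache subpath to `pathlib.Path`. A minimal sketch of the distinction, with illustrative names rather than the real FFNet asset locations:

```python
import os
from pathlib import Path

# Illustrative values only, not the real FFNet asset locations.
URL_ROOT = "https://example.com/ffnet/weights/"
SRC_NAME = "variant_state_dict.pth"

# URLs: join with an explicit "/" after trimming stray slashes, exactly as the
# new code does. os.path.join would use "\\" on Windows and corrupt the URL.
weights_url = f"{URL_ROOT.rstrip('/')}/{SRC_NAME.lstrip('/')}"
print(weights_url)  # https://example.com/ffnet/weights/variant_state_dict.pth

# Local paths: pathlib.Path picks the right separator for the host OS.
weights_subpath = Path("ffnet_variant") / "model.pth"
print(weights_subpath)
```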
4 changes: 4 additions & 0 deletions qai_hub_models/models/_shared/stable_diffusion/__init__.py
@@ -0,0 +1,4 @@
# ---------------------------------------------------------------------
# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# ---------------------------------------------------------------------
11 changes: 10 additions & 1 deletion qai_hub_models/models/aotgan/export.py
@@ -186,9 +186,18 @@ def export_model(

    # 5. Download the model asset to a local file
    if not skip_downloading:
        if target_runtime == TargetRuntime.QNN:
            target_runtime_extension = ".so"
        elif target_runtime == TargetRuntime.TFLITE:
            target_runtime_extension = ".tflite"
        elif target_runtime == TargetRuntime.ORT:
            target_runtime_extension = ".onnx"

        os.makedirs(output_path, exist_ok=True)
        target_model: hub.Model = compile_job.get_target_model()  # type: ignore
        target_model.download(str(output_path / f"{model_name}.tflite"))
        target_model.download(
            str(output_path / f"{model_name}.{target_runtime_extension}")
        )

    # 6. Summarize the results from profiling and inference
    if not skip_summary and not skip_profiling:
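The new branch picks the download extension from the compile target instead of hard-coding `.tflite`: `.so` for a QNN context binary, `.tflite` for TensorFlow Lite, and `.onnx` for ONNX Runtime. The same dispatch can also be written as a lookup table; the sketch below stubs out `TargetRuntime` (the real enum ships with `qai_hub_models`) and is illustrative, not the shipped implementation:

```python
from enum import Enum
from pathlib import Path

class TargetRuntime(Enum):  # stand-in for the real qai_hub_models enum
    TFLITE = "tflite"
    QNN = "qnn"
    ORT = "ort"

# One extension per runtime, mirroring the if/elif chain in the diff.
EXTENSION_BY_RUNTIME = {
    TargetRuntime.QNN: ".so",
    TargetRuntime.TFLITE: ".tflite",
    TargetRuntime.ORT: ".onnx",
}

def downloaded_model_path(output_path: Path, model_name: str, runtime: TargetRuntime) -> Path:
    # A dict lookup raises KeyError for an unmapped runtime, whereas the
    # if/elif chain would silently leave the extension variable unbound.
    return output_path / f"{model_name}{EXTENSION_BY_RUNTIME[runtime]}"

print(downloaded_model_path(Path("build"), "aotgan", TargetRuntime.QNN))  # build/aotgan.so
```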
100 changes: 50 additions & 50 deletions qai_hub_models/models/aotgan/perf.yaml
@@ -36,34 +36,34 @@ models:
- name: AOT-GAN
performance_metrics:
- torchscript_onnx_tflite:
inference_time: 164598.0
throughput: 6.075407963644759
inference_time: 164624.0
throughput: 6.074448440081641
estimated_peak_memory_range:
min: 4349952
max: 7789760
min: 5124096
max: 8396488
primary_compute_unit: NPU
precision: fp16
layer_info:
layers_on_npu: 235
layers_on_gpu: 0
layers_on_cpu: 0
total_layers: 235
job_id: jmg9werlp
job_id: j1glkw4ep
job_status: Passed
torchscript_onnx_qnn:
inference_time: 164540.0
throughput: 6.077549532028686
inference_time: 165008.0
throughput: 6.06031222728595
estimated_peak_memory_range:
min: 4341760
max: 36913480
min: 3850240
max: 32305264
primary_compute_unit: NPU
precision: fp16
layer_info:
layers_on_npu: 275
layers_on_gpu: 0
layers_on_cpu: 0
total_layers: 275
job_id: jz57x3mlg
job_id: jwgovdz45
job_status: Passed
torchscript_onnx_ort:
inference_time: 'null'
@@ -78,7 +78,7 @@ models:
layers_on_gpu: 0
layers_on_cpu: 0
total_layers: 0
job_id: jegn384r5
job_id: jygz7z2zp
job_status: Failed
reference_device_info:
name: Samsung Galaxy S23
@@ -87,36 +87,36 @@ models:
os_name: Android
manufacturer: Samsung
chipset: Snapdragon® 8 Gen 2
timestamp: '2024-05-20T16:35:27.553176Z'
timestamp: '2024-05-24T06:12:49.396717Z'
- torchscript_onnx_tflite:
inference_time: 120809.0
throughput: 8.277528992045294
inference_time: 120767.0
throughput: 8.280407727276492
estimated_peak_memory_range:
min: 2879488
max: 222384800
min: 2646016
max: 222181760
primary_compute_unit: NPU
precision: fp16
layer_info:
layers_on_npu: 235
layers_on_gpu: 0
layers_on_cpu: 0
total_layers: 235
job_id: jnp1ex92g
job_id: jw561o2vp
job_status: Passed
torchscript_onnx_qnn:
inference_time: 121163.0
throughput: 8.253344667926678
inference_time: 121460.0
throughput: 8.233163181294254
estimated_peak_memory_range:
min: 3252224
max: 144610800
min: 1572864
max: 147148656
primary_compute_unit: NPU
precision: fp16
layer_info:
layers_on_npu: 275
layers_on_gpu: 0
layers_on_cpu: 0
total_layers: 275
job_id: jqp4v07vp
job_id: j1pvw2q7g
job_status: Passed
torchscript_onnx_ort:
inference_time: 'null'
@@ -131,7 +131,7 @@ models:
layers_on_gpu: 0
layers_on_cpu: 0
total_layers: 0
job_id: joprejr95
job_id: jz5w9ywzp
job_status: Failed
reference_device_info:
name: Samsung Galaxy S24
@@ -140,36 +140,36 @@ models:
os_name: Android
manufacturer: Samsung
chipset: Snapdragon® 8 Gen 3
timestamp: '2024-05-20T16:35:27.684430Z'
timestamp: '2024-05-24T06:12:49.517726Z'
- torchscript_onnx_tflite:
inference_time: 161130.0
throughput: 6.206168931918326
inference_time: 164352.0
throughput: 6.084501557632398
estimated_peak_memory_range:
min: 3170304
max: 13340440
min: 3293184
max: 6536160
primary_compute_unit: NPU
precision: fp16
layer_info:
layers_on_npu: 235
layers_on_gpu: 0
layers_on_cpu: 0
total_layers: 235
job_id: jvgdolkep
job_id: j1p3monxg
job_status: Passed
torchscript_onnx_qnn:
inference_time: 164457.0
throughput: 6.080616817769995
inference_time: 164668.0
throughput: 6.072825321252459
estimated_peak_memory_range:
min: 4214784
max: 29715440
min: 4333568
max: 28875248
primary_compute_unit: NPU
precision: fp16
layer_info:
layers_on_npu: 275
layers_on_gpu: 0
layers_on_cpu: 0
total_layers: 275
job_id: jo5m3y7wg
job_id: jlpev6o75
job_status: Passed
reference_device_info:
name: QCS8550 (Proxy)
@@ -178,10 +178,10 @@ models:
os_name: Android
manufacturer: Qualcomm
chipset: Qcs8550
timestamp: '2024-05-20T16:35:27.816239Z'
timestamp: '2024-05-24T06:12:49.635903Z'
- torchscript_onnx_qnn:
inference_time: 145454.0
throughput: 6.87502578134668
inference_time: 145505.0
throughput: 6.872616061303735
estimated_peak_memory_range:
min: 4202496
max: 4202496
@@ -192,7 +192,7 @@ models:
layers_on_gpu: 0
layers_on_cpu: 0
total_layers: 275
job_id: j0pxy2q1g
job_id: j7gjl3d7p
job_status: Passed
torchscript_onnx_ort:
inference_time: 'null'
@@ -207,28 +207,28 @@ models:
layers_on_gpu: 0
layers_on_cpu: 0
total_layers: 0
job_id: jep2ln14g
job_id: jmg94o0q5
job_status: Failed
torchscript_onnx_ort_dml_gpu:
inference_time: 'null'
throughput: 'null'
inference_time: 739119.0
throughput: 1.3529621075902527
estimated_peak_memory_range:
min: 0
max: 0
primary_compute_unit: 'null'
precision: 'null'
min: 416927744
max: 416927744
primary_compute_unit: CPU
precision: fp32
layer_info:
layers_on_npu: 0
layers_on_gpu: 0
layers_on_cpu: 0
total_layers: 0
job_id: jqpy60l75
job_status: Failed
layers_on_cpu: 234
total_layers: 234
job_id: jnp18o2kg
job_status: Passed
reference_device_info:
name: Snapdragon X Elite CRD
os: '11'
form_factor: Compute
os_name: Windows
manufacturer: Qualcomm
chipset: Snapdragon® X Elite
timestamp: '2024-05-20T16:35:27.949164Z'
timestamp: '2024-05-24T06:12:49.749014Z'
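The refreshed numbers are consistent with `inference_time` being recorded in microseconds and `throughput` in inferences per second, so each pair satisfies throughput = 1,000,000 / inference_time. A spot-check against two of the values above:

```python
# Spot-check the relationship between the recorded perf.yaml fields
# (inference_time in microseconds, throughput in inferences/second).
pairs = [
    (164624.0, 6.074448440081641),   # Galaxy S23, torchscript_onnx_tflite
    (739119.0, 1.3529621075902527),  # Snapdragon X Elite CRD, ort_dml_gpu
]
for inference_time_us, recorded in pairs:
    derived = 1_000_000 / inference_time_us
    assert abs(derived - recorded) < 1e-6, (derived, recorded)
    print(f"{inference_time_us:>10} us -> {derived:.6f} inf/s")
```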
10 changes: 5 additions & 5 deletions qai_hub_models/models/controlnet_quantized/model.py
@@ -4,7 +4,7 @@
# ---------------------------------------------------------------------
from __future__ import annotations

import os
from pathlib import Path

from qai_hub_models.models.protocols import FromPrecompiledProtocol
from qai_hub_models.utils.asset_loaders import CachedWebModelAsset
@@ -14,10 +14,10 @@
MODEL_ID = __name__.split(".")[-2]
MODEL_ASSET_VERSION = 1
QNN_SDK_PREFIX = "QNN219"
TEXT_ENCODER = os.path.join(QNN_SDK_PREFIX, "text_encoder.serialized.bin")
UNET_DIFFUSER = os.path.join(QNN_SDK_PREFIX, "unet.serialized.bin")
VAE_DECODER = os.path.join(QNN_SDK_PREFIX, "vae_decoder.serialized.bin")
CONTROL_NET = os.path.join(QNN_SDK_PREFIX, "controlnet.serialized.bin")
TEXT_ENCODER = Path(QNN_SDK_PREFIX, "text_encoder.serialized.bin")
UNET_DIFFUSER = Path(QNN_SDK_PREFIX, "unet.serialized.bin")
VAE_DECODER = Path(QNN_SDK_PREFIX, "vae_decoder.serialized.bin")
CONTROL_NET = Path(QNN_SDK_PREFIX, "controlnet.serialized.bin")


class ControlNetQuantized(FromPrecompiledProtocol, CollectionModel):
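`pathlib.Path` accepts multiple segments directly, so each `Path(QNN_SDK_PREFIX, ...)` constant is a drop-in replacement for the `os.path.join` call it replaces; the one caveat is that APIs expecting a plain string need `str(...)` or `os.fspath(...)`. A small sketch of the equivalences:

```python
import os
from pathlib import Path

QNN_SDK_PREFIX = "QNN219"

# Multi-segment construction is equivalent to chaining the "/" operator...
text_encoder = Path(QNN_SDK_PREFIX, "text_encoder.serialized.bin")
assert text_encoder == Path(QNN_SDK_PREFIX) / "text_encoder.serialized.bin"

# ...and its string form matches os.path.join on every platform.
assert str(text_encoder) == os.path.join(QNN_SDK_PREFIX, "text_encoder.serialized.bin")

print(text_encoder)  # QNN219/text_encoder.serialized.bin (backslash on Windows)
```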
@@ -1,2 +1,2 @@
transformers==4.27.4
transformers==4.41.1
diffusers[torch]==0.21.4
11 changes: 10 additions & 1 deletion qai_hub_models/models/convnext_tiny/export.py
@@ -184,9 +184,18 @@ def export_model(

    # 5. Download the model asset to a local file
    if not skip_downloading:
        if target_runtime == TargetRuntime.QNN:
            target_runtime_extension = ".so"
        elif target_runtime == TargetRuntime.TFLITE:
            target_runtime_extension = ".tflite"
        elif target_runtime == TargetRuntime.ORT:
            target_runtime_extension = ".onnx"

        os.makedirs(output_path, exist_ok=True)
        target_model: hub.Model = compile_job.get_target_model()  # type: ignore
        target_model.download(str(output_path / f"{model_name}.tflite"))
        target_model.download(
            str(output_path / f"{model_name}.{target_runtime_extension}")
        )

    # 6. Summarize the results from profiling and inference
    if not skip_summary and not skip_profiling:
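This mirrors the extension-selection change in `aotgan/export.py` above. One detail worth noting about the `if`/`elif` form: a future `TargetRuntime` member with no matching branch would leave `target_runtime_extension` unbound and surface later as `UnboundLocalError`. A sketch of a fail-fast variant, with the enum stubbed out (a suggestion, not code from this commit):

```python
from enum import Enum, auto

class TargetRuntime(Enum):  # stub; the real enum lives in qai_hub_models
    TFLITE = auto()
    QNN = auto()
    ORT = auto()

def runtime_extension(target_runtime: TargetRuntime) -> str:
    if target_runtime == TargetRuntime.QNN:
        return ".so"
    elif target_runtime == TargetRuntime.TFLITE:
        return ".tflite"
    elif target_runtime == TargetRuntime.ORT:
        return ".onnx"
    # Fail fast instead of letting callers hit an unbound variable later.
    raise NotImplementedError(f"No download extension mapped for {target_runtime}")

print(runtime_extension(TargetRuntime.ORT))  # .onnx
```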
