From 953bd55e0170a2ed3ec4a80de2b8966f6f1b67c0 Mon Sep 17 00:00:00 2001 From: Qualcomm AI Stack Models Bot <quic_qaihm_bot@quicinc.com> Date: Wed, 27 Mar 2024 11:24:02 -0700 Subject: [PATCH] v0.4.0r3 Update Performance Numbers Signed-off-by: QAIHM Team <quic_qaihm_bot@quicinc.com> --- README.md | 2 +- .../models/_shared/ffnet_quantized/model.py | 22 -- .../models/_shared/whisper/model.py | 8 - qai_hub_models/models/aotgan/export.py | 2 +- qai_hub_models/models/common.py | 1 + qai_hub_models/models/convnext_tiny/export.py | 2 +- qai_hub_models/models/ddrnet23_slim/export.py | 2 +- .../models/deeplabv3_resnet50/export.py | 2 +- qai_hub_models/models/densenet121/export.py | 2 +- .../models/detr_resnet101/export.py | 2 +- .../models/detr_resnet101_dc5/export.py | 2 +- qai_hub_models/models/detr_resnet50/export.py | 2 +- .../models/detr_resnet50_dc5/export.py | 2 +- .../models/efficientnet_b0/export.py | 2 +- qai_hub_models/models/esrgan/export.py | 2 +- .../models/facebook_denoiser/export.py | 2 +- qai_hub_models/models/fastsam_s/export.py | 2 +- qai_hub_models/models/fastsam_x/export.py | 2 +- qai_hub_models/models/fcn_resnet50/export.py | 2 +- .../models/ffnet_122ns_lowres/export.py | 2 +- qai_hub_models/models/ffnet_40s/export.py | 2 +- .../models/ffnet_40s_quantized/export.py | 2 +- qai_hub_models/models/ffnet_54s/export.py | 2 +- .../models/ffnet_54s_quantized/export.py | 2 +- qai_hub_models/models/ffnet_78s/export.py | 2 +- .../models/ffnet_78s_lowres/export.py | 2 +- .../models/ffnet_78s_quantized/export.py | 2 +- qai_hub_models/models/googlenet/export.py | 2 +- .../models/googlenet_quantized/export.py | 2 +- .../models/googlenet_quantized/model.py | 6 - qai_hub_models/models/hrnet_pose/export.py | 6 +- .../models/hrnet_pose_quantized/export.py | 6 +- .../huggingface_wavlm_base_plus/export.py | 2 +- .../huggingface_wavlm_base_plus/perf.yaml | 41 ++-- qai_hub_models/models/inception_v3/export.py | 2 +- .../models/inception_v3_quantized/export.py | 2 +- .../models/inception_v3_quantized/model.py | 14 -- qai_hub_models/models/lama_dilated/export.py | 2 +- qai_hub_models/models/litehrnet/export.py | 2 +- .../models/mediapipe_face/export.py | 13 +- .../models/mediapipe_hand/export.py | 13 +- .../models/mediapipe_pose/export.py | 13 +- .../models/mediapipe_selfie/export.py | 2 +- qai_hub_models/models/mnasnet05/export.py | 2 +- qai_hub_models/models/mobilenet_v2/export.py | 2 +- .../models/mobilenet_v2_quantized/export.py | 2 +- .../models/mobilenet_v2_quantized/model.py | 14 -- .../models/mobilenet_v3_large/export.py | 2 +- .../mobilenet_v3_large_quantized/export.py | 2 +- .../mobilenet_v3_large_quantized/model.py | 6 - .../models/mobilenet_v3_small/export.py | 2 +- qai_hub_models/models/openai_clip/export.py | 13 +- qai_hub_models/models/openai_clip/perf.yaml | 145 +++---------- qai_hub_models/models/openpose/export.py | 2 +- qai_hub_models/models/protocols.py | 56 ++++- .../models/quicksrnetlarge/export.py | 2 +- .../quicksrnetlarge_quantized/export.py | 2 +- .../models/quicksrnetlarge_quantized/model.py | 9 + .../models/quicksrnetmedium/export.py | 2 +- .../quicksrnetmedium_quantized/export.py | 2 +- .../quicksrnetmedium_quantized/model.py | 9 + .../models/quicksrnetsmall/export.py | 2 +- .../quicksrnetsmall_quantized/export.py | 2 +- .../models/quicksrnetsmall_quantized/model.py | 9 + .../models/real_esrgan_general_x4v3/export.py | 2 +- .../models/real_esrgan_x4plus/export.py | 2 +- qai_hub_models/models/regnet/export.py | 2 +- qai_hub_models/models/resnet101/export.py | 2 +- .../models/resnet101_quantized/export.py | 2 +- .../models/resnet101_quantized/model.py | 14 -- qai_hub_models/models/resnet18/export.py | 2 +- .../models/resnet18_quantized/export.py | 2 +- .../models/resnet18_quantized/model.py | 14 -- qai_hub_models/models/resnet50/export.py | 2 +- qai_hub_models/models/resnext101/export.py | 2 +- .../models/resnext101_quantized/export.py | 2 +- .../models/resnext101_quantized/model.py | 14 -- qai_hub_models/models/resnext50/export.py | 2 +- .../models/resnext50_quantized/export.py | 2 +- .../models/resnext50_quantized/model.py | 14 -- qai_hub_models/models/sam/export.py | 5 +- qai_hub_models/models/sesr_m5/export.py | 2 +- .../models/sesr_m5_quantized/export.py | 2 +- .../models/sesr_m5_quantized/model.py | 9 + qai_hub_models/models/shufflenet_v2/export.py | 2 +- .../models/shufflenet_v2_quantized/export.py | 2 +- .../models/shufflenet_v2_quantized/model.py | 14 -- qai_hub_models/models/sinet/export.py | 2 +- qai_hub_models/models/squeezenet1_1/export.py | 2 +- .../models/squeezenet1_1_quantized/export.py | 2 +- .../models/squeezenet1_1_quantized/model.py | 14 -- qai_hub_models/models/stylegan2/export.py | 2 +- qai_hub_models/models/swin_base/export.py | 2 +- qai_hub_models/models/swin_small/export.py | 2 +- qai_hub_models/models/swin_tiny/export.py | 2 +- qai_hub_models/models/trocr/export.py | 14 +- .../models/unet_segmentation/export.py | 2 +- qai_hub_models/models/vit/export.py | 2 +- .../models/whisper_base_en/export.py | 5 +- .../models/whisper_small_en/export.py | 5 +- .../models/whisper_tiny_en/export.py | 5 +- qai_hub_models/models/wideresnet50/export.py | 2 +- .../models/wideresnet50_quantized/export.py | 2 +- .../models/wideresnet50_quantized/model.py | 14 -- qai_hub_models/models/xlsr/export.py | 2 +- .../models/xlsr_quantized/export.py | 2 +- qai_hub_models/models/xlsr_quantized/model.py | 9 + qai_hub_models/models/yolov6/export.py | 2 +- qai_hub_models/models/yolov7/export.py | 2 +- qai_hub_models/models/yolov8_det/export.py | 2 +- qai_hub_models/models/yolov8_seg/export.py | 2 +- qai_hub_models/utils/args.py | 4 + qai_hub_models/utils/base_model.py | 6 +- qai_hub_models/utils/config_loaders.py | 194 +++++++++++++----- qai_hub_models/utils/printing.py | 2 + qai_hub_models/utils/quantization_aimet.py | 25 ++- 116 files changed, 445 insertions(+), 496 deletions(-) diff --git a/README.md b/README.md index dbb402ab..d9443541 100644 --- a/README.md +++ b/README.md @@ -373,8 +373,8 @@ Qualcomm® AI Hub Models is licensed under BSD-3. See the [LICENSE file](../LICE | Model | README | Torch App | Device Export | CLI Demo | -- | -- | -- | -- | -- | | | | | -| [TrOCR](https://aihub.qualcomm.com/models/trocr) | [qai_hub_models.models.trocr](qai_hub_models/models/trocr/README.md) | ✔️ | ✔️ | ✔️ | [OpenAI-Clip](https://aihub.qualcomm.com/models/openai_clip) | [qai_hub_models.models.openai_clip](qai_hub_models/models/openai_clip/README.md) | ✔️ | ✔️ | ✔️ +| [TrOCR](https://aihub.qualcomm.com/models/trocr) | [qai_hub_models.models.trocr](qai_hub_models/models/trocr/README.md) | ✔️ | ✔️ | ✔️ ### Generative Ai diff --git a/qai_hub_models/models/_shared/ffnet_quantized/model.py b/qai_hub_models/models/_shared/ffnet_quantized/model.py index 68343a1c..438e6169 100644 --- a/qai_hub_models/models/_shared/ffnet_quantized/model.py +++ b/qai_hub_models/models/_shared/ffnet_quantized/model.py @@ -19,11 +19,8 @@ from aimet_torch.batch_norm_fold import fold_all_batch_norms from aimet_torch.model_preparer import prepare_model from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim -from qai_hub.client import DatasetEntries from qai_hub_models.models._shared.ffnet.model import FFNet -from qai_hub_models.utils.base_model import SourceModelFormat, TargetRuntime -from qai_hub_models.utils.input_spec import InputSpec MODEL_ID = __name__.split(".")[-2] FFNET_AIMET_CONFIG = os.path.abspath( @@ -47,14 +44,6 @@ def __init__( FFNet.__init__(self, ffnet_model.model) AIMETQuantizableMixin.__init__(self, ffnet_model) - def get_hub_compile_options( - self, target_runtime: TargetRuntime, other_compile_options: str = "" - ) -> str: - compile_options = super().get_hub_compile_options( - target_runtime, other_compile_options - ) - return compile_options + " --quantize_full_type int8 --quantize_io" - @classmethod def default_aimet_encodings(cls) -> str: raise NotImplementedError() @@ -88,14 +77,3 @@ def from_pretrained( sim.model.eval() return cls(sim) - - def preferred_hub_source_model_format( - self, target_runtime: TargetRuntime - ) -> SourceModelFormat: - return SourceModelFormat.ONNX - - def get_calibration_data( - self, target_runtime: TargetRuntime, input_spec: InputSpec | None = None - ) -> DatasetEntries | None: - # Do not provide calibration data - return None diff --git a/qai_hub_models/models/_shared/whisper/model.py b/qai_hub_models/models/_shared/whisper/model.py index ab5db089..dffcbb07 100644 --- a/qai_hub_models/models/_shared/whisper/model.py +++ b/qai_hub_models/models/_shared/whisper/model.py @@ -82,14 +82,6 @@ def get_input_spec() -> InputSpec: def from_pretrained(cls): return Whisper.from_pretrained().encoder - def get_hub_compile_options( - self, target_runtime: TargetRuntime, other_compile_options: str = "" - ) -> str: - compile_options = super().get_hub_compile_options( - target_runtime, other_compile_options - ) - return compile_options + " --compute_unit gpu" - def get_hub_profile_options( self, target_runtime: TargetRuntime, other_profile_options: str = "" ) -> str: diff --git a/qai_hub_models/models/aotgan/export.py b/qai_hub_models/models/aotgan/export.py index 1316347d..65f0b5c7 100644 --- a/qai_hub_models/models/aotgan/export.py +++ b/qai_hub_models/models/aotgan/export.py @@ -197,7 +197,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/common.py b/qai_hub_models/models/common.py index bf635e0e..37020fd3 100644 --- a/qai_hub_models/models/common.py +++ b/qai_hub_models/models/common.py @@ -11,6 +11,7 @@ class TargetRuntime(Enum): TFLITE = 0 QNN = 1 + ORT = 2 def __str__(self): return self.name.lower() diff --git a/qai_hub_models/models/convnext_tiny/export.py b/qai_hub_models/models/convnext_tiny/export.py index 4649e4ef..f8600751 100644 --- a/qai_hub_models/models/convnext_tiny/export.py +++ b/qai_hub_models/models/convnext_tiny/export.py @@ -189,7 +189,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/ddrnet23_slim/export.py b/qai_hub_models/models/ddrnet23_slim/export.py index 62378aea..16a76e1f 100644 --- a/qai_hub_models/models/ddrnet23_slim/export.py +++ b/qai_hub_models/models/ddrnet23_slim/export.py @@ -197,7 +197,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/deeplabv3_resnet50/export.py b/qai_hub_models/models/deeplabv3_resnet50/export.py index ff57a6fc..d2a2445e 100644 --- a/qai_hub_models/models/deeplabv3_resnet50/export.py +++ b/qai_hub_models/models/deeplabv3_resnet50/export.py @@ -193,7 +193,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/densenet121/export.py b/qai_hub_models/models/densenet121/export.py index 5d02faa4..4268f1fe 100644 --- a/qai_hub_models/models/densenet121/export.py +++ b/qai_hub_models/models/densenet121/export.py @@ -189,7 +189,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/detr_resnet101/export.py b/qai_hub_models/models/detr_resnet101/export.py index f46ca4e5..838626a5 100644 --- a/qai_hub_models/models/detr_resnet101/export.py +++ b/qai_hub_models/models/detr_resnet101/export.py @@ -189,7 +189,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/detr_resnet101_dc5/export.py b/qai_hub_models/models/detr_resnet101_dc5/export.py index e8c61070..2f8b1653 100644 --- a/qai_hub_models/models/detr_resnet101_dc5/export.py +++ b/qai_hub_models/models/detr_resnet101_dc5/export.py @@ -189,7 +189,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/detr_resnet50/export.py b/qai_hub_models/models/detr_resnet50/export.py index 70838101..f6e4697e 100644 --- a/qai_hub_models/models/detr_resnet50/export.py +++ b/qai_hub_models/models/detr_resnet50/export.py @@ -189,7 +189,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/detr_resnet50_dc5/export.py b/qai_hub_models/models/detr_resnet50_dc5/export.py index 09e9b406..e3d443a3 100644 --- a/qai_hub_models/models/detr_resnet50_dc5/export.py +++ b/qai_hub_models/models/detr_resnet50_dc5/export.py @@ -189,7 +189,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/efficientnet_b0/export.py b/qai_hub_models/models/efficientnet_b0/export.py index 44abf4a7..dfff029c 100644 --- a/qai_hub_models/models/efficientnet_b0/export.py +++ b/qai_hub_models/models/efficientnet_b0/export.py @@ -189,7 +189,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/esrgan/export.py b/qai_hub_models/models/esrgan/export.py index 01c64d63..d03a9f09 100644 --- a/qai_hub_models/models/esrgan/export.py +++ b/qai_hub_models/models/esrgan/export.py @@ -193,7 +193,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/facebook_denoiser/export.py b/qai_hub_models/models/facebook_denoiser/export.py index f536b473..997188b1 100644 --- a/qai_hub_models/models/facebook_denoiser/export.py +++ b/qai_hub_models/models/facebook_denoiser/export.py @@ -184,7 +184,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/fastsam_s/export.py b/qai_hub_models/models/fastsam_s/export.py index 4902b3a5..1c56e30d 100644 --- a/qai_hub_models/models/fastsam_s/export.py +++ b/qai_hub_models/models/fastsam_s/export.py @@ -199,7 +199,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/fastsam_x/export.py b/qai_hub_models/models/fastsam_x/export.py index 617e16c5..0bc1753c 100644 --- a/qai_hub_models/models/fastsam_x/export.py +++ b/qai_hub_models/models/fastsam_x/export.py @@ -199,7 +199,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/fcn_resnet50/export.py b/qai_hub_models/models/fcn_resnet50/export.py index 3a5dd85a..78b7bd02 100644 --- a/qai_hub_models/models/fcn_resnet50/export.py +++ b/qai_hub_models/models/fcn_resnet50/export.py @@ -197,7 +197,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/ffnet_122ns_lowres/export.py b/qai_hub_models/models/ffnet_122ns_lowres/export.py index 05689b50..cebe7622 100644 --- a/qai_hub_models/models/ffnet_122ns_lowres/export.py +++ b/qai_hub_models/models/ffnet_122ns_lowres/export.py @@ -193,7 +193,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/ffnet_40s/export.py b/qai_hub_models/models/ffnet_40s/export.py index dac6aea6..40804e7e 100644 --- a/qai_hub_models/models/ffnet_40s/export.py +++ b/qai_hub_models/models/ffnet_40s/export.py @@ -193,7 +193,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/ffnet_40s_quantized/export.py b/qai_hub_models/models/ffnet_40s_quantized/export.py index 08ed7624..bb562224 100644 --- a/qai_hub_models/models/ffnet_40s_quantized/export.py +++ b/qai_hub_models/models/ffnet_40s_quantized/export.py @@ -202,7 +202,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/ffnet_54s/export.py b/qai_hub_models/models/ffnet_54s/export.py index 5a27ee33..ad77a2b3 100644 --- a/qai_hub_models/models/ffnet_54s/export.py +++ b/qai_hub_models/models/ffnet_54s/export.py @@ -193,7 +193,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/ffnet_54s_quantized/export.py b/qai_hub_models/models/ffnet_54s_quantized/export.py index 83dc6e73..a1b046f8 100644 --- a/qai_hub_models/models/ffnet_54s_quantized/export.py +++ b/qai_hub_models/models/ffnet_54s_quantized/export.py @@ -202,7 +202,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/ffnet_78s/export.py b/qai_hub_models/models/ffnet_78s/export.py index 09e78a83..f2fd96d5 100644 --- a/qai_hub_models/models/ffnet_78s/export.py +++ b/qai_hub_models/models/ffnet_78s/export.py @@ -193,7 +193,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/ffnet_78s_lowres/export.py b/qai_hub_models/models/ffnet_78s_lowres/export.py index 08f9197f..b5d2fce0 100644 --- a/qai_hub_models/models/ffnet_78s_lowres/export.py +++ b/qai_hub_models/models/ffnet_78s_lowres/export.py @@ -193,7 +193,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/ffnet_78s_quantized/export.py b/qai_hub_models/models/ffnet_78s_quantized/export.py index 49c83ae3..dd043de2 100644 --- a/qai_hub_models/models/ffnet_78s_quantized/export.py +++ b/qai_hub_models/models/ffnet_78s_quantized/export.py @@ -202,7 +202,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/googlenet/export.py b/qai_hub_models/models/googlenet/export.py index 81262c38..d97de5a7 100644 --- a/qai_hub_models/models/googlenet/export.py +++ b/qai_hub_models/models/googlenet/export.py @@ -189,7 +189,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/googlenet_quantized/export.py b/qai_hub_models/models/googlenet_quantized/export.py index 7ed01e3e..15aa2751 100644 --- a/qai_hub_models/models/googlenet_quantized/export.py +++ b/qai_hub_models/models/googlenet_quantized/export.py @@ -200,7 +200,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/googlenet_quantized/model.py b/qai_hub_models/models/googlenet_quantized/model.py index e100c42a..0d486377 100644 --- a/qai_hub_models/models/googlenet_quantized/model.py +++ b/qai_hub_models/models/googlenet_quantized/model.py @@ -20,7 +20,6 @@ from qai_hub_models.models.googlenet.model import GoogLeNet from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config from qai_hub_models.utils.asset_loaders import CachedWebModelAsset -from qai_hub_models.utils.base_model import SourceModelFormat, TargetRuntime from qai_hub_models.utils.quantization_aimet import tie_aimet_observer_groups MODEL_ID = __name__.split(".")[-2] @@ -44,11 +43,6 @@ def __init__( sim_model, ) - def preferred_hub_source_model_format( - self, target_runtime: TargetRuntime - ) -> SourceModelFormat: - return SourceModelFormat.ONNX - @classmethod def from_pretrained( cls, diff --git a/qai_hub_models/models/hrnet_pose/export.py b/qai_hub_models/models/hrnet_pose/export.py index 338f35fd..0acf5119 100644 --- a/qai_hub_models/models/hrnet_pose/export.py +++ b/qai_hub_models/models/hrnet_pose/export.py @@ -116,7 +116,7 @@ def export_model( model_compile_options = model.get_hub_compile_options( target_runtime, compile_options - + " --force_channel_last_input image_tensor" + + " --force_channel_last_input image" + " --force_channel_last_output output_0", ) print(f"Optimizing model {model_name} to run on-device") @@ -156,7 +156,7 @@ def export_model( sample_inputs = model.sample_inputs(input_spec) # Convert inputs from channel first to channel last hub_inputs = transpose_channel_first_to_last( - "image_tensor", sample_inputs, target_runtime + "image", sample_inputs, target_runtime ) submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), @@ -197,7 +197,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/hrnet_pose_quantized/export.py b/qai_hub_models/models/hrnet_pose_quantized/export.py index 57904574..ec61ebb0 100644 --- a/qai_hub_models/models/hrnet_pose_quantized/export.py +++ b/qai_hub_models/models/hrnet_pose_quantized/export.py @@ -121,7 +121,7 @@ def export_model( model_compile_options = model.get_hub_compile_options( target_runtime, compile_options - + " --force_channel_last_input image_tensor" + + " --force_channel_last_input image" + " --force_channel_last_output output_0", ) print(f"Optimizing model {model_name} to run on-device") @@ -165,7 +165,7 @@ def export_model( hub_inputs = get_qnn_inputs(compile_job, sample_inputs) # Convert inputs from channel first to channel last hub_inputs = transpose_channel_first_to_last( - "image_tensor", sample_inputs, target_runtime + "image", sample_inputs, target_runtime ) submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), @@ -206,7 +206,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/huggingface_wavlm_base_plus/export.py b/qai_hub_models/models/huggingface_wavlm_base_plus/export.py index 5237cafc..59b3c4d5 100644 --- a/qai_hub_models/models/huggingface_wavlm_base_plus/export.py +++ b/qai_hub_models/models/huggingface_wavlm_base_plus/export.py @@ -180,7 +180,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/huggingface_wavlm_base_plus/perf.yaml b/qai_hub_models/models/huggingface_wavlm_base_plus/perf.yaml index e930ee5d..324eb6fa 100644 --- a/qai_hub_models/models/huggingface_wavlm_base_plus/perf.yaml +++ b/qai_hub_models/models/huggingface_wavlm_base_plus/perf.yaml @@ -19,6 +19,7 @@ aggregated: - Samsung Galaxy S23+ - Samsung Galaxy S24 - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro @@ -31,19 +32,19 @@ models: - name: HuggingFace-WavLM-Base-Plus performance_metrics: - torchscript_onnx_tflite: - inference_time: 237767939.0 - throughput: 0.0042057815036197965 + inference_time: 911873.0 + throughput: 1.0966439405487387 estimated_peak_memory_range: - min: 11886592 - max: 15703120 - primary_compute_unit: NPU - precision: fp16 + min: 149282816 + max: 153276888 + primary_compute_unit: CPU + precision: fp32 layer_info: - layers_on_npu: 848 + layers_on_npu: 0 layers_on_gpu: 0 - layers_on_cpu: 0 + layers_on_cpu: 848 total_layers: 848 - job_id: jlpe928gr + job_id: jz57yq4q5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -52,7 +53,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-15T00:13:01.265817Z' + timestamp: '2024-03-26T15:30:16.725161Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -69,19 +70,19 @@ models: job_id: '' job_status: Skipped - torchscript_onnx_tflite: - inference_time: 174470189.0 - throughput: 0.005731638199807303 + inference_time: 712941.0 + throughput: 1.4026406112146728 estimated_peak_memory_range: - min: 11321344 - max: 711668304 - primary_compute_unit: NPU - precision: fp16 + min: 147787776 + max: 179693888 + primary_compute_unit: CPU + precision: fp32 layer_info: - layers_on_npu: 848 + layers_on_npu: 0 layers_on_gpu: 0 - layers_on_cpu: 0 + layers_on_cpu: 848 total_layers: 848 - job_id: jygzew4g8 + job_id: joprvzlvg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,7 +91,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-15T00:13:01.265830Z' + timestamp: '2024-03-26T15:30:16.725174Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' diff --git a/qai_hub_models/models/inception_v3/export.py b/qai_hub_models/models/inception_v3/export.py index 461d52ea..ff4ec962 100644 --- a/qai_hub_models/models/inception_v3/export.py +++ b/qai_hub_models/models/inception_v3/export.py @@ -189,7 +189,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/inception_v3_quantized/export.py b/qai_hub_models/models/inception_v3_quantized/export.py index a5b83180..b723f39f 100644 --- a/qai_hub_models/models/inception_v3_quantized/export.py +++ b/qai_hub_models/models/inception_v3_quantized/export.py @@ -200,7 +200,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/inception_v3_quantized/model.py b/qai_hub_models/models/inception_v3_quantized/model.py index 39dd2658..44c3fba5 100644 --- a/qai_hub_models/models/inception_v3_quantized/model.py +++ b/qai_hub_models/models/inception_v3_quantized/model.py @@ -20,7 +20,6 @@ from qai_hub_models.models.inception_v3.model import InceptionNetV3 from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config from qai_hub_models.utils.asset_loaders import CachedWebModelAsset -from qai_hub_models.utils.base_model import SourceModelFormat, TargetRuntime from qai_hub_models.utils.quantization_aimet import tie_aimet_observer_groups MODEL_ID = __name__.split(".")[-2] @@ -47,11 +46,6 @@ def __init__( sim_model, ) - def preferred_hub_source_model_format( - self, target_runtime: TargetRuntime - ) -> SourceModelFormat: - return SourceModelFormat.ONNX - @classmethod def from_pretrained( cls, @@ -196,11 +190,3 @@ def _tie_pre_concat_quantizers(cls, sim: QuantizationSimModel): ], ] tie_aimet_observer_groups(groups) - - def get_hub_compile_options( - self, target_runtime: TargetRuntime, other_compile_options: str = "" - ) -> str: - compile_options = super().get_hub_compile_options( - target_runtime, other_compile_options - ) - return compile_options + " --quantize_full_type int8 --quantize_io" diff --git a/qai_hub_models/models/lama_dilated/export.py b/qai_hub_models/models/lama_dilated/export.py index 02ae013e..7393373c 100644 --- a/qai_hub_models/models/lama_dilated/export.py +++ b/qai_hub_models/models/lama_dilated/export.py @@ -197,7 +197,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/litehrnet/export.py b/qai_hub_models/models/litehrnet/export.py index 13f51b82..4c2df40a 100644 --- a/qai_hub_models/models/litehrnet/export.py +++ b/qai_hub_models/models/litehrnet/export.py @@ -184,7 +184,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/mediapipe_face/export.py b/qai_hub_models/models/mediapipe_face/export.py index fe627869..007ffb4d 100644 --- a/qai_hub_models/models/mediapipe_face/export.py +++ b/qai_hub_models/models/mediapipe_face/export.py @@ -27,7 +27,6 @@ from qai_hub_models.utils.qai_hub_helpers import ( can_access_qualcomm_ai_hub, export_without_hub_access, - transpose_channel_first_to_last, ) ALL_COMPONENTS = ["MediaPipeFaceDetector", "MediaPipeFaceLandmarkDetector"] @@ -127,7 +126,7 @@ def export_model( # 2. Compile the models to an on-device asset model_compile_options = component.get_hub_compile_options( - target_runtime, compile_options + " --force_channel_last_input image" + target_runtime, compile_options ) print(f"Optimizing model {component_name} to run on-device") submitted_compile_job = hub.submit_compile_job( @@ -170,13 +169,9 @@ def export_model( component_name ].get_hub_profile_options(target_runtime, profile_options) sample_inputs = components_dict[component_name].sample_inputs() - # Convert inputs from channel first to channel last - hub_inputs = transpose_channel_first_to_last( - "image", sample_inputs, target_runtime - ) submitted_inference_job = hub.submit_inference_job( model=compile_jobs[component_name].get_target_model(), - inputs=hub_inputs, + inputs=sample_inputs, device=hub.Device(device), name=f"{model_name}_{component_name}", options=profile_options_all, @@ -223,7 +218,9 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, components=ALL_COMPONENTS) + parser = export_parser( + model_cls=Model, components=ALL_COMPONENTS, supports_ort=False + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/mediapipe_hand/export.py b/qai_hub_models/models/mediapipe_hand/export.py index 22ff6f03..2585cead 100644 --- a/qai_hub_models/models/mediapipe_hand/export.py +++ b/qai_hub_models/models/mediapipe_hand/export.py @@ -27,7 +27,6 @@ from qai_hub_models.utils.qai_hub_helpers import ( can_access_qualcomm_ai_hub, export_without_hub_access, - transpose_channel_first_to_last, ) ALL_COMPONENTS = ["MediaPipeHandDetector", "MediaPipeHandLandmarkDetector"] @@ -127,7 +126,7 @@ def export_model( # 2. Compile the models to an on-device asset model_compile_options = component.get_hub_compile_options( - target_runtime, compile_options + " --force_channel_last_input image" + target_runtime, compile_options ) print(f"Optimizing model {component_name} to run on-device") submitted_compile_job = hub.submit_compile_job( @@ -170,13 +169,9 @@ def export_model( component_name ].get_hub_profile_options(target_runtime, profile_options) sample_inputs = components_dict[component_name].sample_inputs() - # Convert inputs from channel first to channel last - hub_inputs = transpose_channel_first_to_last( - "image", sample_inputs, target_runtime - ) submitted_inference_job = hub.submit_inference_job( model=compile_jobs[component_name].get_target_model(), - inputs=hub_inputs, + inputs=sample_inputs, device=hub.Device(device), name=f"{model_name}_{component_name}", options=profile_options_all, @@ -223,7 +218,9 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, components=ALL_COMPONENTS) + parser = export_parser( + model_cls=Model, components=ALL_COMPONENTS, supports_ort=False + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/mediapipe_pose/export.py b/qai_hub_models/models/mediapipe_pose/export.py index ec0c57a3..3ed5d886 100644 --- a/qai_hub_models/models/mediapipe_pose/export.py +++ b/qai_hub_models/models/mediapipe_pose/export.py @@ -27,7 +27,6 @@ from qai_hub_models.utils.qai_hub_helpers import ( can_access_qualcomm_ai_hub, export_without_hub_access, - transpose_channel_first_to_last, ) ALL_COMPONENTS = ["MediaPipePoseDetector", "MediaPipePoseLandmarkDetector"] @@ -127,7 +126,7 @@ def export_model( # 2. Compile the models to an on-device asset model_compile_options = component.get_hub_compile_options( - target_runtime, compile_options + " --force_channel_last_input image" + target_runtime, compile_options ) print(f"Optimizing model {component_name} to run on-device") submitted_compile_job = hub.submit_compile_job( @@ -170,13 +169,9 @@ def export_model( component_name ].get_hub_profile_options(target_runtime, profile_options) sample_inputs = components_dict[component_name].sample_inputs() - # Convert inputs from channel first to channel last - hub_inputs = transpose_channel_first_to_last( - "image", sample_inputs, target_runtime - ) submitted_inference_job = hub.submit_inference_job( model=compile_jobs[component_name].get_target_model(), - inputs=hub_inputs, + inputs=sample_inputs, device=hub.Device(device), name=f"{model_name}_{component_name}", options=profile_options_all, @@ -223,7 +218,9 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, components=ALL_COMPONENTS) + parser = export_parser( + model_cls=Model, components=ALL_COMPONENTS, supports_ort=False + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/mediapipe_selfie/export.py b/qai_hub_models/models/mediapipe_selfie/export.py index ce654b17..9ef6cf58 100644 --- a/qai_hub_models/models/mediapipe_selfie/export.py +++ b/qai_hub_models/models/mediapipe_selfie/export.py @@ -197,7 +197,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/mnasnet05/export.py b/qai_hub_models/models/mnasnet05/export.py index c500d613..794bb948 100644 --- a/qai_hub_models/models/mnasnet05/export.py +++ b/qai_hub_models/models/mnasnet05/export.py @@ -189,7 +189,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/mobilenet_v2/export.py b/qai_hub_models/models/mobilenet_v2/export.py index d5d83eb6..67c9c88e 100644 --- a/qai_hub_models/models/mobilenet_v2/export.py +++ b/qai_hub_models/models/mobilenet_v2/export.py @@ -189,7 +189,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/mobilenet_v2_quantized/export.py b/qai_hub_models/models/mobilenet_v2_quantized/export.py index f73c0796..e01ee5a7 100644 --- a/qai_hub_models/models/mobilenet_v2_quantized/export.py +++ b/qai_hub_models/models/mobilenet_v2_quantized/export.py @@ -200,7 +200,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/mobilenet_v2_quantized/model.py b/qai_hub_models/models/mobilenet_v2_quantized/model.py index d72efcb4..b8849fb8 100644 --- a/qai_hub_models/models/mobilenet_v2_quantized/model.py +++ b/qai_hub_models/models/mobilenet_v2_quantized/model.py @@ -23,7 +23,6 @@ ) from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config from qai_hub_models.utils.asset_loaders import CachedWebModelAsset -from qai_hub_models.utils.base_model import SourceModelFormat, TargetRuntime from qai_hub_models.utils.quantization_aimet import convert_all_depthwise_to_per_tensor MODEL_ID = __name__.split(".")[-2] @@ -47,11 +46,6 @@ def __init__( quant_sim_model, ) - def preferred_hub_source_model_format( - self, target_runtime: TargetRuntime - ) -> SourceModelFormat: - return SourceModelFormat.ONNX - @classmethod def from_pretrained( cls, @@ -104,11 +98,3 @@ def from_pretrained( sim.model.eval() return cls(sim) - - def get_hub_compile_options( - self, target_runtime: TargetRuntime, other_compile_options: str = "" - ) -> str: - compile_options = super().get_hub_compile_options( - target_runtime, other_compile_options - ) - return compile_options + " --quantize_full_type int8 --quantize_io" diff --git a/qai_hub_models/models/mobilenet_v3_large/export.py b/qai_hub_models/models/mobilenet_v3_large/export.py index 5bb2fffd..2a013f28 100644 --- a/qai_hub_models/models/mobilenet_v3_large/export.py +++ b/qai_hub_models/models/mobilenet_v3_large/export.py @@ -189,7 +189,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/mobilenet_v3_large_quantized/export.py b/qai_hub_models/models/mobilenet_v3_large_quantized/export.py index 5305d6b9..ce0375ef 100644 --- a/qai_hub_models/models/mobilenet_v3_large_quantized/export.py +++ b/qai_hub_models/models/mobilenet_v3_large_quantized/export.py @@ -193,7 +193,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/mobilenet_v3_large_quantized/model.py b/qai_hub_models/models/mobilenet_v3_large_quantized/model.py index 55b92db4..c26dbf43 100644 --- a/qai_hub_models/models/mobilenet_v3_large_quantized/model.py +++ b/qai_hub_models/models/mobilenet_v3_large_quantized/model.py @@ -20,7 +20,6 @@ from qai_hub_models.models.mobilenet_v3_large.model import MobileNetV3Large from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config from qai_hub_models.utils.asset_loaders import CachedWebModelAsset -from qai_hub_models.utils.base_model import SourceModelFormat, TargetRuntime MODEL_ID = __name__.split(".")[-2] MODEL_ASSET_VERSION = 1 @@ -43,11 +42,6 @@ def __init__( sim_model, ) - def preferred_hub_source_model_format( - self, target_runtime: TargetRuntime - ) -> SourceModelFormat: - return SourceModelFormat.ONNX - @classmethod def from_pretrained( cls, diff --git a/qai_hub_models/models/mobilenet_v3_small/export.py b/qai_hub_models/models/mobilenet_v3_small/export.py index 92037cb9..7bde62c9 100644 --- a/qai_hub_models/models/mobilenet_v3_small/export.py +++ b/qai_hub_models/models/mobilenet_v3_small/export.py @@ -189,7 +189,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/openai_clip/export.py b/qai_hub_models/models/openai_clip/export.py index 780dff90..3fa8dac3 100644 --- a/qai_hub_models/models/openai_clip/export.py +++ b/qai_hub_models/models/openai_clip/export.py @@ -27,7 +27,6 @@ from qai_hub_models.utils.qai_hub_helpers import ( can_access_qualcomm_ai_hub, export_without_hub_access, - transpose_channel_first_to_last, ) ALL_COMPONENTS = ["CLIPTextEncoder", "CLIPImageEncoder"] @@ -127,7 +126,7 @@ def export_model( # 2. Compile the models to an on-device asset model_compile_options = component.get_hub_compile_options( - target_runtime, compile_options + " --force_channel_last_input image" + target_runtime, compile_options ) print(f"Optimizing model {component_name} to run on-device") submitted_compile_job = hub.submit_compile_job( @@ -170,13 +169,9 @@ def export_model( component_name ].get_hub_profile_options(target_runtime, profile_options) sample_inputs = components_dict[component_name].sample_inputs() - # Convert inputs from channel first to channel last - hub_inputs = transpose_channel_first_to_last( - "image", sample_inputs, target_runtime - ) submitted_inference_job = hub.submit_inference_job( model=compile_jobs[component_name].get_target_model(), - inputs=hub_inputs, + inputs=sample_inputs, device=hub.Device(device), name=f"{model_name}_{component_name}", options=profile_options_all, @@ -223,7 +218,9 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, components=ALL_COMPONENTS) + parser = export_parser( + model_cls=Model, components=ALL_COMPONENTS, supports_ort=False + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/openai_clip/perf.yaml b/qai_hub_models/models/openai_clip/perf.yaml index 0989352c..2a6ddaed 100644 --- a/qai_hub_models/models/openai_clip/perf.yaml +++ b/qai_hub_models/models/openai_clip/perf.yaml @@ -17,63 +17,22 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ - - Samsung Galaxy S24 - - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: CLIPTextEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 15516.0 - throughput: 64.44960041247744 + inference_time: 15528.0 + throughput: 64.39979392065945 estimated_peak_memory_range: - min: 49152 - max: 3267008 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 574 - layers_on_gpu: 0 - layers_on_cpu: 2 - total_layers: 576 - job_id: jz5worjp1 - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:47:17.422656Z' - torchscript_onnx_qnn: - inference_time: 15586.0 - throughput: 64.16014371872193 - estimated_peak_memory_range: - min: 45056 - max: 2975720 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 574 - layers_on_gpu: 0 - layers_on_cpu: 2 - total_layers: 576 - job_id: jz57z1rp3 - job_status: Passed - - torchscript_onnx_tflite: - inference_time: 11115.0 - throughput: 89.9685110211426 - estimated_peak_memory_range: - min: 16384 - max: 204316144 + min: 40960 + max: 3106072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -81,62 +40,39 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 576 - job_id: jnp10ml5q + job_id: j2p0m2veg job_status: Passed - reference_device_info: - name: Samsung Galaxy S24 - os: '14' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-14T23:49:22.781059Z' torchscript_onnx_qnn: - inference_time: 11246.0 - throughput: 88.92050506846878 + inference_time: 8149.0 + throughput: 122.71444348999877 estimated_peak_memory_range: min: 40960 - max: 205502128 + max: 23728064 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 574 - layers_on_gpu: 0 - layers_on_cpu: 2 - total_layers: 576 - job_id: j0pxv89g7 - job_status: Passed -- name: CLIPImageEncoder - performance_metrics: - - torchscript_onnx_tflite: - inference_time: 128196.0 - throughput: 7.800555399544447 - estimated_peak_memory_range: - min: 143360 - max: 3847064 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 575 + layers_on_npu: 377 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 575 - job_id: jmg9vqv57 + total_layers: 377 + job_id: jogk2q9og job_status: Passed reference_device_info: - name: Samsung Galaxy S23 + name: Samsung Galaxy S23 Ultra os: '13' form_factor: Phone os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-03-14T23:59:18.769511Z' - torchscript_onnx_qnn: - inference_time: 127795.0 - throughput: 7.825032278258147 + timestamp: '2024-02-21T16:25:08.294036Z' +- name: CLIPImageEncoder + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 127729.0 + throughput: 7.829075621041424 estimated_peak_memory_range: - min: 180224 - max: 4074336 + min: 159744 + max: 3867320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -144,43 +80,28 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 575 - job_id: jqp4q6lgo + job_id: j1p8em48p job_status: Passed - - torchscript_onnx_tflite: - inference_time: 98556.0 - throughput: 10.14651568651325 + torchscript_onnx_qnn: + inference_time: 50903.0 + throughput: 19.645207551617784 estimated_peak_memory_range: - min: 163840 - max: 781391856 + min: 86016 + max: 59741752 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 575 + layers_on_npu: 370 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 575 - job_id: jvgdwml5j + total_layers: 370 + job_id: jn5qlrmmp job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Samsung Galaxy S23 Ultra + os: '13' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-03-15T00:01:23.890974Z' - torchscript_onnx_qnn: - inference_time: 97281.0 - throughput: 10.279499593959766 - estimated_peak_memory_range: - min: 237568 - max: 783870384 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 575 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 575 - job_id: jo5mr1qgk - job_status: Passed + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-02-21T16:30:00.084732Z' diff --git a/qai_hub_models/models/openpose/export.py b/qai_hub_models/models/openpose/export.py index 98bbb750..c432a301 100644 --- a/qai_hub_models/models/openpose/export.py +++ b/qai_hub_models/models/openpose/export.py @@ -197,7 +197,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/protocols.py b/qai_hub_models/models/protocols.py index e263482e..86b53dc9 100644 --- a/qai_hub_models/models/protocols.py +++ b/qai_hub_models/models/protocols.py @@ -19,12 +19,17 @@ from __future__ import annotations from abc import abstractmethod -from typing import Protocol, Type, TypeVar, runtime_checkable +from pathlib import Path +from typing import Any, Protocol, Type, TypeVar, runtime_checkable -from qai_hub.client import DatasetEntries +from qai_hub.client import DatasetEntries, SourceModel from qai_hub_models.evaluators.base_evaluators import BaseEvaluator, _DataLoader -from qai_hub_models.models.common import SampleInputsType, TargetRuntime +from qai_hub_models.models.common import ( + SampleInputsType, + SourceModelFormat, + TargetRuntime, +) from qai_hub_models.utils.input_spec import InputSpec FromPretrainedTypeVar = TypeVar("FromPretrainedTypeVar", bound="FromPretrainedProtocol") @@ -175,6 +180,51 @@ def from_pretrained( ... +class PretrainedHubModelProtocol(HubModelProtocol, FromPretrainedProtocol): + """ + All pretrained AI Hub Models must, at minimum, implement this interface. + """ + + @abstractmethod + def convert_to_torchscript( + self, input_spec: InputSpec | None = None, check_trace: bool = True + ) -> Any: + """ + Converts the torch module to a torchscript trace, which + is the format expected by qai hub. + + This is a default implementation that may be overriden by a subclass. + """ + ... + + def convert_to_hub_source_model( + self, + target_runtime: TargetRuntime, + output_path: str | Path, + input_spec: InputSpec | None = None, + check_trace: bool = True, + ) -> SourceModel: + ... + + def get_hub_compile_options( + self, + target_runtime: TargetRuntime, + other_compile_options: str = "", + ) -> str: + """ + AI Hub compile options recommended for the model. + """ + ... + + def preferred_hub_source_model_format( + self, target_runtime: TargetRuntime + ) -> SourceModelFormat: + """ + Source model format preferred for conversion on AI Hub. + """ + ... + + class FromPrecompiledProtocol(Protocol): """ Models follow this protocol if they can be initiated from a precompiled torch model. diff --git a/qai_hub_models/models/quicksrnetlarge/export.py b/qai_hub_models/models/quicksrnetlarge/export.py index 9e1976a4..ed3efe30 100644 --- a/qai_hub_models/models/quicksrnetlarge/export.py +++ b/qai_hub_models/models/quicksrnetlarge/export.py @@ -197,7 +197,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/export.py b/qai_hub_models/models/quicksrnetlarge_quantized/export.py index ea5568eb..27776bd4 100644 --- a/qai_hub_models/models/quicksrnetlarge_quantized/export.py +++ b/qai_hub_models/models/quicksrnetlarge_quantized/export.py @@ -206,7 +206,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/model.py b/qai_hub_models/models/quicksrnetlarge_quantized/model.py index 9bca792d..e9185b68 100644 --- a/qai_hub_models/models/quicksrnetlarge_quantized/model.py +++ b/qai_hub_models/models/quicksrnetlarge_quantized/model.py @@ -16,6 +16,7 @@ from aimet_torch.cross_layer_equalization import equalize_model from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim +from qai_hub_models.models.common import SourceModelFormat, TargetRuntime from qai_hub_models.models.quicksrnetlarge.model import QuickSRNetLarge from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config_legacy_v2 from qai_hub_models.utils.asset_loaders import CachedWebModelAsset @@ -97,3 +98,11 @@ def from_pretrained( sim.model.eval() return cls(sim) + + def preferred_hub_source_model_format( + self, target_runtime: TargetRuntime + ) -> SourceModelFormat: + if target_runtime == TargetRuntime.QNN: + return SourceModelFormat.ONNX + else: + return SourceModelFormat.TORCHSCRIPT diff --git a/qai_hub_models/models/quicksrnetmedium/export.py b/qai_hub_models/models/quicksrnetmedium/export.py index ad1016dc..d98eed32 100644 --- a/qai_hub_models/models/quicksrnetmedium/export.py +++ b/qai_hub_models/models/quicksrnetmedium/export.py @@ -197,7 +197,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/export.py b/qai_hub_models/models/quicksrnetmedium_quantized/export.py index da516367..bc1ebf42 100644 --- a/qai_hub_models/models/quicksrnetmedium_quantized/export.py +++ b/qai_hub_models/models/quicksrnetmedium_quantized/export.py @@ -206,7 +206,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/model.py b/qai_hub_models/models/quicksrnetmedium_quantized/model.py index 3b6cb7b6..939d8e67 100644 --- a/qai_hub_models/models/quicksrnetmedium_quantized/model.py +++ b/qai_hub_models/models/quicksrnetmedium_quantized/model.py @@ -16,6 +16,7 @@ from aimet_torch.cross_layer_equalization import equalize_model from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim +from qai_hub_models.models.common import SourceModelFormat, TargetRuntime from qai_hub_models.models.quicksrnetmedium.model import QuickSRNetMedium from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config_legacy_v2 from qai_hub_models.utils.asset_loaders import CachedWebModelAsset @@ -96,3 +97,11 @@ def from_pretrained( sim.model.eval() return cls(sim) + + def preferred_hub_source_model_format( + self, target_runtime: TargetRuntime + ) -> SourceModelFormat: + if target_runtime == TargetRuntime.QNN: + return SourceModelFormat.ONNX + else: + return SourceModelFormat.TORCHSCRIPT diff --git a/qai_hub_models/models/quicksrnetsmall/export.py b/qai_hub_models/models/quicksrnetsmall/export.py index f4ecbcca..ec138a32 100644 --- a/qai_hub_models/models/quicksrnetsmall/export.py +++ b/qai_hub_models/models/quicksrnetsmall/export.py @@ -197,7 +197,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/export.py b/qai_hub_models/models/quicksrnetsmall_quantized/export.py index 4944af8c..d5ad9ec4 100644 --- a/qai_hub_models/models/quicksrnetsmall_quantized/export.py +++ b/qai_hub_models/models/quicksrnetsmall_quantized/export.py @@ -206,7 +206,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/model.py b/qai_hub_models/models/quicksrnetsmall_quantized/model.py index 5ba7fb5e..9102f5f9 100644 --- a/qai_hub_models/models/quicksrnetsmall_quantized/model.py +++ b/qai_hub_models/models/quicksrnetsmall_quantized/model.py @@ -16,6 +16,7 @@ from aimet_torch.cross_layer_equalization import equalize_model from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim +from qai_hub_models.models.common import SourceModelFormat, TargetRuntime from qai_hub_models.models.quicksrnetsmall.model import QuickSRNetSmall from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config_legacy_v2 from qai_hub_models.utils.asset_loaders import CachedWebModelAsset @@ -95,3 +96,11 @@ def from_pretrained( sim.model.eval() return cls(sim) + + def preferred_hub_source_model_format( + self, target_runtime: TargetRuntime + ) -> SourceModelFormat: + if target_runtime == TargetRuntime.QNN: + return SourceModelFormat.ONNX + else: + return SourceModelFormat.TORCHSCRIPT diff --git a/qai_hub_models/models/real_esrgan_general_x4v3/export.py b/qai_hub_models/models/real_esrgan_general_x4v3/export.py index 7f5ce2b6..b91d81b8 100644 --- a/qai_hub_models/models/real_esrgan_general_x4v3/export.py +++ b/qai_hub_models/models/real_esrgan_general_x4v3/export.py @@ -197,7 +197,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/real_esrgan_x4plus/export.py b/qai_hub_models/models/real_esrgan_x4plus/export.py index 73ab228c..a53fe803 100644 --- a/qai_hub_models/models/real_esrgan_x4plus/export.py +++ b/qai_hub_models/models/real_esrgan_x4plus/export.py @@ -184,7 +184,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/regnet/export.py b/qai_hub_models/models/regnet/export.py index 2d4e54ce..911f9a7b 100644 --- a/qai_hub_models/models/regnet/export.py +++ b/qai_hub_models/models/regnet/export.py @@ -189,7 +189,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/resnet101/export.py b/qai_hub_models/models/resnet101/export.py index 92c2ea4b..44b0bf51 100644 --- a/qai_hub_models/models/resnet101/export.py +++ b/qai_hub_models/models/resnet101/export.py @@ -189,7 +189,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/resnet101_quantized/export.py b/qai_hub_models/models/resnet101_quantized/export.py index 774ea807..2e1658f6 100644 --- a/qai_hub_models/models/resnet101_quantized/export.py +++ b/qai_hub_models/models/resnet101_quantized/export.py @@ -200,7 +200,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/resnet101_quantized/model.py b/qai_hub_models/models/resnet101_quantized/model.py index bcfb3730..c0a803ef 100644 --- a/qai_hub_models/models/resnet101_quantized/model.py +++ b/qai_hub_models/models/resnet101_quantized/model.py @@ -23,7 +23,6 @@ from qai_hub_models.models.resnet101.model import ResNet101 from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config from qai_hub_models.utils.asset_loaders import CachedWebModelAsset -from qai_hub_models.utils.base_model import SourceModelFormat, TargetRuntime MODEL_ID = __name__.split(".")[-2] MODEL_ASSET_VERSION = 4 @@ -49,11 +48,6 @@ def __init__( sim_model, ) - def preferred_hub_source_model_format( - self, target_runtime: TargetRuntime - ) -> SourceModelFormat: - return SourceModelFormat.ONNX - @classmethod def from_pretrained( cls, @@ -91,11 +85,3 @@ def from_pretrained( sim.model.eval() return cls(sim) - - def get_hub_compile_options( - self, target_runtime: TargetRuntime, other_compile_options: str = "" - ) -> str: - compile_options = super().get_hub_compile_options( - target_runtime, other_compile_options - ) - return compile_options + " --quantize_full_type int8 --quantize_io" diff --git a/qai_hub_models/models/resnet18/export.py b/qai_hub_models/models/resnet18/export.py index 6dc6e2b2..05728934 100644 --- a/qai_hub_models/models/resnet18/export.py +++ b/qai_hub_models/models/resnet18/export.py @@ -189,7 +189,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/resnet18_quantized/export.py b/qai_hub_models/models/resnet18_quantized/export.py index d8d39572..f0c0c9ab 100644 --- a/qai_hub_models/models/resnet18_quantized/export.py +++ b/qai_hub_models/models/resnet18_quantized/export.py @@ -200,7 +200,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/resnet18_quantized/model.py b/qai_hub_models/models/resnet18_quantized/model.py index e000d973..4a147fcb 100644 --- a/qai_hub_models/models/resnet18_quantized/model.py +++ b/qai_hub_models/models/resnet18_quantized/model.py @@ -20,7 +20,6 @@ from qai_hub_models.models.resnet18.model import ResNet18 from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config from qai_hub_models.utils.asset_loaders import CachedWebModelAsset -from qai_hub_models.utils.base_model import SourceModelFormat, TargetRuntime MODEL_ID = __name__.split(".")[-2] MODEL_ASSET_VERSION = 7 @@ -43,11 +42,6 @@ def __init__( resnet18_model, ) - def preferred_hub_source_model_format( - self, target_runtime: TargetRuntime - ) -> SourceModelFormat: - return SourceModelFormat.ONNX - @classmethod def from_pretrained( cls, @@ -83,11 +77,3 @@ def from_pretrained( sim.model.eval() return cls(sim) - - def get_hub_compile_options( - self, target_runtime: TargetRuntime, other_compile_options: str = "" - ) -> str: - compile_options = super().get_hub_compile_options( - target_runtime, other_compile_options - ) - return compile_options + " --quantize_full_type int8 --quantize_io" diff --git a/qai_hub_models/models/resnet50/export.py b/qai_hub_models/models/resnet50/export.py index e7835d92..ec590470 100644 --- a/qai_hub_models/models/resnet50/export.py +++ b/qai_hub_models/models/resnet50/export.py @@ -189,7 +189,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/resnext101/export.py b/qai_hub_models/models/resnext101/export.py index e1b1fcd6..1d43fc58 100644 --- a/qai_hub_models/models/resnext101/export.py +++ b/qai_hub_models/models/resnext101/export.py @@ -189,7 +189,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/resnext101_quantized/export.py b/qai_hub_models/models/resnext101_quantized/export.py index 865e3205..d59544be 100644 --- a/qai_hub_models/models/resnext101_quantized/export.py +++ b/qai_hub_models/models/resnext101_quantized/export.py @@ -200,7 +200,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/resnext101_quantized/model.py b/qai_hub_models/models/resnext101_quantized/model.py index 98ea0d76..ce13db7a 100644 --- a/qai_hub_models/models/resnext101_quantized/model.py +++ b/qai_hub_models/models/resnext101_quantized/model.py @@ -20,7 +20,6 @@ from qai_hub_models.models.resnext101.model import ResNeXt101 from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config from qai_hub_models.utils.asset_loaders import CachedWebModelAsset -from qai_hub_models.utils.base_model import SourceModelFormat, TargetRuntime MODEL_ID = __name__.split(".")[-2] MODEL_ASSET_VERSION = 4 @@ -43,11 +42,6 @@ def __init__( sim_model, ) - def preferred_hub_source_model_format( - self, target_runtime: TargetRuntime - ) -> SourceModelFormat: - return SourceModelFormat.ONNX - @classmethod def from_pretrained( cls, @@ -83,11 +77,3 @@ def from_pretrained( sim.model.eval() return cls(sim) - - def get_hub_compile_options( - self, target_runtime: TargetRuntime, other_compile_options: str = "" - ) -> str: - compile_options = super().get_hub_compile_options( - target_runtime, other_compile_options - ) - return compile_options + " --quantize_full_type int8 --quantize_io" diff --git a/qai_hub_models/models/resnext50/export.py b/qai_hub_models/models/resnext50/export.py index 7baf5fa2..3f66523c 100644 --- a/qai_hub_models/models/resnext50/export.py +++ b/qai_hub_models/models/resnext50/export.py @@ -189,7 +189,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/resnext50_quantized/export.py b/qai_hub_models/models/resnext50_quantized/export.py index 7f4a6bcc..5977503b 100644 --- a/qai_hub_models/models/resnext50_quantized/export.py +++ b/qai_hub_models/models/resnext50_quantized/export.py @@ -200,7 +200,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/resnext50_quantized/model.py b/qai_hub_models/models/resnext50_quantized/model.py index dca50076..1e1a4930 100644 --- a/qai_hub_models/models/resnext50_quantized/model.py +++ b/qai_hub_models/models/resnext50_quantized/model.py @@ -20,7 +20,6 @@ from qai_hub_models.models.resnext50.model import ResNeXt50 from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config from qai_hub_models.utils.asset_loaders import CachedWebModelAsset -from qai_hub_models.utils.base_model import SourceModelFormat, TargetRuntime MODEL_ID = __name__.split(".")[-2] MODEL_ASSET_VERSION = 1 @@ -43,11 +42,6 @@ def __init__( sim_model, ) - def preferred_hub_source_model_format( - self, target_runtime: TargetRuntime - ) -> SourceModelFormat: - return SourceModelFormat.ONNX - @classmethod def from_pretrained( cls, @@ -83,11 +77,3 @@ def from_pretrained( sim.model.eval() return cls(sim) - - def get_hub_compile_options( - self, target_runtime: TargetRuntime, other_compile_options: str = "" - ) -> str: - compile_options = super().get_hub_compile_options( - target_runtime, other_compile_options - ) - return compile_options + " --quantize_full_type int8 --quantize_io" diff --git a/qai_hub_models/models/sam/export.py b/qai_hub_models/models/sam/export.py index a5ed59d6..9fcbb110 100644 --- a/qai_hub_models/models/sam/export.py +++ b/qai_hub_models/models/sam/export.py @@ -230,7 +230,10 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, components=ALL_COMPONENTS, supports_qnn=False + model_cls=Model, + components=ALL_COMPONENTS, + supports_qnn=False, + supports_ort=False, ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/sesr_m5/export.py b/qai_hub_models/models/sesr_m5/export.py index 51517850..ed685d85 100644 --- a/qai_hub_models/models/sesr_m5/export.py +++ b/qai_hub_models/models/sesr_m5/export.py @@ -193,7 +193,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/sesr_m5_quantized/export.py b/qai_hub_models/models/sesr_m5_quantized/export.py index 180d06e3..5c6e95d8 100644 --- a/qai_hub_models/models/sesr_m5_quantized/export.py +++ b/qai_hub_models/models/sesr_m5_quantized/export.py @@ -194,7 +194,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/sesr_m5_quantized/model.py b/qai_hub_models/models/sesr_m5_quantized/model.py index 7e2bac70..8782ceaf 100644 --- a/qai_hub_models/models/sesr_m5_quantized/model.py +++ b/qai_hub_models/models/sesr_m5_quantized/model.py @@ -17,6 +17,7 @@ from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim from qai_hub_models.models._shared.sesr.common import _load_sesr_source_model +from qai_hub_models.models.common import SourceModelFormat, TargetRuntime from qai_hub_models.models.sesr_m5.model import ( NUM_CHANNELS, NUM_LBLOCKS, @@ -97,3 +98,11 @@ def from_pretrained( sim.model.eval() return cls(sim) + + def preferred_hub_source_model_format( + self, target_runtime: TargetRuntime + ) -> SourceModelFormat: + if target_runtime == TargetRuntime.QNN: + return SourceModelFormat.ONNX + else: + return SourceModelFormat.TORCHSCRIPT diff --git a/qai_hub_models/models/shufflenet_v2/export.py b/qai_hub_models/models/shufflenet_v2/export.py index cdb14643..1d84398e 100644 --- a/qai_hub_models/models/shufflenet_v2/export.py +++ b/qai_hub_models/models/shufflenet_v2/export.py @@ -189,7 +189,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/shufflenet_v2_quantized/export.py b/qai_hub_models/models/shufflenet_v2_quantized/export.py index d4cd288c..bbc810cc 100644 --- a/qai_hub_models/models/shufflenet_v2_quantized/export.py +++ b/qai_hub_models/models/shufflenet_v2_quantized/export.py @@ -200,7 +200,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/shufflenet_v2_quantized/model.py b/qai_hub_models/models/shufflenet_v2_quantized/model.py index ba13c2c4..783cc2b5 100644 --- a/qai_hub_models/models/shufflenet_v2_quantized/model.py +++ b/qai_hub_models/models/shufflenet_v2_quantized/model.py @@ -23,7 +23,6 @@ from qai_hub_models.models.shufflenet_v2.model import ShufflenetV2 from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config from qai_hub_models.utils.asset_loaders import CachedWebModelAsset -from qai_hub_models.utils.base_model import SourceModelFormat, TargetRuntime from qai_hub_models.utils.quantization_aimet import ( convert_all_depthwise_to_per_tensor, tie_aimet_observer_groups, @@ -53,11 +52,6 @@ def __init__( sim_model, ) - def preferred_hub_source_model_format( - self, target_runtime: TargetRuntime - ) -> SourceModelFormat: - return SourceModelFormat.ONNX - @classmethod def from_pretrained( cls, @@ -98,14 +92,6 @@ def from_pretrained( sim.model.eval() return cls(sim) - def get_hub_compile_options( - self, target_runtime: TargetRuntime, other_compile_options: str = "" - ) -> str: - compile_options = super().get_hub_compile_options( - target_runtime, other_compile_options - ) - return compile_options + " --quantize_full_type int8 --quantize_io" - @classmethod def _tie_pre_concat_quantizers(cls, sim: QuantizationSimModel): """ diff --git a/qai_hub_models/models/sinet/export.py b/qai_hub_models/models/sinet/export.py index 780ac793..81eabff3 100644 --- a/qai_hub_models/models/sinet/export.py +++ b/qai_hub_models/models/sinet/export.py @@ -197,7 +197,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/squeezenet1_1/export.py b/qai_hub_models/models/squeezenet1_1/export.py index 1586bb36..917ebdfa 100644 --- a/qai_hub_models/models/squeezenet1_1/export.py +++ b/qai_hub_models/models/squeezenet1_1/export.py @@ -189,7 +189,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/squeezenet1_1_quantized/export.py b/qai_hub_models/models/squeezenet1_1_quantized/export.py index e4256985..b97b3e7f 100644 --- a/qai_hub_models/models/squeezenet1_1_quantized/export.py +++ b/qai_hub_models/models/squeezenet1_1_quantized/export.py @@ -198,7 +198,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/squeezenet1_1_quantized/model.py b/qai_hub_models/models/squeezenet1_1_quantized/model.py index 67a3f532..70ea93b9 100644 --- a/qai_hub_models/models/squeezenet1_1_quantized/model.py +++ b/qai_hub_models/models/squeezenet1_1_quantized/model.py @@ -20,7 +20,6 @@ from qai_hub_models.models.squeezenet1_1.model import SqueezeNet from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config from qai_hub_models.utils.asset_loaders import CachedWebModelAsset -from qai_hub_models.utils.base_model import SourceModelFormat, TargetRuntime MODEL_ID = __name__.split(".")[-2] MODEL_ASSET_VERSION = 2 @@ -43,11 +42,6 @@ def __init__( sim_model, ) - def preferred_hub_source_model_format( - self, target_runtime: TargetRuntime - ) -> SourceModelFormat: - return SourceModelFormat.ONNX - @classmethod def from_pretrained( cls, @@ -83,11 +77,3 @@ def from_pretrained( sim.model.eval() return cls(sim) - - def get_hub_compile_options( - self, target_runtime: TargetRuntime, other_compile_options: str = "" - ) -> str: - compile_options = super().get_hub_compile_options( - target_runtime, other_compile_options - ) - return compile_options + " --quantize_full_type int8 --quantize_io" diff --git a/qai_hub_models/models/stylegan2/export.py b/qai_hub_models/models/stylegan2/export.py index 520ed574..a430737a 100644 --- a/qai_hub_models/models/stylegan2/export.py +++ b/qai_hub_models/models/stylegan2/export.py @@ -185,7 +185,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/swin_base/export.py b/qai_hub_models/models/swin_base/export.py index b9e657bd..7b7b0968 100644 --- a/qai_hub_models/models/swin_base/export.py +++ b/qai_hub_models/models/swin_base/export.py @@ -189,7 +189,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/swin_small/export.py b/qai_hub_models/models/swin_small/export.py index ed29fb83..75769cb2 100644 --- a/qai_hub_models/models/swin_small/export.py +++ b/qai_hub_models/models/swin_small/export.py @@ -189,7 +189,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/swin_tiny/export.py b/qai_hub_models/models/swin_tiny/export.py index 3f43d39b..12d81836 100644 --- a/qai_hub_models/models/swin_tiny/export.py +++ b/qai_hub_models/models/swin_tiny/export.py @@ -189,7 +189,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/trocr/export.py b/qai_hub_models/models/trocr/export.py index 37af35b9..3e336783 100644 --- a/qai_hub_models/models/trocr/export.py +++ b/qai_hub_models/models/trocr/export.py @@ -27,7 +27,6 @@ from qai_hub_models.utils.qai_hub_helpers import ( can_access_qualcomm_ai_hub, export_without_hub_access, - transpose_channel_first_to_last, ) ALL_COMPONENTS = ["TrOCREncoder", "TrOCRDecoder"] @@ -127,7 +126,7 @@ def export_model( # 2. Compile the models to an on-device asset model_compile_options = component.get_hub_compile_options( - target_runtime, compile_options + " --force_channel_last_input pixel_values" + target_runtime, compile_options ) print(f"Optimizing model {component_name} to run on-device") submitted_compile_job = hub.submit_compile_job( @@ -170,13 +169,9 @@ def export_model( component_name ].get_hub_profile_options(target_runtime, profile_options) sample_inputs = components_dict[component_name].sample_inputs() - # Convert inputs from channel first to channel last - hub_inputs = transpose_channel_first_to_last( - "pixel_values", sample_inputs, target_runtime - ) submitted_inference_job = hub.submit_inference_job( model=compile_jobs[component_name].get_target_model(), - inputs=hub_inputs, + inputs=sample_inputs, device=hub.Device(device), name=f"{model_name}_{component_name}", options=profile_options_all, @@ -224,7 +219,10 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, components=ALL_COMPONENTS, supports_qnn=False + model_cls=Model, + components=ALL_COMPONENTS, + supports_qnn=False, + supports_ort=False, ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/unet_segmentation/export.py b/qai_hub_models/models/unet_segmentation/export.py index 11489e1b..acc1677d 100644 --- a/qai_hub_models/models/unet_segmentation/export.py +++ b/qai_hub_models/models/unet_segmentation/export.py @@ -197,7 +197,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/vit/export.py b/qai_hub_models/models/vit/export.py index bc43f4ae..e88a1575 100644 --- a/qai_hub_models/models/vit/export.py +++ b/qai_hub_models/models/vit/export.py @@ -191,7 +191,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/whisper_base_en/export.py b/qai_hub_models/models/whisper_base_en/export.py index 5eaddcb4..019486f5 100644 --- a/qai_hub_models/models/whisper_base_en/export.py +++ b/qai_hub_models/models/whisper_base_en/export.py @@ -219,7 +219,10 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, components=ALL_COMPONENTS, supports_qnn=False + model_cls=Model, + components=ALL_COMPONENTS, + supports_qnn=False, + supports_ort=False, ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/whisper_small_en/export.py b/qai_hub_models/models/whisper_small_en/export.py index 348716d4..4ab0b290 100644 --- a/qai_hub_models/models/whisper_small_en/export.py +++ b/qai_hub_models/models/whisper_small_en/export.py @@ -219,7 +219,10 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, components=ALL_COMPONENTS, supports_qnn=False + model_cls=Model, + components=ALL_COMPONENTS, + supports_qnn=False, + supports_ort=False, ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/whisper_tiny_en/export.py b/qai_hub_models/models/whisper_tiny_en/export.py index 57b7c76b..ecfa1e54 100644 --- a/qai_hub_models/models/whisper_tiny_en/export.py +++ b/qai_hub_models/models/whisper_tiny_en/export.py @@ -219,7 +219,10 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, components=ALL_COMPONENTS, supports_qnn=False + model_cls=Model, + components=ALL_COMPONENTS, + supports_qnn=False, + supports_ort=False, ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/wideresnet50/export.py b/qai_hub_models/models/wideresnet50/export.py index c86de65a..21d3369d 100644 --- a/qai_hub_models/models/wideresnet50/export.py +++ b/qai_hub_models/models/wideresnet50/export.py @@ -189,7 +189,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/wideresnet50_quantized/export.py b/qai_hub_models/models/wideresnet50_quantized/export.py index ad05928c..a8304a80 100644 --- a/qai_hub_models/models/wideresnet50_quantized/export.py +++ b/qai_hub_models/models/wideresnet50_quantized/export.py @@ -198,7 +198,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/wideresnet50_quantized/model.py b/qai_hub_models/models/wideresnet50_quantized/model.py index 9b1086a0..f1bd3f00 100644 --- a/qai_hub_models/models/wideresnet50_quantized/model.py +++ b/qai_hub_models/models/wideresnet50_quantized/model.py @@ -23,7 +23,6 @@ from qai_hub_models.models.wideresnet50.model import WideResNet50 from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config from qai_hub_models.utils.asset_loaders import CachedWebModelAsset -from qai_hub_models.utils.base_model import SourceModelFormat, TargetRuntime MODEL_ID = __name__.split(".")[-2] MODEL_ASSET_VERSION = 2 @@ -46,11 +45,6 @@ def __init__( sim_model, ) - def preferred_hub_source_model_format( - self, target_runtime: TargetRuntime - ) -> SourceModelFormat: - return SourceModelFormat.ONNX - @classmethod def from_pretrained( cls, @@ -88,11 +82,3 @@ def from_pretrained( sim.model.eval() return cls(sim) - - def get_hub_compile_options( - self, target_runtime: TargetRuntime, other_compile_options: str = "" - ) -> str: - compile_options = super().get_hub_compile_options( - target_runtime, other_compile_options - ) - return compile_options + " --quantize_full_type int8 --quantize_io" diff --git a/qai_hub_models/models/xlsr/export.py b/qai_hub_models/models/xlsr/export.py index c45d8d65..ae395081 100644 --- a/qai_hub_models/models/xlsr/export.py +++ b/qai_hub_models/models/xlsr/export.py @@ -193,7 +193,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/xlsr_quantized/export.py b/qai_hub_models/models/xlsr_quantized/export.py index d8b04a25..e5209fb7 100644 --- a/qai_hub_models/models/xlsr_quantized/export.py +++ b/qai_hub_models/models/xlsr_quantized/export.py @@ -202,7 +202,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/xlsr_quantized/model.py b/qai_hub_models/models/xlsr_quantized/model.py index 9747be53..cbf2ec5a 100644 --- a/qai_hub_models/models/xlsr_quantized/model.py +++ b/qai_hub_models/models/xlsr_quantized/model.py @@ -15,6 +15,7 @@ import torch from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim +from qai_hub_models.models.common import SourceModelFormat, TargetRuntime from qai_hub_models.models.xlsr.model import XLSR, _load_xlsr_source_model from qai_hub_models.utils.asset_loaders import CachedWebModelAsset @@ -88,3 +89,11 @@ def from_pretrained( load_encodings_to_sim(sim, aimet_encodings) return cls(sim) + + def preferred_hub_source_model_format( + self, target_runtime: TargetRuntime + ) -> SourceModelFormat: + if target_runtime == TargetRuntime.QNN: + return SourceModelFormat.ONNX + else: + return SourceModelFormat.TORCHSCRIPT diff --git a/qai_hub_models/models/yolov6/export.py b/qai_hub_models/models/yolov6/export.py index 1fd64724..9b974e35 100644 --- a/qai_hub_models/models/yolov6/export.py +++ b/qai_hub_models/models/yolov6/export.py @@ -191,7 +191,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/yolov7/export.py b/qai_hub_models/models/yolov7/export.py index 4a68cc79..77c632f9 100644 --- a/qai_hub_models/models/yolov7/export.py +++ b/qai_hub_models/models/yolov7/export.py @@ -191,7 +191,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/yolov8_det/export.py b/qai_hub_models/models/yolov8_det/export.py index 2b207ddb..43880658 100644 --- a/qai_hub_models/models/yolov8_det/export.py +++ b/qai_hub_models/models/yolov8_det/export.py @@ -193,7 +193,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/yolov8_seg/export.py b/qai_hub_models/models/yolov8_seg/export.py index 227c0a81..a39ac75e 100644 --- a/qai_hub_models/models/yolov8_seg/export.py +++ b/qai_hub_models/models/yolov8_seg/export.py @@ -193,7 +193,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/utils/args.py b/qai_hub_models/utils/args.py index 8b60a63d..688961f4 100644 --- a/qai_hub_models/utils/args.py +++ b/qai_hub_models/utils/args.py @@ -334,6 +334,7 @@ def export_parser( model_cls: Type[FromPretrainedTypeVar] | Type[FromPrecompiledTypeVar], components: Optional[List[str]] = None, supports_qnn=True, + supports_ort=True, exporting_compiled_model=False, ) -> argparse.ArgumentParser: """ @@ -348,6 +349,9 @@ def export_parser( supports_qnn: Whether QNN export is supported. Default=True. + supports_ort: + Whether ORT export is supported. + Default=True. exporting_compiled_model: True when exporting compiled model. If set, removing skip_profiling flag from export arguments. diff --git a/qai_hub_models/utils/base_model.py b/qai_hub_models/utils/base_model.py index b0763a06..353db64c 100644 --- a/qai_hub_models/utils/base_model.py +++ b/qai_hub_models/utils/base_model.py @@ -18,8 +18,8 @@ from qai_hub_models.models.protocols import ( ExecutableModelProtocol, FromPrecompiledProtocol, - FromPretrainedProtocol, HubModelProtocol, + PretrainedHubModelProtocol, ) from qai_hub_models.utils.input_spec import InputSpec, make_torch_inputs @@ -93,7 +93,7 @@ def get_hub_profile_options( class BaseModel( torch.nn.Module, HubModel, - FromPretrainedProtocol, + PretrainedHubModelProtocol, ExecutableModelProtocol, ): """ @@ -154,6 +154,8 @@ def get_hub_compile_options( compile_options = "" if target_runtime == TargetRuntime.QNN: compile_options = "--target_runtime qnn_lib_aarch64_android" + if target_runtime == TargetRuntime.ORT: + compile_options = "--target_runtime onnx" if other_compile_options != "": return compile_options + " " + other_compile_options return compile_options diff --git a/qai_hub_models/utils/config_loaders.py b/qai_hub_models/utils/config_loaders.py index ab79e346..14b4136c 100644 --- a/qai_hub_models/utils/config_loaders.py +++ b/qai_hub_models/utils/config_loaders.py @@ -8,7 +8,7 @@ from dataclasses import dataclass from enum import Enum from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple, Type, Union import requests from qai_hub.util.session import create_session @@ -472,6 +472,129 @@ def get_perf_details( return perf_details +class QAIHMModelCodeGen: + def __init__( + self, + is_aimet: bool, + has_on_target_demo: bool, + qnn_export_failure_reason: str, + tflite_export_failure_reason: str, + has_demo: bool, + check_trace: bool, + channel_last_input: List[str], + channel_last_output: List[str], + outputs_to_skip_validation: List[str], + export_test_model_kwargs: Dict[str, str], + components: Dict[str, Any], + default_components: List[str], + skip_tests: bool, + is_precompiled: bool, + no_assets: bool, + global_requirements_incompatible: bool, + torchscript_opt: List[str], + inference_metrics: str, + supports_ort: bool, + ) -> None: + self.is_aimet = is_aimet + self.has_on_target_demo = has_on_target_demo + self.qnn_export_failure_reason = qnn_export_failure_reason + self.tflite_export_failure_reason = tflite_export_failure_reason + self.has_demo = has_demo + self.check_trace = check_trace + self.channel_last_input = channel_last_input + self.channel_last_output = channel_last_output + self.outputs_to_skip_validation = outputs_to_skip_validation + self.export_test_model_kwargs = export_test_model_kwargs + self.components = components + self.default_components = default_components + self.skip_tests = skip_tests + self.is_precompiled = is_precompiled + self.no_assets = no_assets + self.global_requirements_incompatible = global_requirements_incompatible + self.torchscript_opt = torchscript_opt + self.inference_metrics = inference_metrics + self.supports_ort = supports_ort + + def validate(self) -> Tuple[bool, Optional[str]]: + """Returns false with a reason if the info spec for this model is not valid.""" + return True, None + + @classmethod + def from_model(cls: Type[QAIHMModelCodeGen], model_id: str) -> QAIHMModelCodeGen: + code_gen_path = QAIHM_MODELS_ROOT / model_id / "code-gen.yaml" + if not os.path.exists(code_gen_path): + raise ValueError(f"{model_id} does not exist") + return cls.from_yaml(code_gen_path) + + @classmethod + def from_yaml( + cls: Type[QAIHMModelCodeGen], code_gen_path: str | Path | None = None + ) -> QAIHMModelCodeGen: + # Load CFG and params + code_gen_config = QAIHMModelCodeGen.load_code_gen_yaml(code_gen_path) + return cls( + code_gen_config["is_aimet"], + code_gen_config["has_on_target_demo"], + code_gen_config["qnn_export_failure_reason"], + code_gen_config["tflite_export_failure_reason"], + code_gen_config["has_demo"], + code_gen_config["check_trace"], + code_gen_config["channel_last_input"], + code_gen_config["channel_last_output"], + code_gen_config["outputs_to_skip_validation"], + code_gen_config["export_test_model_kwargs"], + code_gen_config["components"], + code_gen_config["default_components"], + code_gen_config["skip_tests"], + code_gen_config["is_precompiled"], + code_gen_config["no_assets"], + code_gen_config["global_requirements_incompatible"], + code_gen_config["torchscript_opt"], + code_gen_config["inference_metrics"], + code_gen_config["supports_ort"], + ) + + # Schema for code-gen.yaml + CODE_GEN_YAML_SCHEMA = Schema( + And( + { + OptionalSchema("has_components", default=""): str, + OptionalSchema("is_aimet", default=False): bool, + OptionalSchema("has_on_target_demo", default=False): bool, + OptionalSchema("qnn_export_failure_reason", default=""): str, + OptionalSchema("tflite_export_failure_reason", default=""): str, + OptionalSchema("has_demo", default=True): bool, + OptionalSchema("check_trace", default=True): bool, + OptionalSchema("channel_last_input", default=[]): list, + OptionalSchema("channel_last_output", default=[]): list, + OptionalSchema("outputs_to_skip_validation", default=[]): list, + OptionalSchema("export_test_model_kwargs", default={}): dict, + OptionalSchema("components", default={}): dict, + OptionalSchema("default_components", default=[]): list, + OptionalSchema("skip_tests", default=False): bool, + OptionalSchema("is_precompiled", default=False): bool, + OptionalSchema("no_assets", default=False): bool, + OptionalSchema("global_requirements_incompatible", default=False): bool, + OptionalSchema("torchscript_opt", default=[]): list, + OptionalSchema("inference_metrics", default="psnr"): str, + OptionalSchema("supports_ort", default=False): bool, + } + ) + ) + + @staticmethod + def load_code_gen_yaml(path: str | Path | None = None): + if not path or not os.path.exists(path): + return QAIHMModelCodeGen.CODE_GEN_YAML_SCHEMA.validate({}) # Default Schema + data = load_yaml(path) + try: + # Validate high level-schema + data = QAIHMModelCodeGen.CODE_GEN_YAML_SCHEMA.validate(data) + except SchemaError as e: + assert 0, f"{e.code} in {path}" + return data + + class QAIHMModelInfo: def __init__( self, @@ -494,7 +617,7 @@ def __init__( form_factors: List[FORM_FACTOR], has_static_banner: bool, has_animated_banner: bool, - code_gen_config: Dict[str, str | bool], + code_gen_config: QAIHMModelCodeGen, license_type: str, deploy_license_type: str, dataset: List[str], @@ -593,9 +716,10 @@ def validate(self) -> Tuple[bool, Optional[str]]: if not os.path.exists(self.get_package_path() / "info.yaml"): return False, "All public models must have an info.yaml" - if self.code_gen_config.get( - "tflite_export_failure_reason", False - ) and self.code_gen_config.get("qnn_export_failure_reason", False): + if ( + self.code_gen_config.tflite_export_failure_reason + and self.code_gen_config.qnn_export_failure_reason + ): return False, "Public models must support at least one export path" session = create_session() @@ -684,20 +808,23 @@ def get_requirements_path(self, root: Path = QAIHM_PACKAGE_ROOT): def has_model_requirements(self, root: Path = QAIHM_PACKAGE_ROOT): return os.path.exists(self.get_requirements_path(root)) - @staticmethod - def from_model(model_id: str): + @classmethod + def from_model(cls: Type[QAIHMModelInfo], model_id: str) -> QAIHMModelInfo: schema_path = QAIHM_MODELS_ROOT / model_id / "info.yaml" code_gen_path = QAIHM_MODELS_ROOT / model_id / "code-gen.yaml" if not os.path.exists(schema_path): raise ValueError(f"{model_id} does not exist") - return QAIHMModelInfo.from_yaml(schema_path, code_gen_path) - - @staticmethod - def from_yaml(info_path: str | Path, code_gen_path: str | Path | None = None): + return cls.from_yaml(schema_path, code_gen_path) + + @classmethod + def from_yaml( + cls: Type[QAIHMModelInfo], + info_path: str | Path, + code_gen_path: str | Path | None = None, + ) -> QAIHMModelInfo: # Load CFG and params info_yaml = QAIHMModelInfo.load_info_yaml(info_path) - code_gen_config = QAIHMModelInfo.load_code_gen_yaml(code_gen_path) - return QAIHMModelInfo( + return cls( info_yaml["name"], info_yaml["id"], MODEL_STATUS.from_string(info_yaml["status"]), @@ -717,7 +844,7 @@ def from_yaml(info_path: str | Path, code_gen_path: str | Path | None = None): [FORM_FACTOR.from_string(ff) for ff in info_yaml["form_factors"]], info_yaml["has_static_banner"], info_yaml["has_animated_banner"], - code_gen_config, + QAIHMModelCodeGen.from_yaml(code_gen_path), info_yaml["license_type"], info_yaml["deploy_license_type"], info_yaml["dataset"], @@ -755,33 +882,6 @@ def from_yaml(info_path: str | Path, code_gen_path: str | Path | None = None): ) ) - # Schema for code-gen.yaml - CODE_GEN_YAML_SCHEMA = Schema( - And( - { - OptionalSchema("has_components", default=""): str, - OptionalSchema("is_aimet", default=False): bool, - OptionalSchema("has_on_target_demo", default=False): bool, - OptionalSchema("qnn_export_failure_reason", default=""): str, - OptionalSchema("tflite_export_failure_reason", default=""): str, - OptionalSchema("has_demo", default=True): bool, - OptionalSchema("check_trace", default=True): bool, - OptionalSchema("channel_last_input", default=""): str, - OptionalSchema("channel_last_output", default=""): str, - OptionalSchema("outputs_to_skip_validation", default=[]): list, - OptionalSchema("export_test_model_kwargs", default={}): dict, - OptionalSchema("components", default={}): dict, - OptionalSchema("default_components", default=[]): list, - OptionalSchema("skip_tests", default=False): bool, - OptionalSchema("is_precompiled", default=False): bool, - OptionalSchema("no_assets", default=False): bool, - OptionalSchema("global_requirements_incompatible", default=False): bool, - OptionalSchema("torchscript_opt", default=[]): list, - OptionalSchema("inference_metrics", default="psnr"): str, - } - ) - ) - @staticmethod def load_info_yaml(path: str | Path) -> Dict[str, Any]: data = load_yaml(path) @@ -791,15 +891,3 @@ def load_info_yaml(path: str | Path) -> Dict[str, Any]: except SchemaError as e: assert 0, f"{e.code} in {path}" return data - - @staticmethod - def load_code_gen_yaml(path: str | Path | None): - if not path or not os.path.exists(path): - return QAIHMModelInfo.CODE_GEN_YAML_SCHEMA.validate({}) # Default Schema - data = load_yaml(path) - try: - # Validate high level-schema - data = QAIHMModelInfo.CODE_GEN_YAML_SCHEMA.validate(data) - except SchemaError as e: - assert 0, f"{e.code} in {path}" - return data diff --git a/qai_hub_models/utils/printing.py b/qai_hub_models/utils/printing.py index 5efd51ec..f47f17e2 100644 --- a/qai_hub_models/utils/printing.py +++ b/qai_hub_models/utils/printing.py @@ -75,6 +75,8 @@ def print_profile_metrics_from_job( runtime = TargetRuntime.TFLITE elif is_qnn_hub_model(profile_job.model): runtime = TargetRuntime.QNN + elif profile_job.model.model_type == SourceModelType.ORT: + runtime = TargetRuntime.ORT else: raise NotImplementedError() diff --git a/qai_hub_models/utils/quantization_aimet.py b/qai_hub_models/utils/quantization_aimet.py index 56517e76..665b3261 100644 --- a/qai_hub_models/utils/quantization_aimet.py +++ b/qai_hub_models/utils/quantization_aimet.py @@ -46,7 +46,7 @@ from qai_hub_models.models.common import SourceModelFormat, TargetRuntime from qai_hub_models.models.protocols import ( EvalModelProtocol, - HubModelProtocol, + PretrainedHubModelProtocol, QuantizableModelProtocol, ) from qai_hub_models.utils.input_spec import InputSpec, make_torch_inputs @@ -103,7 +103,7 @@ def convert_depthwise_to_per_tensor(op, parent_module, name): ) -class AIMETQuantizableMixin(HubModelProtocol, QuantizableModelProtocol): +class AIMETQuantizableMixin(PretrainedHubModelProtocol, QuantizableModelProtocol): """ Mixin that allows a model to be quantized & exported to disk using AIMET. @@ -118,14 +118,6 @@ def __init__( self.quant_sim = quant_sim self.needs_onnx_direct_aimet_export = needs_onnx_direct_aimet_export - def preferred_hub_source_model_format( - self, target_runtime: TargetRuntime - ) -> SourceModelFormat: - if target_runtime == TargetRuntime.QNN: - return SourceModelFormat.ONNX - else: - return SourceModelFormat.TORCHSCRIPT - def quantize( self, data: _DataLoader, @@ -320,3 +312,16 @@ def get_calibration_data( input_spec = self.get_input_spec() inputs = make_torch_inputs(input_spec) return {k: v.numpy() for k, v in zip(input_spec.keys(), inputs)} + + def get_hub_compile_options( + self, target_runtime: TargetRuntime, other_compile_options: str = "" + ) -> str: + compile_options = super().get_hub_compile_options( + target_runtime, other_compile_options + ) + return compile_options + " --quantize_full_type int8 --quantize_io" + + def preferred_hub_source_model_format( + self, target_runtime: TargetRuntime + ) -> SourceModelFormat: + return SourceModelFormat.ONNX