From 5bae6e829b6076ea9aaabd8be75aa27749b5b7f3 Mon Sep 17 00:00:00 2001
From: Qualcomm AI Stack Models Bot
Date: Mon, 18 Mar 2024 16:37:50 -0700
Subject: [PATCH] v0.4.0

Signed-off-by: QAIHM Team
---
 .gitattributes | 1 +
 .gitignore | 115 --
 README.md | 118 +-
 apps/android/ImageClassification/README.md | 72 ++
 apps/android/ImageClassification/build.gradle | 10 +
 apps/android/ImageClassification/build_apk.py | 163 +++
 .../classification/build.gradle | 63 ++
 .../classification/proguard-rules.pro | 21 +
 .../src/main/AndroidManifest.xml | 37 +
 .../src/main/assets/Sample1.png | 3 +
 .../src/main/assets/Sample2.png | 3 +
 .../src/main/assets/Sample3.png | 3 +
 .../src/main/assets/Sample4.png | 3 +
 .../src/main/assets/Sample5.png | 3 +
 .../classification/src/main/assets/labels.txt | 1001 +++++++++++++++++
 .../ImageClassification.java | 214 ++++
 .../ImageClassificationResult.java | 32 +
 .../qcom/imageclassification/QNNActivity.java | 177 +++
 .../com/qcom/imageclassification/Result.java | 27 +
 .../com/qcom/imageclassification/Utils.java | 27 +
 .../drawable-v24/ic_launcher_foreground.xml | 30 +
 .../res/drawable/ic_launcher_background.xml | 170 +++
 .../drawable/image_classification_icon.png | 3 +
 .../res/layout/activity_classification.xml | 93 ++
 .../res/mipmap-anydpi-v26/ic_launcher.xml | 5 +
 .../mipmap-anydpi-v26/ic_launcher_round.xml | 6 +
 .../src/main/res/mipmap-hdpi/ic_launcher.png | 3 +
 .../res/mipmap-hdpi/ic_launcher_round.png | 3 +
 .../src/main/res/mipmap-mdpi/ic_launcher.png | 3 +
 .../res/mipmap-mdpi/ic_launcher_round.png | 3 +
 .../src/main/res/mipmap-xhdpi/ic_launcher.png | 3 +
 .../res/mipmap-xhdpi/ic_launcher_round.png | 3 +
 .../main/res/mipmap-xxhdpi/ic_launcher.png | 3 +
 .../res/mipmap-xxhdpi/ic_launcher_round.png | 3 +
 .../main/res/mipmap-xxxhdpi/ic_launcher.png | 3 +
 .../res/mipmap-xxxhdpi/ic_launcher_round.png | 3 +
 .../src/main/res/values-night/themes.xml | 17 +
 .../src/main/res/values/colors.xml | 11 +
 .../src/main/res/values/strings.xml | 4 +
 .../src/main/res/values/themes.xml | 18 +
 .../ImageClassification/gradle.properties | 20 +
 .../gradle/wrapper/gradle-wrapper.jar | 3 +
 .../gradle/wrapper/gradle-wrapper.properties | 6 +
 apps/android/ImageClassification/gradlew | 185 +++
 apps/android/ImageClassification/gradlew.bat | 89 ++
 .../ImageClassification/settings.gradle | 29 +
 apps/android/ImageSuperResolution/README.md | 66 ++
 .../android/ImageSuperResolution/build.gradle | 10 +
 .../ImageSuperResolution/build.properties | 2 +
 .../android/ImageSuperResolution/build_apk.py | 182 +++
 .../ImageSuperResolution/gradle.properties | 20 +
 .../gradle/wrapper/gradle-wrapper.jar | 3 +
 .../gradle/wrapper/gradle-wrapper.properties | 6 +
 apps/android/ImageSuperResolution/gradlew | 185 +++
 apps/android/ImageSuperResolution/gradlew.bat | 89 ++
 .../ImageSuperResolution/settings.gradle | 27 +
 .../superresolution/build.gradle | 71 ++
 .../superresolution/proguard-rules.pro | 21 +
 .../src/main/AndroidManifest.xml | 36 +
 .../src/main/assets/Sample1.jpg | 3 +
 .../src/main/assets/Sample2.jpg | 3 +
 .../com/qcom/imagesuperres/QNNActivity.java | 243 ++++
 .../java/com/qcom/imagesuperres/Result.java | 38 +
 .../qcom/imagesuperres/SuperResolution.java | 165 +++
 .../imagesuperres/SuperResolutionResult.java | 19 +
 .../java/com/qcom/imagesuperres/Utils.java | 36 +
 .../com/qcom/imagesuperres/UtilsESRGAN.java | 35 +
 .../drawable-v24/ic_launcher_foreground.xml | 30 +
 .../res/drawable/ic_launcher_background.xml | 170 +++
 .../src/main/res/layout/activity_superres.xml | 140 +++
 .../res/mipmap-anydpi-v26/ic_launcher.xml | 5 +
.../mipmap-anydpi-v26/ic_launcher_round.xml | 6 + .../src/main/res/mipmap-hdpi/ic_launcher.png | 3 + .../res/mipmap-hdpi/ic_launcher_round.png | 3 + .../src/main/res/mipmap-mdpi/ic_launcher.png | 3 + .../res/mipmap-mdpi/ic_launcher_round.png | 3 + .../src/main/res/mipmap-xhdpi/ic_launcher.png | 3 + .../res/mipmap-xhdpi/ic_launcher_round.png | 3 + .../main/res/mipmap-xxhdpi/ic_launcher.png | 3 + .../res/mipmap-xxhdpi/ic_launcher_round.png | 3 + .../main/res/mipmap-xxxhdpi/ic_launcher.png | 3 + .../res/mipmap-xxxhdpi/ic_launcher_round.png | 3 + .../src/main/res/values-night/themes.xml | 17 + .../src/main/res/values/colors.xml | 11 + .../src/main/res/values/strings.xml | 4 + .../src/main/res/values/themes.xml | 18 + qai_hub_models/_version.py | 2 +- qai_hub_models/asset_bases.yaml | 4 +- qai_hub_models/conftest.py | 7 + qai_hub_models/global_requirements.txt | 44 + .../_shared/cityscapes_segmentation/demo.py | 4 +- qai_hub_models/models/_shared/common.py | 35 +- qai_hub_models/models/_shared/deeplab/demo.py | 5 +- qai_hub_models/models/_shared/detr/demo.py | 5 +- qai_hub_models/models/_shared/detr/model.py | 2 +- qai_hub_models/models/_shared/fastsam/demo.py | 35 +- .../models/_shared/fastsam/model.py | 2 +- qai_hub_models/models/_shared/ffnet/model.py | 11 + .../_shared/imagenet_classifier/demo.py | 21 +- .../_shared/imagenet_classifier/model.py | 19 +- .../_shared/imagenet_classifier/test_utils.py | 2 +- .../models/_shared/quicksrnet/common.py | 24 +- qai_hub_models/models/_shared/repaint/app.py | 38 +- qai_hub_models/models/_shared/repaint/demo.py | 12 +- qai_hub_models/models/_shared/sesr/common.py | 12 +- .../models/_shared/super_resolution/demo.py | 4 +- .../models/_shared/video_classifier/model.py | 2 +- .../models/_shared/whisper/__init__.py | 4 + .../{whisper_asr => _shared/whisper}/app.py | 6 +- .../{whisper_asr => _shared/whisper}/demo.py | 14 +- .../{whisper_asr => _shared/whisper}/model.py | 49 +- .../test.py => _shared/whisper/test_utils.py} | 36 +- qai_hub_models/models/_shared/yolo/demo.py | 5 +- qai_hub_models/models/_shared/yolo/utils.py | 4 +- qai_hub_models/models/aotgan/README.md | 54 + qai_hub_models/models/aotgan/__init__.py | 10 + qai_hub_models/models/aotgan/conftest.py | 26 + qai_hub_models/models/aotgan/demo.py | 19 + qai_hub_models/models/aotgan/export.py | 206 ++++ qai_hub_models/models/aotgan/info.yaml | 31 + qai_hub_models/models/aotgan/model.py | 131 +++ .../models/aotgan/patches/layer_norm.diff | 14 + qai_hub_models/models/aotgan/perf.yaml | 108 ++ qai_hub_models/models/aotgan/test.py | 68 ++ .../models/baichuan_7b_quantized/README.md | 4 +- .../models/baichuan_7b_quantized/info.yaml | 4 +- qai_hub_models/models/common.py | 24 + .../models/controlnet_quantized/README.md | 6 +- .../models/controlnet_quantized/export.py | 77 +- .../models/controlnet_quantized/info.yaml | 2 + .../models/controlnet_quantized/model.py | 41 +- .../controlnet_quantized/requirements.txt | 3 +- .../models/controlnet_quantized/test.py | 5 + qai_hub_models/models/convnext_tiny/README.md | 6 +- .../models/convnext_tiny/conftest.py | 24 + qai_hub_models/models/convnext_tiny/demo.py | 4 +- qai_hub_models/models/convnext_tiny/export.py | 42 +- qai_hub_models/models/convnext_tiny/info.yaml | 2 + qai_hub_models/models/convnext_tiny/model.py | 2 +- qai_hub_models/models/convnext_tiny/perf.yaml | 59 +- qai_hub_models/models/convnext_tiny/test.py | 3 + qai_hub_models/models/ddrnet23_slim/README.md | 6 +- .../models/ddrnet23_slim/conftest.py | 26 + qai_hub_models/models/ddrnet23_slim/demo.py | 4 
+- qai_hub_models/models/ddrnet23_slim/export.py | 42 +- qai_hub_models/models/ddrnet23_slim/info.yaml | 2 + qai_hub_models/models/ddrnet23_slim/perf.yaml | 59 +- qai_hub_models/models/ddrnet23_slim/test.py | 2 + .../models/deeplabv3_resnet50/README.md | 6 +- .../models/deeplabv3_resnet50/conftest.py | 26 + .../models/deeplabv3_resnet50/demo.py | 4 +- .../models/deeplabv3_resnet50/export.py | 39 +- .../models/deeplabv3_resnet50/info.yaml | 2 + .../models/deeplabv3_resnet50/model.py | 18 +- .../models/deeplabv3_resnet50/perf.yaml | 69 +- .../models/deeplabv3_resnet50/test.py | 2 + qai_hub_models/models/densenet121/README.md | 6 +- qai_hub_models/models/densenet121/conftest.py | 24 + qai_hub_models/models/densenet121/demo.py | 4 +- qai_hub_models/models/densenet121/export.py | 42 +- qai_hub_models/models/densenet121/info.yaml | 2 + qai_hub_models/models/densenet121/model.py | 2 +- qai_hub_models/models/densenet121/perf.yaml | 69 +- qai_hub_models/models/densenet121/test.py | 3 + .../models/detr_resnet101/README.md | 6 +- .../models/detr_resnet101/conftest.py | 24 + qai_hub_models/models/detr_resnet101/demo.py | 2 +- .../models/detr_resnet101/export.py | 42 +- .../models/detr_resnet101/info.yaml | 2 + .../models/detr_resnet101/perf.yaml | 65 +- .../models/detr_resnet101/requirements.txt | 4 +- qai_hub_models/models/detr_resnet101/test.py | 3 + .../models/detr_resnet101_dc5/README.md | 6 +- .../models/detr_resnet101_dc5/conftest.py | 24 + .../models/detr_resnet101_dc5/demo.py | 2 +- .../models/detr_resnet101_dc5/export.py | 42 +- .../models/detr_resnet101_dc5/info.yaml | 2 + .../models/detr_resnet101_dc5/perf.yaml | 65 +- .../detr_resnet101_dc5/requirements.txt | 4 +- .../models/detr_resnet101_dc5/test.py | 3 + qai_hub_models/models/detr_resnet50/README.md | 6 +- .../models/detr_resnet50/conftest.py | 24 + qai_hub_models/models/detr_resnet50/demo.py | 2 +- qai_hub_models/models/detr_resnet50/export.py | 42 +- qai_hub_models/models/detr_resnet50/info.yaml | 2 + qai_hub_models/models/detr_resnet50/perf.yaml | 65 +- .../models/detr_resnet50/requirements.txt | 4 +- qai_hub_models/models/detr_resnet50/test.py | 3 + .../models/detr_resnet50_dc5/README.md | 6 +- .../models/detr_resnet50_dc5/conftest.py | 24 + .../models/detr_resnet50_dc5/demo.py | 2 +- .../models/detr_resnet50_dc5/export.py | 42 +- .../models/detr_resnet50_dc5/info.yaml | 2 + .../models/detr_resnet50_dc5/perf.yaml | 65 +- .../models/detr_resnet50_dc5/requirements.txt | 4 +- .../models/detr_resnet50_dc5/test.py | 3 + .../models/efficientnet_b0/README.md | 6 +- .../models/efficientnet_b0/conftest.py | 24 + qai_hub_models/models/efficientnet_b0/demo.py | 4 +- .../models/efficientnet_b0/export.py | 42 +- .../models/efficientnet_b0/info.yaml | 2 + .../models/efficientnet_b0/model.py | 2 +- .../models/efficientnet_b0/perf.yaml | 69 +- qai_hub_models/models/efficientnet_b0/test.py | 3 + qai_hub_models/models/esrgan/README.md | 6 +- qai_hub_models/models/esrgan/conftest.py | 26 + qai_hub_models/models/esrgan/demo.py | 1 + qai_hub_models/models/esrgan/export.py | 39 +- qai_hub_models/models/esrgan/info.yaml | 2 + qai_hub_models/models/esrgan/perf.yaml | 69 +- qai_hub_models/models/esrgan/test.py | 2 + .../models/facebook_denoiser/README.md | 6 +- .../models/facebook_denoiser/app.py | 14 +- .../models/facebook_denoiser/conftest.py | 26 + .../models/facebook_denoiser/demo.py | 53 +- .../models/facebook_denoiser/export.py | 43 +- .../models/facebook_denoiser/info.yaml | 2 + .../models/facebook_denoiser/model.py | 26 +- 
.../models/facebook_denoiser/perf.yaml | 59 +- .../models/facebook_denoiser/requirements.txt | 4 +- .../models/facebook_denoiser/test.py | 13 +- qai_hub_models/models/fastsam_s/README.md | 6 +- qai_hub_models/models/fastsam_s/conftest.py | 24 + qai_hub_models/models/fastsam_s/demo.py | 2 +- qai_hub_models/models/fastsam_s/export.py | 42 +- qai_hub_models/models/fastsam_s/info.yaml | 2 + qai_hub_models/models/fastsam_s/perf.yaml | 59 +- .../models/fastsam_s/requirements.txt | 3 +- qai_hub_models/models/fastsam_x/README.md | 6 +- qai_hub_models/models/fastsam_x/conftest.py | 24 + qai_hub_models/models/fastsam_x/demo.py | 2 +- qai_hub_models/models/fastsam_x/export.py | 42 +- qai_hub_models/models/fastsam_x/info.yaml | 2 + qai_hub_models/models/fastsam_x/perf.yaml | 59 +- .../models/fastsam_x/requirements.txt | 3 +- qai_hub_models/models/fcn_resnet50/README.md | 6 +- .../models/fcn_resnet50/conftest.py | 26 + qai_hub_models/models/fcn_resnet50/demo.py | 4 +- qai_hub_models/models/fcn_resnet50/export.py | 42 +- qai_hub_models/models/fcn_resnet50/info.yaml | 2 + qai_hub_models/models/fcn_resnet50/perf.yaml | 65 +- qai_hub_models/models/fcn_resnet50/test.py | 3 + .../models/ffnet_122ns_lowres/README.md | 6 +- .../models/ffnet_122ns_lowres/conftest.py | 26 + .../models/ffnet_122ns_lowres/export.py | 39 +- .../models/ffnet_122ns_lowres/info.yaml | 4 +- .../models/ffnet_122ns_lowres/perf.yaml | 69 +- .../ffnet_122ns_lowres/requirements.txt | 2 +- qai_hub_models/models/ffnet_40s/README.md | 6 +- qai_hub_models/models/ffnet_40s/conftest.py | 26 + qai_hub_models/models/ffnet_40s/export.py | 39 +- qai_hub_models/models/ffnet_40s/info.yaml | 4 +- qai_hub_models/models/ffnet_40s/perf.yaml | 69 +- .../models/ffnet_40s/requirements.txt | 2 +- .../models/ffnet_40s_quantized/README.md | 6 +- .../models/ffnet_40s_quantized/conftest.py | 26 + .../models/ffnet_40s_quantized/export.py | 37 +- .../models/ffnet_40s_quantized/info.yaml | 4 +- .../models/ffnet_40s_quantized/perf.yaml | 61 +- .../models/ffnet_40s_quantized/test.py | 1 + qai_hub_models/models/ffnet_54s/README.md | 6 +- qai_hub_models/models/ffnet_54s/conftest.py | 26 + qai_hub_models/models/ffnet_54s/export.py | 39 +- qai_hub_models/models/ffnet_54s/info.yaml | 4 +- qai_hub_models/models/ffnet_54s/perf.yaml | 69 +- .../models/ffnet_54s/requirements.txt | 2 +- .../models/ffnet_54s_quantized/README.md | 6 +- .../models/ffnet_54s_quantized/conftest.py | 26 + .../models/ffnet_54s_quantized/export.py | 37 +- .../models/ffnet_54s_quantized/info.yaml | 4 +- .../models/ffnet_54s_quantized/perf.yaml | 61 +- .../models/ffnet_54s_quantized/test.py | 1 + qai_hub_models/models/ffnet_78s/README.md | 6 +- qai_hub_models/models/ffnet_78s/conftest.py | 26 + qai_hub_models/models/ffnet_78s/export.py | 39 +- qai_hub_models/models/ffnet_78s/info.yaml | 4 +- qai_hub_models/models/ffnet_78s/perf.yaml | 69 +- .../models/ffnet_78s/requirements.txt | 2 +- .../models/ffnet_78s_lowres/README.md | 6 +- .../models/ffnet_78s_lowres/conftest.py | 26 + .../models/ffnet_78s_lowres/export.py | 39 +- .../models/ffnet_78s_lowres/info.yaml | 4 +- .../models/ffnet_78s_lowres/perf.yaml | 69 +- .../models/ffnet_78s_lowres/requirements.txt | 2 +- .../models/ffnet_78s_quantized/README.md | 6 +- .../models/ffnet_78s_quantized/conftest.py | 26 + .../models/ffnet_78s_quantized/export.py | 37 +- .../models/ffnet_78s_quantized/info.yaml | 4 +- .../models/ffnet_78s_quantized/perf.yaml | 61 +- .../models/ffnet_78s_quantized/test.py | 1 + qai_hub_models/models/googlenet/README.md | 6 +- 
qai_hub_models/models/googlenet/conftest.py | 24 + qai_hub_models/models/googlenet/demo.py | 4 +- qai_hub_models/models/googlenet/export.py | 42 +- qai_hub_models/models/googlenet/info.yaml | 2 + qai_hub_models/models/googlenet/model.py | 6 +- qai_hub_models/models/googlenet/perf.yaml | 77 +- qai_hub_models/models/googlenet/test.py | 3 + .../models/googlenet_quantized/README.md | 6 +- .../models/googlenet_quantized/conftest.py | 24 + .../models/googlenet_quantized/demo.py | 7 +- .../models/googlenet_quantized/export.py | 42 +- .../models/googlenet_quantized/info.yaml | 4 +- .../models/googlenet_quantized/model.py | 58 +- .../models/googlenet_quantized/perf.yaml | 85 +- .../models/googlenet_quantized/test.py | 11 - qai_hub_models/models/hrnet_pose/README.md | 6 +- qai_hub_models/models/hrnet_pose/conftest.py | 26 + qai_hub_models/models/hrnet_pose/demo.py | 4 +- qai_hub_models/models/hrnet_pose/export.py | 42 +- qai_hub_models/models/hrnet_pose/info.yaml | 2 + qai_hub_models/models/hrnet_pose/model.py | 14 +- qai_hub_models/models/hrnet_pose/perf.yaml | 69 +- .../models/hrnet_pose/requirements.txt | 4 +- .../models/hrnet_pose_quantized/README.md | 6 +- .../models/hrnet_pose_quantized/conftest.py | 26 + .../models/hrnet_pose_quantized/demo.py | 4 +- .../models/hrnet_pose_quantized/export.py | 40 +- .../models/hrnet_pose_quantized/info.yaml | 2 + .../models/hrnet_pose_quantized/perf.yaml | 61 +- .../hrnet_pose_quantized/requirements.txt | 4 +- .../huggingface_wavlm_base_plus/README.md | 6 +- .../huggingface_wavlm_base_plus/conftest.py | 26 + .../huggingface_wavlm_base_plus/export.py | 39 +- .../huggingface_wavlm_base_plus/info.yaml | 6 +- .../huggingface_wavlm_base_plus/model.py | 20 +- .../huggingface_wavlm_base_plus/perf.yaml | 71 +- .../requirements.txt | 8 +- .../huggingface_wavlm_base_plus/test.py | 2 + qai_hub_models/models/inception_v3/README.md | 6 +- .../models/inception_v3/conftest.py | 24 + qai_hub_models/models/inception_v3/demo.py | 4 +- qai_hub_models/models/inception_v3/export.py | 42 +- qai_hub_models/models/inception_v3/info.yaml | 2 + qai_hub_models/models/inception_v3/model.py | 6 +- qai_hub_models/models/inception_v3/perf.yaml | 77 +- qai_hub_models/models/inception_v3/test.py | 3 + .../models/inception_v3_quantized/README.md | 12 +- .../models/inception_v3_quantized/conftest.py | 24 + .../models/inception_v3_quantized/demo.py | 7 +- .../models/inception_v3_quantized/export.py | 42 +- .../models/inception_v3_quantized/info.yaml | 8 +- .../models/inception_v3_quantized/model.py | 145 ++- .../models/inception_v3_quantized/perf.yaml | 67 +- .../models/inception_v3_quantized/test.py | 11 - qai_hub_models/models/lama_dilated/README.md | 6 +- .../models/lama_dilated/conftest.py | 26 + qai_hub_models/models/lama_dilated/demo.py | 2 +- qai_hub_models/models/lama_dilated/export.py | 42 +- qai_hub_models/models/lama_dilated/info.yaml | 2 + qai_hub_models/models/lama_dilated/model.py | 16 +- qai_hub_models/models/lama_dilated/perf.yaml | 69 +- .../models/lama_dilated/requirements.txt | 9 +- qai_hub_models/models/lama_dilated/test.py | 2 + qai_hub_models/models/litehrnet/README.md | 6 +- qai_hub_models/models/litehrnet/conftest.py | 24 + qai_hub_models/models/litehrnet/demo.py | 4 +- qai_hub_models/models/litehrnet/export.py | 42 +- qai_hub_models/models/litehrnet/info.yaml | 2 + qai_hub_models/models/litehrnet/model.py | 2 +- qai_hub_models/models/litehrnet/perf.yaml | 59 +- .../models/litehrnet/requirements.txt | 4 +- qai_hub_models/models/litehrnet/test.py | 2 + 
.../llama_v2_7b_chat_quantized/README.md | 4 +- .../llama_v2_7b_chat_quantized/info.yaml | 2 + .../models/mediapipe_face/README.md | 11 +- .../models/mediapipe_face/conftest.py | 26 + .../models/mediapipe_face/export.py | 69 +- .../models/mediapipe_face/info.yaml | 2 + qai_hub_models/models/mediapipe_face/model.py | 6 +- .../models/mediapipe_face/perf.yaml | 139 ++- .../models/mediapipe_face/requirements.txt | 2 - .../models/mediapipe_hand/README.md | 11 +- .../models/mediapipe_hand/conftest.py | 26 + .../models/mediapipe_hand/export.py | 69 +- .../models/mediapipe_hand/info.yaml | 2 + qai_hub_models/models/mediapipe_hand/model.py | 6 +- .../models/mediapipe_hand/perf.yaml | 141 ++- .../models/mediapipe_hand/requirements.txt | 2 - .../models/mediapipe_pose/README.md | 11 +- .../models/mediapipe_pose/conftest.py | 26 + .../models/mediapipe_pose/export.py | 69 +- .../models/mediapipe_pose/info.yaml | 2 + qai_hub_models/models/mediapipe_pose/model.py | 6 +- .../models/mediapipe_pose/perf.yaml | 143 ++- .../models/mediapipe_pose/requirements.txt | 2 - .../models/mediapipe_selfie/README.md | 6 +- .../models/mediapipe_selfie/conftest.py | 24 + .../models/mediapipe_selfie/demo.py | 43 +- .../models/mediapipe_selfie/export.py | 43 +- .../models/mediapipe_selfie/info.yaml | 2 + .../models/mediapipe_selfie/model.py | 5 +- .../models/mediapipe_selfie/perf.yaml | 67 +- qai_hub_models/models/mnasnet05/README.md | 6 +- qai_hub_models/models/mnasnet05/conftest.py | 24 + qai_hub_models/models/mnasnet05/demo.py | 4 +- qai_hub_models/models/mnasnet05/export.py | 42 +- qai_hub_models/models/mnasnet05/info.yaml | 2 + qai_hub_models/models/mnasnet05/model.py | 2 +- qai_hub_models/models/mnasnet05/perf.yaml | 69 +- qai_hub_models/models/mnasnet05/test.py | 3 + qai_hub_models/models/mobilenet_v2/README.md | 6 +- .../models/mobilenet_v2/conftest.py | 26 + qai_hub_models/models/mobilenet_v2/demo.py | 4 +- qai_hub_models/models/mobilenet_v2/export.py | 42 +- qai_hub_models/models/mobilenet_v2/info.yaml | 2 + qai_hub_models/models/mobilenet_v2/model.py | 9 +- qai_hub_models/models/mobilenet_v2/perf.yaml | 69 +- qai_hub_models/models/mobilenet_v2/test.py | 3 + .../models/mobilenet_v2_quantized/README.md | 6 +- .../models/mobilenet_v2_quantized/conftest.py | 26 + .../models/mobilenet_v2_quantized/demo.py | 7 +- .../models/mobilenet_v2_quantized/export.py | 40 +- .../models/mobilenet_v2_quantized/info.yaml | 2 + .../models/mobilenet_v2_quantized/model.py | 35 +- .../models/mobilenet_v2_quantized/perf.yaml | 79 +- .../models/mobilenet_v2_quantized/test.py | 10 - .../models/mobilenet_v3_large/README.md | 8 +- .../models/mobilenet_v3_large/conftest.py | 24 + .../models/mobilenet_v3_large/demo.py | 4 +- .../models/mobilenet_v3_large/export.py | 42 +- .../models/mobilenet_v3_large/info.yaml | 4 +- .../models/mobilenet_v3_large/model.py | 2 +- .../models/mobilenet_v3_large/perf.yaml | 59 +- .../models/mobilenet_v3_large/test.py | 3 + .../mobilenet_v3_large_quantized/README.md | 54 + .../mobilenet_v3_large_quantized/__init__.py | 13 + .../mobilenet_v3_large_quantized/conftest.py | 24 + .../mobilenet_v3_large_quantized/demo.py | 23 + .../mobilenet_v3_large_quantized/export.py | 202 ++++ .../mobilenet_v3_large_quantized/info.yaml | 44 + .../mobilenet_v3_large_quantized/model.py | 85 ++ .../mobilenet_v3_large_quantized/perf.yaml | 108 ++ .../mobilenet_v3_large_quantized/test.py | 29 + .../models/mobilenet_v3_small/README.md | 6 +- .../models/mobilenet_v3_small/conftest.py | 24 + .../models/mobilenet_v3_small/demo.py | 4 +- 
.../models/mobilenet_v3_small/export.py | 42 +- .../models/mobilenet_v3_small/info.yaml | 2 + .../models/mobilenet_v3_small/model.py | 2 +- .../models/mobilenet_v3_small/perf.yaml | 59 +- .../models/mobilenet_v3_small/test.py | 3 + qai_hub_models/models/openai_clip/README.md | 6 +- qai_hub_models/models/openai_clip/app.py | 2 +- qai_hub_models/models/openai_clip/conftest.py | 26 + qai_hub_models/models/openai_clip/export.py | 69 +- qai_hub_models/models/openai_clip/info.yaml | 2 + qai_hub_models/models/openai_clip/model.py | 4 +- qai_hub_models/models/openai_clip/perf.yaml | 145 ++- .../models/openai_clip/requirements.txt | 1 - qai_hub_models/models/openpose/README.md | 6 +- qai_hub_models/models/openpose/conftest.py | 26 + qai_hub_models/models/openpose/demo.py | 30 +- qai_hub_models/models/openpose/export.py | 43 +- qai_hub_models/models/openpose/info.yaml | 2 + qai_hub_models/models/openpose/model.py | 2 +- qai_hub_models/models/openpose/perf.yaml | 67 +- .../models/openpose/requirements.txt | 4 +- qai_hub_models/models/protocols.py | 194 ++++ .../models/quicksrnetlarge/README.md | 6 +- .../models/quicksrnetlarge/conftest.py | 26 + qai_hub_models/models/quicksrnetlarge/demo.py | 1 + .../models/quicksrnetlarge/export.py | 43 +- .../models/quicksrnetlarge/info.yaml | 4 +- .../models/quicksrnetlarge/model.py | 2 - .../models/quicksrnetlarge/perf.yaml | 67 +- qai_hub_models/models/quicksrnetlarge/test.py | 1 + .../quicksrnetlarge_quantized/README.md | 54 + .../quicksrnetlarge_quantized/__init__.py | 10 + .../quicksrnetlarge_quantized/conftest.py | 26 + .../models/quicksrnetlarge_quantized/demo.py | 28 + .../quicksrnetlarge_quantized/export.py | 215 ++++ .../quicksrnetlarge_quantized/info.yaml | 35 + .../models/quicksrnetlarge_quantized/model.py | 99 ++ .../quicksrnetlarge_quantized/perf.yaml | 108 ++ .../models/quicksrnetlarge_quantized/test.py | 89 ++ .../models/quicksrnetmedium/README.md | 6 +- .../models/quicksrnetmedium/conftest.py | 26 + .../models/quicksrnetmedium/demo.py | 1 + .../models/quicksrnetmedium/export.py | 43 +- .../models/quicksrnetmedium/info.yaml | 4 +- .../models/quicksrnetmedium/model.py | 2 - .../models/quicksrnetmedium/perf.yaml | 163 ++- .../models/quicksrnetmedium/test.py | 1 + .../quicksrnetmedium_quantized/README.md | 54 + .../quicksrnetmedium_quantized/__init__.py | 10 + .../quicksrnetmedium_quantized/conftest.py | 26 + .../models/quicksrnetmedium_quantized/demo.py | 28 + .../quicksrnetmedium_quantized/export.py | 215 ++++ .../quicksrnetmedium_quantized/info.yaml | 35 + .../quicksrnetmedium_quantized/model.py | 98 ++ .../quicksrnetmedium_quantized/perf.yaml | 108 ++ .../models/quicksrnetmedium_quantized/test.py | 91 ++ .../models/quicksrnetsmall/README.md | 6 +- .../models/quicksrnetsmall/conftest.py | 26 + qai_hub_models/models/quicksrnetsmall/demo.py | 1 + .../models/quicksrnetsmall/export.py | 42 +- .../models/quicksrnetsmall/info.yaml | 4 +- .../models/quicksrnetsmall/model.py | 2 - .../models/quicksrnetsmall/perf.yaml | 67 +- qai_hub_models/models/quicksrnetsmall/test.py | 1 + .../quicksrnetsmall_quantized/README.md | 54 + .../quicksrnetsmall_quantized/__init__.py | 10 + .../quicksrnetsmall_quantized/conftest.py | 26 + .../models/quicksrnetsmall_quantized/demo.py | 28 + .../quicksrnetsmall_quantized/export.py | 215 ++++ .../quicksrnetsmall_quantized/info.yaml | 35 + .../models/quicksrnetsmall_quantized/model.py | 97 ++ .../quicksrnetsmall_quantized/perf.yaml | 108 ++ .../models/quicksrnetsmall_quantized/test.py | 87 ++ 
.../models/real_esrgan_general_x4v3/README.md | 6 +- .../real_esrgan_general_x4v3/conftest.py | 26 + .../models/real_esrgan_general_x4v3/demo.py | 1 + .../models/real_esrgan_general_x4v3/export.py | 43 +- .../models/real_esrgan_general_x4v3/info.yaml | 2 + .../models/real_esrgan_general_x4v3/perf.yaml | 69 +- .../real_esrgan_general_x4v3/requirements.txt | 9 +- .../models/real_esrgan_x4plus/README.md | 6 +- .../models/real_esrgan_x4plus/conftest.py | 26 + .../models/real_esrgan_x4plus/demo.py | 1 + .../models/real_esrgan_x4plus/export.py | 43 +- .../models/real_esrgan_x4plus/info.yaml | 2 + .../models/real_esrgan_x4plus/perf.yaml | 79 +- .../real_esrgan_x4plus/requirements.txt | 7 +- .../models/real_esrgan_x4plus/test.py | 1 + qai_hub_models/models/regnet/README.md | 6 +- qai_hub_models/models/regnet/conftest.py | 24 + qai_hub_models/models/regnet/demo.py | 4 +- qai_hub_models/models/regnet/export.py | 42 +- qai_hub_models/models/regnet/info.yaml | 2 + qai_hub_models/models/regnet/perf.yaml | 69 +- qai_hub_models/models/regnet/test.py | 3 + qai_hub_models/models/resnet101/README.md | 6 +- qai_hub_models/models/resnet101/conftest.py | 24 + qai_hub_models/models/resnet101/demo.py | 4 +- qai_hub_models/models/resnet101/export.py | 42 +- qai_hub_models/models/resnet101/info.yaml | 2 + qai_hub_models/models/resnet101/perf.yaml | 65 +- qai_hub_models/models/resnet101/test.py | 3 + .../models/resnet101_quantized/README.md | 6 +- .../models/resnet101_quantized/conftest.py | 24 + .../models/resnet101_quantized/demo.py | 7 +- .../models/resnet101_quantized/export.py | 42 +- .../models/resnet101_quantized/info.yaml | 2 + .../models/resnet101_quantized/model.py | 33 +- .../models/resnet101_quantized/perf.yaml | 85 +- .../models/resnet101_quantized/test.py | 11 - qai_hub_models/models/resnet18/README.md | 6 +- qai_hub_models/models/resnet18/conftest.py | 24 + qai_hub_models/models/resnet18/demo.py | 4 +- qai_hub_models/models/resnet18/export.py | 42 +- qai_hub_models/models/resnet18/info.yaml | 2 + qai_hub_models/models/resnet18/perf.yaml | 69 +- qai_hub_models/models/resnet18/test.py | 3 + .../models/resnet18_quantized/README.md | 6 +- .../models/resnet18_quantized/conftest.py | 24 + .../models/resnet18_quantized/demo.py | 4 +- .../models/resnet18_quantized/export.py | 40 +- .../models/resnet18_quantized/info.yaml | 2 + .../models/resnet18_quantized/model.py | 37 +- .../models/resnet18_quantized/perf.yaml | 71 +- .../models/resnet18_quantized/test.py | 11 - qai_hub_models/models/resnet50/README.md | 6 +- qai_hub_models/models/resnet50/conftest.py | 24 + qai_hub_models/models/resnet50/demo.py | 4 +- qai_hub_models/models/resnet50/export.py | 42 +- qai_hub_models/models/resnet50/info.yaml | 2 + qai_hub_models/models/resnet50/perf.yaml | 69 +- qai_hub_models/models/resnet50/test.py | 3 + qai_hub_models/models/resnext101/README.md | 6 +- qai_hub_models/models/resnext101/conftest.py | 24 + qai_hub_models/models/resnext101/demo.py | 4 +- qai_hub_models/models/resnext101/export.py | 42 +- qai_hub_models/models/resnext101/info.yaml | 2 + qai_hub_models/models/resnext101/perf.yaml | 65 +- qai_hub_models/models/resnext101/test.py | 3 + .../models/resnext101_quantized/README.md | 6 +- .../models/resnext101_quantized/conftest.py | 24 + .../models/resnext101_quantized/demo.py | 7 +- .../models/resnext101_quantized/export.py | 40 +- .../models/resnext101_quantized/info.yaml | 2 + .../models/resnext101_quantized/model.py | 34 +- .../models/resnext101_quantized/perf.yaml | 65 +- 
.../models/resnext101_quantized/test.py | 11 - qai_hub_models/models/resnext50/README.md | 6 +- qai_hub_models/models/resnext50/conftest.py | 24 + qai_hub_models/models/resnext50/demo.py | 4 +- qai_hub_models/models/resnext50/export.py | 42 +- qai_hub_models/models/resnext50/info.yaml | 2 + qai_hub_models/models/resnext50/model.py | 2 +- qai_hub_models/models/resnext50/perf.yaml | 63 +- qai_hub_models/models/resnext50/test.py | 3 + .../models/resnext50_quantized/README.md | 54 + .../models/resnext50_quantized/__init__.py | 10 + .../models/resnext50_quantized/conftest.py | 24 + .../models/resnext50_quantized/demo.py | 17 + .../models/resnext50_quantized/export.py | 209 ++++ .../models/resnext50_quantized/info.yaml | 43 + .../models/resnext50_quantized/model.py | 93 ++ .../perf.yaml | 75 +- .../models/resnext50_quantized/test.py | 30 + qai_hub_models/models/sam/README.md | 6 +- qai_hub_models/models/sam/conftest.py | 28 + qai_hub_models/models/sam/export.py | 79 +- qai_hub_models/models/sam/info.yaml | 2 + qai_hub_models/models/sam/model.py | 64 +- qai_hub_models/models/sam/perf.yaml | 69 +- qai_hub_models/models/sam/requirements.txt | 6 +- qai_hub_models/models/sam/test.py | 4 +- qai_hub_models/models/sesr_m5/README.md | 6 +- qai_hub_models/models/sesr_m5/conftest.py | 26 + qai_hub_models/models/sesr_m5/demo.py | 1 + qai_hub_models/models/sesr_m5/export.py | 39 +- qai_hub_models/models/sesr_m5/info.yaml | 2 + qai_hub_models/models/sesr_m5/model.py | 2 - qai_hub_models/models/sesr_m5/perf.yaml | 69 +- qai_hub_models/models/sesr_m5/test.py | 1 + .../models/sesr_m5_quantized/README.md | 6 +- .../models/sesr_m5_quantized/conftest.py | 26 + .../models/sesr_m5_quantized/demo.py | 1 + .../models/sesr_m5_quantized/export.py | 37 +- .../models/sesr_m5_quantized/info.yaml | 2 + .../models/sesr_m5_quantized/model.py | 4 +- .../models/sesr_m5_quantized/perf.yaml | 61 +- .../models/sesr_m5_quantized/test.py | 4 +- qai_hub_models/models/shufflenet_v2/README.md | 6 +- .../models/shufflenet_v2/conftest.py | 24 + qai_hub_models/models/shufflenet_v2/demo.py | 4 +- qai_hub_models/models/shufflenet_v2/export.py | 42 +- qai_hub_models/models/shufflenet_v2/info.yaml | 2 + qai_hub_models/models/shufflenet_v2/model.py | 2 +- qai_hub_models/models/shufflenet_v2/perf.yaml | 69 +- qai_hub_models/models/shufflenet_v2/test.py | 3 + .../models/shufflenet_v2_quantized/README.md | 6 +- .../shufflenet_v2_quantized/conftest.py | 24 + .../models/shufflenet_v2_quantized/demo.py | 7 +- .../models/shufflenet_v2_quantized/export.py | 42 +- .../models/shufflenet_v2_quantized/info.yaml | 2 + .../models/shufflenet_v2_quantized/model.py | 104 +- .../models/shufflenet_v2_quantized/perf.yaml | 81 +- .../models/shufflenet_v2_quantized/test.py | 11 - qai_hub_models/models/sinet/README.md | 6 +- qai_hub_models/models/sinet/conftest.py | 26 + qai_hub_models/models/sinet/demo.py | 4 +- qai_hub_models/models/sinet/export.py | 42 +- qai_hub_models/models/sinet/info.yaml | 2 + qai_hub_models/models/sinet/model.py | 19 +- qai_hub_models/models/sinet/perf.yaml | 69 +- qai_hub_models/models/sinet/test.py | 1 + qai_hub_models/models/squeezenet1_1/README.md | 6 +- .../models/squeezenet1_1/conftest.py | 24 + qai_hub_models/models/squeezenet1_1/demo.py | 4 +- qai_hub_models/models/squeezenet1_1/export.py | 42 +- qai_hub_models/models/squeezenet1_1/info.yaml | 2 + qai_hub_models/models/squeezenet1_1/model.py | 2 +- qai_hub_models/models/squeezenet1_1/perf.yaml | 67 +- qai_hub_models/models/squeezenet1_1/test.py | 3 + 
.../models/squeezenet1_1_quantized/README.md | 6 +- .../squeezenet1_1_quantized/conftest.py | 24 + .../models/squeezenet1_1_quantized/demo.py | 7 +- .../models/squeezenet1_1_quantized/export.py | 40 +- .../models/squeezenet1_1_quantized/info.yaml | 2 + .../models/squeezenet1_1_quantized/model.py | 29 +- .../models/squeezenet1_1_quantized/perf.yaml | 79 +- .../models/squeezenet1_1_quantized/test.py | 11 - .../stable_diffusion_quantized/README.md | 6 +- .../stable_diffusion_quantized/export.py | 65 +- .../stable_diffusion_quantized/info.yaml | 2 + .../stable_diffusion_quantized/model.py | 32 +- .../requirements.txt | 2 +- .../models/stable_diffusion_quantized/test.py | 7 + qai_hub_models/models/stylegan2/README.md | 6 +- qai_hub_models/models/stylegan2/conftest.py | 26 + qai_hub_models/models/stylegan2/demo.py | 4 +- qai_hub_models/models/stylegan2/export.py | 39 +- qai_hub_models/models/stylegan2/info.yaml | 2 + qai_hub_models/models/stylegan2/model.py | 38 +- qai_hub_models/models/stylegan2/perf.yaml | 63 +- .../models/stylegan2/requirements.txt | 2 +- qai_hub_models/models/stylegan2/test.py | 2 + qai_hub_models/models/swin_base/README.md | 6 +- qai_hub_models/models/swin_base/conftest.py | 24 + qai_hub_models/models/swin_base/demo.py | 4 +- qai_hub_models/models/swin_base/export.py | 42 +- qai_hub_models/models/swin_base/info.yaml | 2 + qai_hub_models/models/swin_base/perf.yaml | 67 +- qai_hub_models/models/swin_base/test.py | 2 +- qai_hub_models/models/swin_small/README.md | 6 +- qai_hub_models/models/swin_small/conftest.py | 24 + qai_hub_models/models/swin_small/demo.py | 4 +- qai_hub_models/models/swin_small/export.py | 42 +- qai_hub_models/models/swin_small/info.yaml | 2 + qai_hub_models/models/swin_small/perf.yaml | 67 +- qai_hub_models/models/swin_small/test.py | 2 +- qai_hub_models/models/swin_tiny/README.md | 6 +- qai_hub_models/models/swin_tiny/conftest.py | 24 + qai_hub_models/models/swin_tiny/demo.py | 4 +- qai_hub_models/models/swin_tiny/export.py | 42 +- qai_hub_models/models/swin_tiny/info.yaml | 2 + qai_hub_models/models/swin_tiny/perf.yaml | 67 +- qai_hub_models/models/swin_tiny/test.py | 2 +- qai_hub_models/models/trocr/README.md | 6 +- qai_hub_models/models/trocr/conftest.py | 24 + qai_hub_models/models/trocr/export.py | 69 +- qai_hub_models/models/trocr/info.yaml | 2 + qai_hub_models/models/trocr/model.py | 25 +- qai_hub_models/models/trocr/perf.yaml | 115 +- qai_hub_models/models/trocr/requirements.txt | 4 +- .../models/unet_segmentation/README.md | 6 +- .../models/unet_segmentation/conftest.py | 24 + .../models/unet_segmentation/demo.py | 6 +- .../models/unet_segmentation/export.py | 42 +- .../models/unet_segmentation/info.yaml | 2 + .../models/unet_segmentation/perf.yaml | 69 +- qai_hub_models/models/vit/README.md | 6 +- qai_hub_models/models/vit/conftest.py | 24 + qai_hub_models/models/vit/demo.py | 4 +- qai_hub_models/models/vit/export.py | 42 +- qai_hub_models/models/vit/info.yaml | 2 + qai_hub_models/models/vit/model.py | 2 +- qai_hub_models/models/vit/perf.yaml | 59 +- qai_hub_models/models/whisper_asr/info.yaml | 38 - .../README.md | 22 +- .../__init__.py | 5 +- .../models/whisper_base_en/conftest.py | 24 + qai_hub_models/models/whisper_base_en/demo.py | 14 + .../export.py | 77 +- .../models/whisper_base_en/info.yaml | 40 + .../models/whisper_base_en/model.py | 16 + .../models/whisper_base_en/perf.yaml | 186 +++ .../requirements.txt | 0 qai_hub_models/models/whisper_base_en/test.py | 22 + .../models/whisper_small_en/README.md | 59 + 
.../models/whisper_small_en/__init__.py | 8 + .../models/whisper_small_en/conftest.py | 24 + .../models/whisper_small_en/demo.py | 14 + .../models/whisper_small_en/export.py | 229 ++++ .../models/whisper_small_en/info.yaml | 40 + .../models/whisper_small_en/model.py | 16 + .../models/whisper_small_en/perf.yaml | 186 +++ .../models/whisper_small_en/requirements.txt | 2 + .../models/whisper_small_en/test.py | 22 + .../models/whisper_small_multi/code-gen.yaml | 4 + .../models/whisper_small_multi/demo.py | 14 + .../models/whisper_tiny_en/README.md | 59 + .../models/whisper_tiny_en/__init__.py | 8 + .../models/whisper_tiny_en/conftest.py | 24 + qai_hub_models/models/whisper_tiny_en/demo.py | 14 + .../models/whisper_tiny_en/export.py | 229 ++++ .../models/whisper_tiny_en/info.yaml | 40 + .../models/whisper_tiny_en/model.py | 16 + .../models/whisper_tiny_en/perf.yaml | 186 +++ .../models/whisper_tiny_en/requirements.txt | 2 + qai_hub_models/models/whisper_tiny_en/test.py | 22 + qai_hub_models/models/wideresnet50/README.md | 6 +- .../models/wideresnet50/conftest.py | 24 + qai_hub_models/models/wideresnet50/demo.py | 4 +- qai_hub_models/models/wideresnet50/export.py | 42 +- qai_hub_models/models/wideresnet50/info.yaml | 2 + qai_hub_models/models/wideresnet50/model.py | 2 +- qai_hub_models/models/wideresnet50/perf.yaml | 69 +- qai_hub_models/models/wideresnet50/test.py | 3 + .../models/wideresnet50_quantized/README.md | 6 +- .../models/wideresnet50_quantized/conftest.py | 24 + .../models/wideresnet50_quantized/demo.py | 7 +- .../models/wideresnet50_quantized/export.py | 40 +- .../models/wideresnet50_quantized/info.yaml | 2 + .../models/wideresnet50_quantized/model.py | 40 +- .../models/wideresnet50_quantized/perf.yaml | 77 +- .../models/wideresnet50_quantized/test.py | 11 - qai_hub_models/models/xlsr/README.md | 6 +- qai_hub_models/models/xlsr/conftest.py | 26 + qai_hub_models/models/xlsr/demo.py | 2 +- qai_hub_models/models/xlsr/export.py | 39 +- qai_hub_models/models/xlsr/info.yaml | 2 + qai_hub_models/models/xlsr/model.py | 10 +- qai_hub_models/models/xlsr/perf.yaml | 69 +- .../models/xlsr_quantized/README.md | 6 +- .../models/xlsr_quantized/conftest.py | 26 + qai_hub_models/models/xlsr_quantized/demo.py | 1 + .../models/xlsr_quantized/export.py | 37 +- .../models/xlsr_quantized/info.yaml | 2 + .../models/xlsr_quantized/perf.yaml | 61 +- qai_hub_models/models/yolov6/README.md | 6 +- qai_hub_models/models/yolov6/conftest.py | 26 + qai_hub_models/models/yolov6/demo.py | 1 + qai_hub_models/models/yolov6/export.py | 42 +- qai_hub_models/models/yolov6/info.yaml | 3 + qai_hub_models/models/yolov6/perf.yaml | 69 +- qai_hub_models/models/yolov6/test.py | 1 + qai_hub_models/models/yolov7/README.md | 6 +- qai_hub_models/models/yolov7/conftest.py | 26 + qai_hub_models/models/yolov7/demo.py | 1 + qai_hub_models/models/yolov7/export.py | 42 +- qai_hub_models/models/yolov7/info.yaml | 2 + qai_hub_models/models/yolov7/model.py | 5 +- qai_hub_models/models/yolov7/perf.yaml | 59 +- qai_hub_models/models/yolov7/requirements.txt | 9 +- qai_hub_models/models/yolov7/test.py | 2 + qai_hub_models/models/yolov8_det/README.md | 14 +- qai_hub_models/models/yolov8_det/conftest.py | 24 + qai_hub_models/models/yolov8_det/demo.py | 1 + qai_hub_models/models/yolov8_det/export.py | 44 +- qai_hub_models/models/yolov8_det/info.yaml | 10 +- qai_hub_models/models/yolov8_det/perf.yaml | 71 +- .../models/yolov8_det/requirements.txt | 2 + qai_hub_models/models/yolov8_det/test.py | 2 - qai_hub_models/models/yolov8_seg/README.md | 14 +- 
qai_hub_models/models/yolov8_seg/conftest.py | 24 + qai_hub_models/models/yolov8_seg/demo.py | 4 +- qai_hub_models/models/yolov8_seg/export.py | 44 +- qai_hub_models/models/yolov8_seg/info.yaml | 10 +- qai_hub_models/models/yolov8_seg/model.py | 2 +- qai_hub_models/models/yolov8_seg/perf.yaml | 59 +- .../models/yolov8_seg/requirements.txt | 2 + qai_hub_models/models/yolov8_seg/test.py | 2 + qai_hub_models/requirements-dev.txt | 24 +- qai_hub_models/requirements.txt | 32 +- qai_hub_models/test/e2e/test_aimet_compile.py | 11 +- qai_hub_models/utils/aimet/config_loader.py | 14 +- .../utils/aimet/default_config.json | 21 +- .../utils/aimet/default_config_legacy_v1.json | 71 ++ ...nel.json => default_config_legacy_v2.json} | 0 qai_hub_models/utils/aimet/repo.py | 32 + qai_hub_models/utils/args.py | 36 +- qai_hub_models/utils/asset_loaders.py | 49 +- qai_hub_models/utils/base_model.py | 225 ++-- qai_hub_models/utils/compare.py | 4 +- qai_hub_models/utils/config_loaders.py | 84 +- qai_hub_models/utils/inference.py | 75 +- qai_hub_models/utils/input_spec.py | 2 +- qai_hub_models/utils/measurement.py | 22 +- qai_hub_models/utils/model_card.py | 160 ++- qai_hub_models/utils/path_helpers.py | 9 +- qai_hub_models/utils/perf_summary.py | 155 +-- qai_hub_models/utils/printing.py | 4 +- qai_hub_models/utils/qai_hub_helpers.py | 20 +- qai_hub_models/utils/qnn_helpers.py | 7 +- qai_hub_models/utils/quantization_aimet.py | 228 ++-- scripts/build_and_test.py | 43 +- scripts/ci/git-credential-helper.sh | 4 + scripts/{ => examples}/quantize_ffnet.py | 0 .../examples/quantize_imagenet_classifier.py | 41 +- ..._numerics_imagenet_classifier_quantized.py | 306 +++++ .../test_numerics_mobilenet_v2_quantized.py | 177 --- scripts/github/create-aws-profile.sh | 18 + scripts/tasks/changes.py | 39 +- scripts/tasks/release.py | 1 + scripts/tasks/test.py | 54 +- scripts/tasks/venv.py | 42 +- scripts/util/common.sh | 4 + scripts/util/env_create.sh | 4 + scripts/util/env_sync.sh | 4 + scripts/util/github.sh | 4 + scripts/util/pytest_with_coverage.sh | 4 + scripts/util/run_mypy.sh | 6 +- setup.py | 2 +- 857 files changed, 21748 insertions(+), 4591 deletions(-) delete mode 100644 .gitignore create mode 100644 apps/android/ImageClassification/README.md create mode 100644 apps/android/ImageClassification/build.gradle create mode 100644 apps/android/ImageClassification/build_apk.py create mode 100644 apps/android/ImageClassification/classification/build.gradle create mode 100644 apps/android/ImageClassification/classification/proguard-rules.pro create mode 100644 apps/android/ImageClassification/classification/src/main/AndroidManifest.xml create mode 100644 apps/android/ImageClassification/classification/src/main/assets/Sample1.png create mode 100644 apps/android/ImageClassification/classification/src/main/assets/Sample2.png create mode 100644 apps/android/ImageClassification/classification/src/main/assets/Sample3.png create mode 100644 apps/android/ImageClassification/classification/src/main/assets/Sample4.png create mode 100644 apps/android/ImageClassification/classification/src/main/assets/Sample5.png create mode 100644 apps/android/ImageClassification/classification/src/main/assets/labels.txt create mode 100644 apps/android/ImageClassification/classification/src/main/java/com/qcom/imageclassification/ImageClassification.java create mode 100644 apps/android/ImageClassification/classification/src/main/java/com/qcom/imageclassification/ImageClassificationResult.java create mode 100644 
apps/android/ImageClassification/classification/src/main/java/com/qcom/imageclassification/QNNActivity.java create mode 100644 apps/android/ImageClassification/classification/src/main/java/com/qcom/imageclassification/Result.java create mode 100644 apps/android/ImageClassification/classification/src/main/java/com/qcom/imageclassification/Utils.java create mode 100644 apps/android/ImageClassification/classification/src/main/res/drawable-v24/ic_launcher_foreground.xml create mode 100644 apps/android/ImageClassification/classification/src/main/res/drawable/ic_launcher_background.xml create mode 100644 apps/android/ImageClassification/classification/src/main/res/drawable/image_classification_icon.png create mode 100644 apps/android/ImageClassification/classification/src/main/res/layout/activity_classification.xml create mode 100644 apps/android/ImageClassification/classification/src/main/res/mipmap-anydpi-v26/ic_launcher.xml create mode 100644 apps/android/ImageClassification/classification/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml create mode 100644 apps/android/ImageClassification/classification/src/main/res/mipmap-hdpi/ic_launcher.png create mode 100644 apps/android/ImageClassification/classification/src/main/res/mipmap-hdpi/ic_launcher_round.png create mode 100644 apps/android/ImageClassification/classification/src/main/res/mipmap-mdpi/ic_launcher.png create mode 100644 apps/android/ImageClassification/classification/src/main/res/mipmap-mdpi/ic_launcher_round.png create mode 100644 apps/android/ImageClassification/classification/src/main/res/mipmap-xhdpi/ic_launcher.png create mode 100644 apps/android/ImageClassification/classification/src/main/res/mipmap-xhdpi/ic_launcher_round.png create mode 100644 apps/android/ImageClassification/classification/src/main/res/mipmap-xxhdpi/ic_launcher.png create mode 100644 apps/android/ImageClassification/classification/src/main/res/mipmap-xxhdpi/ic_launcher_round.png create mode 100644 apps/android/ImageClassification/classification/src/main/res/mipmap-xxxhdpi/ic_launcher.png create mode 100644 apps/android/ImageClassification/classification/src/main/res/mipmap-xxxhdpi/ic_launcher_round.png create mode 100644 apps/android/ImageClassification/classification/src/main/res/values-night/themes.xml create mode 100644 apps/android/ImageClassification/classification/src/main/res/values/colors.xml create mode 100644 apps/android/ImageClassification/classification/src/main/res/values/strings.xml create mode 100644 apps/android/ImageClassification/classification/src/main/res/values/themes.xml create mode 100644 apps/android/ImageClassification/gradle.properties create mode 100644 apps/android/ImageClassification/gradle/wrapper/gradle-wrapper.jar create mode 100644 apps/android/ImageClassification/gradle/wrapper/gradle-wrapper.properties create mode 100644 apps/android/ImageClassification/gradlew create mode 100644 apps/android/ImageClassification/gradlew.bat create mode 100644 apps/android/ImageClassification/settings.gradle create mode 100644 apps/android/ImageSuperResolution/README.md create mode 100644 apps/android/ImageSuperResolution/build.gradle create mode 100644 apps/android/ImageSuperResolution/build.properties create mode 100644 apps/android/ImageSuperResolution/build_apk.py create mode 100644 apps/android/ImageSuperResolution/gradle.properties create mode 100644 apps/android/ImageSuperResolution/gradle/wrapper/gradle-wrapper.jar create mode 100644 apps/android/ImageSuperResolution/gradle/wrapper/gradle-wrapper.properties create mode 100644 
apps/android/ImageSuperResolution/gradlew create mode 100644 apps/android/ImageSuperResolution/gradlew.bat create mode 100644 apps/android/ImageSuperResolution/settings.gradle create mode 100644 apps/android/ImageSuperResolution/superresolution/build.gradle create mode 100644 apps/android/ImageSuperResolution/superresolution/proguard-rules.pro create mode 100644 apps/android/ImageSuperResolution/superresolution/src/main/AndroidManifest.xml create mode 100644 apps/android/ImageSuperResolution/superresolution/src/main/assets/Sample1.jpg create mode 100644 apps/android/ImageSuperResolution/superresolution/src/main/assets/Sample2.jpg create mode 100644 apps/android/ImageSuperResolution/superresolution/src/main/java/com/qcom/imagesuperres/QNNActivity.java create mode 100644 apps/android/ImageSuperResolution/superresolution/src/main/java/com/qcom/imagesuperres/Result.java create mode 100644 apps/android/ImageSuperResolution/superresolution/src/main/java/com/qcom/imagesuperres/SuperResolution.java create mode 100644 apps/android/ImageSuperResolution/superresolution/src/main/java/com/qcom/imagesuperres/SuperResolutionResult.java create mode 100644 apps/android/ImageSuperResolution/superresolution/src/main/java/com/qcom/imagesuperres/Utils.java create mode 100644 apps/android/ImageSuperResolution/superresolution/src/main/java/com/qcom/imagesuperres/UtilsESRGAN.java create mode 100644 apps/android/ImageSuperResolution/superresolution/src/main/res/drawable-v24/ic_launcher_foreground.xml create mode 100644 apps/android/ImageSuperResolution/superresolution/src/main/res/drawable/ic_launcher_background.xml create mode 100644 apps/android/ImageSuperResolution/superresolution/src/main/res/layout/activity_superres.xml create mode 100644 apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-anydpi-v26/ic_launcher.xml create mode 100644 apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml create mode 100644 apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-hdpi/ic_launcher.png create mode 100644 apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-hdpi/ic_launcher_round.png create mode 100644 apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-mdpi/ic_launcher.png create mode 100644 apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-mdpi/ic_launcher_round.png create mode 100644 apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-xhdpi/ic_launcher.png create mode 100644 apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-xhdpi/ic_launcher_round.png create mode 100644 apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-xxhdpi/ic_launcher.png create mode 100644 apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-xxhdpi/ic_launcher_round.png create mode 100644 apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-xxxhdpi/ic_launcher.png create mode 100644 apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-xxxhdpi/ic_launcher_round.png create mode 100644 apps/android/ImageSuperResolution/superresolution/src/main/res/values-night/themes.xml create mode 100644 apps/android/ImageSuperResolution/superresolution/src/main/res/values/colors.xml create mode 100644 apps/android/ImageSuperResolution/superresolution/src/main/res/values/strings.xml create mode 100644 apps/android/ImageSuperResolution/superresolution/src/main/res/values/themes.xml create mode 100644 
qai_hub_models/global_requirements.txt create mode 100644 qai_hub_models/models/_shared/whisper/__init__.py rename qai_hub_models/models/{whisper_asr => _shared/whisper}/app.py (98%) rename qai_hub_models/models/{whisper_asr => _shared/whisper}/demo.py (82%) rename qai_hub_models/models/{whisper_asr => _shared/whisper}/model.py (88%) rename qai_hub_models/models/{whisper_asr/test.py => _shared/whisper/test_utils.py} (71%) create mode 100644 qai_hub_models/models/aotgan/README.md create mode 100644 qai_hub_models/models/aotgan/__init__.py create mode 100644 qai_hub_models/models/aotgan/conftest.py create mode 100644 qai_hub_models/models/aotgan/demo.py create mode 100644 qai_hub_models/models/aotgan/export.py create mode 100644 qai_hub_models/models/aotgan/info.yaml create mode 100644 qai_hub_models/models/aotgan/model.py create mode 100644 qai_hub_models/models/aotgan/patches/layer_norm.diff create mode 100644 qai_hub_models/models/aotgan/perf.yaml create mode 100644 qai_hub_models/models/aotgan/test.py create mode 100644 qai_hub_models/models/common.py create mode 100644 qai_hub_models/models/convnext_tiny/conftest.py create mode 100644 qai_hub_models/models/ddrnet23_slim/conftest.py create mode 100644 qai_hub_models/models/deeplabv3_resnet50/conftest.py create mode 100644 qai_hub_models/models/densenet121/conftest.py create mode 100644 qai_hub_models/models/detr_resnet101/conftest.py create mode 100644 qai_hub_models/models/detr_resnet101_dc5/conftest.py create mode 100644 qai_hub_models/models/detr_resnet50/conftest.py create mode 100644 qai_hub_models/models/detr_resnet50_dc5/conftest.py create mode 100644 qai_hub_models/models/efficientnet_b0/conftest.py create mode 100644 qai_hub_models/models/esrgan/conftest.py create mode 100644 qai_hub_models/models/facebook_denoiser/conftest.py create mode 100644 qai_hub_models/models/fastsam_s/conftest.py create mode 100644 qai_hub_models/models/fastsam_x/conftest.py create mode 100644 qai_hub_models/models/fcn_resnet50/conftest.py create mode 100644 qai_hub_models/models/ffnet_122ns_lowres/conftest.py create mode 100644 qai_hub_models/models/ffnet_40s/conftest.py create mode 100644 qai_hub_models/models/ffnet_40s_quantized/conftest.py create mode 100644 qai_hub_models/models/ffnet_54s/conftest.py create mode 100644 qai_hub_models/models/ffnet_54s_quantized/conftest.py create mode 100644 qai_hub_models/models/ffnet_78s/conftest.py create mode 100644 qai_hub_models/models/ffnet_78s_lowres/conftest.py create mode 100644 qai_hub_models/models/ffnet_78s_quantized/conftest.py create mode 100644 qai_hub_models/models/googlenet/conftest.py create mode 100644 qai_hub_models/models/googlenet_quantized/conftest.py create mode 100644 qai_hub_models/models/hrnet_pose/conftest.py create mode 100644 qai_hub_models/models/hrnet_pose_quantized/conftest.py create mode 100644 qai_hub_models/models/huggingface_wavlm_base_plus/conftest.py create mode 100644 qai_hub_models/models/inception_v3/conftest.py create mode 100644 qai_hub_models/models/inception_v3_quantized/conftest.py create mode 100644 qai_hub_models/models/lama_dilated/conftest.py create mode 100644 qai_hub_models/models/litehrnet/conftest.py create mode 100644 qai_hub_models/models/mediapipe_face/conftest.py delete mode 100644 qai_hub_models/models/mediapipe_face/requirements.txt create mode 100644 qai_hub_models/models/mediapipe_hand/conftest.py delete mode 100644 qai_hub_models/models/mediapipe_hand/requirements.txt create mode 100644 qai_hub_models/models/mediapipe_pose/conftest.py delete mode 
100644 qai_hub_models/models/mediapipe_pose/requirements.txt create mode 100644 qai_hub_models/models/mediapipe_selfie/conftest.py create mode 100644 qai_hub_models/models/mnasnet05/conftest.py create mode 100644 qai_hub_models/models/mobilenet_v2/conftest.py create mode 100644 qai_hub_models/models/mobilenet_v2_quantized/conftest.py create mode 100644 qai_hub_models/models/mobilenet_v3_large/conftest.py create mode 100644 qai_hub_models/models/mobilenet_v3_large_quantized/README.md create mode 100644 qai_hub_models/models/mobilenet_v3_large_quantized/__init__.py create mode 100644 qai_hub_models/models/mobilenet_v3_large_quantized/conftest.py create mode 100644 qai_hub_models/models/mobilenet_v3_large_quantized/demo.py create mode 100644 qai_hub_models/models/mobilenet_v3_large_quantized/export.py create mode 100644 qai_hub_models/models/mobilenet_v3_large_quantized/info.yaml create mode 100644 qai_hub_models/models/mobilenet_v3_large_quantized/model.py create mode 100644 qai_hub_models/models/mobilenet_v3_large_quantized/perf.yaml create mode 100644 qai_hub_models/models/mobilenet_v3_large_quantized/test.py create mode 100644 qai_hub_models/models/mobilenet_v3_small/conftest.py create mode 100644 qai_hub_models/models/openai_clip/conftest.py create mode 100644 qai_hub_models/models/openpose/conftest.py create mode 100644 qai_hub_models/models/protocols.py create mode 100644 qai_hub_models/models/quicksrnetlarge/conftest.py create mode 100644 qai_hub_models/models/quicksrnetlarge_quantized/README.md create mode 100644 qai_hub_models/models/quicksrnetlarge_quantized/__init__.py create mode 100644 qai_hub_models/models/quicksrnetlarge_quantized/conftest.py create mode 100644 qai_hub_models/models/quicksrnetlarge_quantized/demo.py create mode 100644 qai_hub_models/models/quicksrnetlarge_quantized/export.py create mode 100644 qai_hub_models/models/quicksrnetlarge_quantized/info.yaml create mode 100644 qai_hub_models/models/quicksrnetlarge_quantized/model.py create mode 100644 qai_hub_models/models/quicksrnetlarge_quantized/perf.yaml create mode 100644 qai_hub_models/models/quicksrnetlarge_quantized/test.py create mode 100644 qai_hub_models/models/quicksrnetmedium/conftest.py create mode 100644 qai_hub_models/models/quicksrnetmedium_quantized/README.md create mode 100644 qai_hub_models/models/quicksrnetmedium_quantized/__init__.py create mode 100644 qai_hub_models/models/quicksrnetmedium_quantized/conftest.py create mode 100644 qai_hub_models/models/quicksrnetmedium_quantized/demo.py create mode 100644 qai_hub_models/models/quicksrnetmedium_quantized/export.py create mode 100644 qai_hub_models/models/quicksrnetmedium_quantized/info.yaml create mode 100644 qai_hub_models/models/quicksrnetmedium_quantized/model.py create mode 100644 qai_hub_models/models/quicksrnetmedium_quantized/perf.yaml create mode 100644 qai_hub_models/models/quicksrnetmedium_quantized/test.py create mode 100644 qai_hub_models/models/quicksrnetsmall/conftest.py create mode 100644 qai_hub_models/models/quicksrnetsmall_quantized/README.md create mode 100644 qai_hub_models/models/quicksrnetsmall_quantized/__init__.py create mode 100644 qai_hub_models/models/quicksrnetsmall_quantized/conftest.py create mode 100644 qai_hub_models/models/quicksrnetsmall_quantized/demo.py create mode 100644 qai_hub_models/models/quicksrnetsmall_quantized/export.py create mode 100644 qai_hub_models/models/quicksrnetsmall_quantized/info.yaml create mode 100644 qai_hub_models/models/quicksrnetsmall_quantized/model.py create mode 100644 
qai_hub_models/models/quicksrnetsmall_quantized/perf.yaml create mode 100644 qai_hub_models/models/quicksrnetsmall_quantized/test.py create mode 100644 qai_hub_models/models/real_esrgan_general_x4v3/conftest.py create mode 100644 qai_hub_models/models/real_esrgan_x4plus/conftest.py create mode 100644 qai_hub_models/models/regnet/conftest.py create mode 100644 qai_hub_models/models/resnet101/conftest.py create mode 100644 qai_hub_models/models/resnet101_quantized/conftest.py create mode 100644 qai_hub_models/models/resnet18/conftest.py create mode 100644 qai_hub_models/models/resnet18_quantized/conftest.py create mode 100644 qai_hub_models/models/resnet50/conftest.py create mode 100644 qai_hub_models/models/resnext101/conftest.py create mode 100644 qai_hub_models/models/resnext101_quantized/conftest.py create mode 100644 qai_hub_models/models/resnext50/conftest.py create mode 100644 qai_hub_models/models/resnext50_quantized/README.md create mode 100644 qai_hub_models/models/resnext50_quantized/__init__.py create mode 100644 qai_hub_models/models/resnext50_quantized/conftest.py create mode 100644 qai_hub_models/models/resnext50_quantized/demo.py create mode 100644 qai_hub_models/models/resnext50_quantized/export.py create mode 100644 qai_hub_models/models/resnext50_quantized/info.yaml create mode 100644 qai_hub_models/models/resnext50_quantized/model.py rename qai_hub_models/models/{whisper_asr => resnext50_quantized}/perf.yaml (71%) create mode 100644 qai_hub_models/models/resnext50_quantized/test.py create mode 100644 qai_hub_models/models/sam/conftest.py create mode 100644 qai_hub_models/models/sesr_m5/conftest.py create mode 100644 qai_hub_models/models/sesr_m5_quantized/conftest.py create mode 100644 qai_hub_models/models/shufflenet_v2/conftest.py create mode 100644 qai_hub_models/models/shufflenet_v2_quantized/conftest.py create mode 100644 qai_hub_models/models/sinet/conftest.py create mode 100644 qai_hub_models/models/squeezenet1_1/conftest.py create mode 100644 qai_hub_models/models/squeezenet1_1_quantized/conftest.py create mode 100644 qai_hub_models/models/stylegan2/conftest.py create mode 100644 qai_hub_models/models/swin_base/conftest.py create mode 100644 qai_hub_models/models/swin_small/conftest.py create mode 100644 qai_hub_models/models/swin_tiny/conftest.py create mode 100644 qai_hub_models/models/trocr/conftest.py create mode 100644 qai_hub_models/models/unet_segmentation/conftest.py create mode 100644 qai_hub_models/models/vit/conftest.py delete mode 100644 qai_hub_models/models/whisper_asr/info.yaml rename qai_hub_models/models/{whisper_asr => whisper_base_en}/README.md (58%) rename qai_hub_models/models/{whisper_asr => whisper_base_en}/__init__.py (67%) create mode 100644 qai_hub_models/models/whisper_base_en/conftest.py create mode 100644 qai_hub_models/models/whisper_base_en/demo.py rename qai_hub_models/models/{whisper_asr => whisper_base_en}/export.py (74%) create mode 100644 qai_hub_models/models/whisper_base_en/info.yaml create mode 100644 qai_hub_models/models/whisper_base_en/model.py create mode 100644 qai_hub_models/models/whisper_base_en/perf.yaml rename qai_hub_models/models/{whisper_asr => whisper_base_en}/requirements.txt (100%) create mode 100644 qai_hub_models/models/whisper_base_en/test.py create mode 100644 qai_hub_models/models/whisper_small_en/README.md create mode 100644 qai_hub_models/models/whisper_small_en/__init__.py create mode 100644 qai_hub_models/models/whisper_small_en/conftest.py create mode 100644 
qai_hub_models/models/whisper_small_en/demo.py create mode 100644 qai_hub_models/models/whisper_small_en/export.py create mode 100644 qai_hub_models/models/whisper_small_en/info.yaml create mode 100644 qai_hub_models/models/whisper_small_en/model.py create mode 100644 qai_hub_models/models/whisper_small_en/perf.yaml create mode 100644 qai_hub_models/models/whisper_small_en/requirements.txt create mode 100644 qai_hub_models/models/whisper_small_en/test.py create mode 100644 qai_hub_models/models/whisper_small_multi/code-gen.yaml create mode 100644 qai_hub_models/models/whisper_small_multi/demo.py create mode 100644 qai_hub_models/models/whisper_tiny_en/README.md create mode 100644 qai_hub_models/models/whisper_tiny_en/__init__.py create mode 100644 qai_hub_models/models/whisper_tiny_en/conftest.py create mode 100644 qai_hub_models/models/whisper_tiny_en/demo.py create mode 100644 qai_hub_models/models/whisper_tiny_en/export.py create mode 100644 qai_hub_models/models/whisper_tiny_en/info.yaml create mode 100644 qai_hub_models/models/whisper_tiny_en/model.py create mode 100644 qai_hub_models/models/whisper_tiny_en/perf.yaml create mode 100644 qai_hub_models/models/whisper_tiny_en/requirements.txt create mode 100644 qai_hub_models/models/whisper_tiny_en/test.py create mode 100644 qai_hub_models/models/wideresnet50/conftest.py create mode 100644 qai_hub_models/models/wideresnet50_quantized/conftest.py create mode 100644 qai_hub_models/models/xlsr/conftest.py create mode 100644 qai_hub_models/models/xlsr_quantized/conftest.py create mode 100644 qai_hub_models/models/yolov6/conftest.py create mode 100644 qai_hub_models/models/yolov7/conftest.py create mode 100644 qai_hub_models/models/yolov8_det/conftest.py create mode 100644 qai_hub_models/models/yolov8_seg/conftest.py create mode 100644 qai_hub_models/utils/aimet/default_config_legacy_v1.json rename qai_hub_models/utils/aimet/{default_config_per_channel.json => default_config_legacy_v2.json} (100%) create mode 100644 qai_hub_models/utils/aimet/repo.py rename scripts/{ => examples}/quantize_ffnet.py (100%) create mode 100644 scripts/examples/test_numerics_imagenet_classifier_quantized.py delete mode 100644 scripts/examples/test_numerics_mobilenet_v2_quantized.py create mode 100755 scripts/github/create-aws-profile.sh diff --git a/.gitattributes b/.gitattributes index 3a241177..308063f2 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,2 +1,3 @@ *.jpg filter=lfs diff=lfs merge=lfs -text *.png filter=lfs diff=lfs merge=lfs -text +*.jar filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 916d385a..00000000 --- a/.gitignore +++ /dev/null @@ -1,115 +0,0 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# Distribution / packaging -.Python -/bench/ -build/ -dist/ -demo_artifacts/ -develop-eggs/ -downloads/ -eggs/ -.eggs/ -parts/ -sdist/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. 
-*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ - -# Translations -*.mo -*.pot - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version -qaihm-dev - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow -__pypackages__/ - -# Environments -.venv -env/ -envs/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -*build/* -*.onnx -*.mlmodelc* -*.pt -*.wav -*.npy -*.csv -*.dylib -*.fpie -*bin/ADSP_Inference_Test -*.DS_Store - -# Hub exports -**/*.mlmodel -**/*.tflite - -# Zoo Readmes (these are autogenerated) -qai_hub_models/**/README.md -# Hugging Face Model Cards (these are autogenerated) -qai_hub_models/**/HF_MODEL_CARD.md diff --git a/README.md b/README.md index 4d69627d..a6efef3b 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ memory etc.) and ready to deploy on Qualcomm® devices. * View open-source recipes to quantize, optimize, and deploy these models on-device. * Browse through [performance metrics](https://aihub.qualcomm.com/models) captured for these models on several devices. * Access the models through [Hugging Face](https://huggingface.co/qualcomm). -* [Sign up](https://aihub.qualcomm.com/) to run these models on hosted Qualcomm® devices. +* [Sign up](https://myaccount.qualcomm.com/signup) to run these models on hosted Qualcomm® devices. Supported runtimes * [TensorFlow Lite](https://www.tensorflow.org/lite) @@ -67,7 +67,7 @@ pip install "qai_hub_models[yolov7]" Each model comes with the following set of CLI demos: * Locally runnable PyTorch based CLI demo to validate the model off device. -* On-device CLI demo that produces a model ready for on-device deployment and runs the model on a hosted Qualcomm® device (needs [sign up](https://aihub.qualcomm.com/)). +* On-device CLI demo that produces a model ready for on-device deployment and runs the model on a hosted Qualcomm® device (needs [sign up](https://myaccount.qualcomm.com/signup)). All the models produced by these demos are freely available on [Hugging Face](https://huggingface.co/qualcomm) or through our @@ -129,7 +129,7 @@ Image.fromarray(pred_image).show() Qualcomm® device using [Qualcomm® AI Hub](https://aihub.qualcomm.com). To run the model on a hosted device, [sign up for access to Qualcomm® AI -Hub](https://aihub.qualcomm.com). Sign-in to Qualcomm® AI Hub with your +Hub](https://myaccount.qualcomm.com/signup). Sign-in to Qualcomm® AI Hub with your Qualcomm® ID. Once signed in navigate to Account -> Settings -> API Token. With this API token, you can configure your client to run models on the cloud @@ -242,6 +242,14 @@ python -m pytest --pyargs qai_hub_models.models.yolov7.test For any issues, please contact us at ai-hub-support@qti.qualcomm.com. + +--- + +### LICENSE + +Qualcomm® AI Hub Models is licensed under BSD-3. See the [LICENSE file](../LICENSE). 
+ + --- ## Model Directory @@ -252,93 +260,99 @@ For any issues, please contact us at ai-hub-support@qti.qualcomm.com. | -- | -- | -- | -- | -- | | | | | | **Image Classification** -| [MobileNet-v2-Quantized](https://aihub.qualcomm.com/models/mobilenet_v2_quantized) | [qai_hub_models.models.mobilenet_v2_quantized](qai_hub_models/models/mobilenet_v2_quantized/README.md) | ✔️ | ✔️ | ✔️ +| [VIT](https://aihub.qualcomm.com/models/vit) | [qai_hub_models.models.vit](qai_hub_models/models/vit/README.md) | ✔️ | ✔️ | ✔️ +| [Inception-v3-Quantized](https://aihub.qualcomm.com/models/inception_v3_quantized) | [qai_hub_models.models.inception_v3_quantized](qai_hub_models/models/inception_v3_quantized/README.md) | ✔️ | ✔️ | ✔️ +| [Inception-v3](https://aihub.qualcomm.com/models/inception_v3) | [qai_hub_models.models.inception_v3](qai_hub_models/models/inception_v3/README.md) | ✔️ | ✔️ | ✔️ +| [MobileNet-v3-Large](https://aihub.qualcomm.com/models/mobilenet_v3_large) | [qai_hub_models.models.mobilenet_v3_large](qai_hub_models/models/mobilenet_v3_large/README.md) | ✔️ | ✔️ | ✔️ | [GoogLeNet](https://aihub.qualcomm.com/models/googlenet) | [qai_hub_models.models.googlenet](qai_hub_models/models/googlenet/README.md) | ✔️ | ✔️ | ✔️ +| [ResNeXt101](https://aihub.qualcomm.com/models/resnext101) | [qai_hub_models.models.resnext101](qai_hub_models/models/resnext101/README.md) | ✔️ | ✔️ | ✔️ | [ResNet50](https://aihub.qualcomm.com/models/resnet50) | [qai_hub_models.models.resnet50](qai_hub_models/models/resnet50/README.md) | ✔️ | ✔️ | ✔️ -| [Swin-Small](https://aihub.qualcomm.com/models/swin_small) | [qai_hub_models.models.swin_small](qai_hub_models/models/swin_small/README.md) | ✔️ | ✔️ | ✔️ -| [Inception-v3Quantized](https://aihub.qualcomm.com/models/inception_v3_quantized) | [qai_hub_models.models.inception_v3_quantized](qai_hub_models/models/inception_v3_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [MobileNet-v3-Small](https://aihub.qualcomm.com/models/mobilenet_v3_small) | [qai_hub_models.models.mobilenet_v3_small](qai_hub_models/models/mobilenet_v3_small/README.md) | ✔️ | ✔️ | ✔️ -| [GoogLeNetQuantized](https://aihub.qualcomm.com/models/googlenet_quantized) | [qai_hub_models.models.googlenet_quantized](qai_hub_models/models/googlenet_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [RegNet](https://aihub.qualcomm.com/models/regnet) | [qai_hub_models.models.regnet](qai_hub_models/models/regnet/README.md) | ✔️ | ✔️ | ✔️ | [ResNeXt50](https://aihub.qualcomm.com/models/resnext50) | [qai_hub_models.models.resnext50](qai_hub_models/models/resnext50/README.md) | ✔️ | ✔️ | ✔️ -| [VIT](https://aihub.qualcomm.com/models/vit) | [qai_hub_models.models.vit](qai_hub_models/models/vit/README.md) | ✔️ | ✔️ | ✔️ -| [ResNet18Quantized](https://aihub.qualcomm.com/models/resnet18_quantized) | [qai_hub_models.models.resnet18_quantized](qai_hub_models/models/resnet18_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [ResNet101](https://aihub.qualcomm.com/models/resnet101) | [qai_hub_models.models.resnet101](qai_hub_models/models/resnet101/README.md) | ✔️ | ✔️ | ✔️ -| [ResNeXt101](https://aihub.qualcomm.com/models/resnext101) | [qai_hub_models.models.resnext101](qai_hub_models/models/resnext101/README.md) | ✔️ | ✔️ | ✔️ -| [MobileNet-v2](https://aihub.qualcomm.com/models/mobilenet_v2) | [qai_hub_models.models.mobilenet_v2](qai_hub_models/models/mobilenet_v2/README.md) | ✔️ | ✔️ | ✔️ | [SqueezeNet-1_1](https://aihub.qualcomm.com/models/squeezenet1_1) | [qai_hub_models.models.squeezenet1_1](qai_hub_models/models/squeezenet1_1/README.md) | ✔️ | ✔️ | ✔️ -| 
[SqueezeNet-1_1Quantized](https://aihub.qualcomm.com/models/squeezenet1_1_quantized) | [qai_hub_models.models.squeezenet1_1_quantized](qai_hub_models/models/squeezenet1_1_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [Inception-v3](https://aihub.qualcomm.com/models/inception_v3) | [qai_hub_models.models.inception_v3](qai_hub_models/models/inception_v3/README.md) | ✔️ | ✔️ | ✔️ -| [WideResNet50](https://aihub.qualcomm.com/models/wideresnet50) | [qai_hub_models.models.wideresnet50](qai_hub_models/models/wideresnet50/README.md) | ✔️ | ✔️ | ✔️ -| [ResNet101Quantized](https://aihub.qualcomm.com/models/resnet101_quantized) | [qai_hub_models.models.resnet101_quantized](qai_hub_models/models/resnet101_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [MNASNet05](https://aihub.qualcomm.com/models/mnasnet05) | [qai_hub_models.models.mnasnet05](qai_hub_models/models/mnasnet05/README.md) | ✔️ | ✔️ | ✔️ -| [Swin-Base](https://aihub.qualcomm.com/models/swin_base) | [qai_hub_models.models.swin_base](qai_hub_models/models/swin_base/README.md) | ✔️ | ✔️ | ✔️ -| [DenseNet-121](https://aihub.qualcomm.com/models/densenet121) | [qai_hub_models.models.densenet121](qai_hub_models/models/densenet121/README.md) | ✔️ | ✔️ | ✔️ +| [ResNeXt101Quantized](https://aihub.qualcomm.com/models/resnext101_quantized) | [qai_hub_models.models.resnext101_quantized](qai_hub_models/models/resnext101_quantized/README.md) | ✔️ | ✔️ | ✔️ | [Shufflenet-v2Quantized](https://aihub.qualcomm.com/models/shufflenet_v2_quantized) | [qai_hub_models.models.shufflenet_v2_quantized](qai_hub_models/models/shufflenet_v2_quantized/README.md) | ✔️ | ✔️ | ✔️ | [Shufflenet-v2](https://aihub.qualcomm.com/models/shufflenet_v2) | [qai_hub_models.models.shufflenet_v2](qai_hub_models/models/shufflenet_v2/README.md) | ✔️ | ✔️ | ✔️ -| [ResNeXt101Quantized](https://aihub.qualcomm.com/models/resnext101_quantized) | [qai_hub_models.models.resnext101_quantized](qai_hub_models/models/resnext101_quantized/README.md) | ✔️ | ✔️ | ✔️ | [ResNet18](https://aihub.qualcomm.com/models/resnet18) | [qai_hub_models.models.resnet18](qai_hub_models/models/resnet18/README.md) | ✔️ | ✔️ | ✔️ +| [ResNeXt50Quantized](https://aihub.qualcomm.com/models/resnext50_quantized) | [qai_hub_models.models.resnext50_quantized](qai_hub_models/models/resnext50_quantized/README.md) | ✔️ | ✔️ | ✔️ +| [DenseNet-121](https://aihub.qualcomm.com/models/densenet121) | [qai_hub_models.models.densenet121](qai_hub_models/models/densenet121/README.md) | ✔️ | ✔️ | ✔️ +| [Swin-Base](https://aihub.qualcomm.com/models/swin_base) | [qai_hub_models.models.swin_base](qai_hub_models/models/swin_base/README.md) | ✔️ | ✔️ | ✔️ +| [ResNet101](https://aihub.qualcomm.com/models/resnet101) | [qai_hub_models.models.resnet101](qai_hub_models/models/resnet101/README.md) | ✔️ | ✔️ | ✔️ | [EfficientNet-B0](https://aihub.qualcomm.com/models/efficientnet_b0) | [qai_hub_models.models.efficientnet_b0](qai_hub_models/models/efficientnet_b0/README.md) | ✔️ | ✔️ | ✔️ -| [MobileNet-v3-Large](https://aihub.qualcomm.com/models/mobilenet_v3_large) | [qai_hub_models.models.mobilenet_v3_large](qai_hub_models/models/mobilenet_v3_large/README.md) | ✔️ | ✔️ | ✔️ +| [ResNet101Quantized](https://aihub.qualcomm.com/models/resnet101_quantized) | [qai_hub_models.models.resnet101_quantized](qai_hub_models/models/resnet101_quantized/README.md) | ✔️ | ✔️ | ✔️ | [WideResNet50-Quantized](https://aihub.qualcomm.com/models/wideresnet50_quantized) | [qai_hub_models.models.wideresnet50_quantized](qai_hub_models/models/wideresnet50_quantized/README.md) | ✔️ | ✔️ | ✔️ -| 
[ConvNext-Tiny](https://aihub.qualcomm.com/models/convnext_tiny) | [qai_hub_models.models.convnext_tiny](qai_hub_models/models/convnext_tiny/README.md) | ✔️ | ✔️ | ✔️ +| [SqueezeNet-1_1Quantized](https://aihub.qualcomm.com/models/squeezenet1_1_quantized) | [qai_hub_models.models.squeezenet1_1_quantized](qai_hub_models/models/squeezenet1_1_quantized/README.md) | ✔️ | ✔️ | ✔️ +| [MNASNet05](https://aihub.qualcomm.com/models/mnasnet05) | [qai_hub_models.models.mnasnet05](qai_hub_models/models/mnasnet05/README.md) | ✔️ | ✔️ | ✔️ +| [MobileNet-v3-Small](https://aihub.qualcomm.com/models/mobilenet_v3_small) | [qai_hub_models.models.mobilenet_v3_small](qai_hub_models/models/mobilenet_v3_small/README.md) | ✔️ | ✔️ | ✔️ +| [ResNet18Quantized](https://aihub.qualcomm.com/models/resnet18_quantized) | [qai_hub_models.models.resnet18_quantized](qai_hub_models/models/resnet18_quantized/README.md) | ✔️ | ✔️ | ✔️ +| [Swin-Small](https://aihub.qualcomm.com/models/swin_small) | [qai_hub_models.models.swin_small](qai_hub_models/models/swin_small/README.md) | ✔️ | ✔️ | ✔️ +| [GoogLeNetQuantized](https://aihub.qualcomm.com/models/googlenet_quantized) | [qai_hub_models.models.googlenet_quantized](qai_hub_models/models/googlenet_quantized/README.md) | ✔️ | ✔️ | ✔️ +| [MobileNet-v3-Large-Quantized](https://aihub.qualcomm.com/models/mobilenet_v3_large_quantized) | [qai_hub_models.models.mobilenet_v3_large_quantized](qai_hub_models/models/mobilenet_v3_large_quantized/README.md) | ✔️ | ✔️ | ✔️ +| [MobileNet-v2](https://aihub.qualcomm.com/models/mobilenet_v2) | [qai_hub_models.models.mobilenet_v2](qai_hub_models/models/mobilenet_v2/README.md) | ✔️ | ✔️ | ✔️ +| [WideResNet50](https://aihub.qualcomm.com/models/wideresnet50) | [qai_hub_models.models.wideresnet50](qai_hub_models/models/wideresnet50/README.md) | ✔️ | ✔️ | ✔️ +| [RegNet](https://aihub.qualcomm.com/models/regnet) | [qai_hub_models.models.regnet](qai_hub_models/models/regnet/README.md) | ✔️ | ✔️ | ✔️ | [Swin-Tiny](https://aihub.qualcomm.com/models/swin_tiny) | [qai_hub_models.models.swin_tiny](qai_hub_models/models/swin_tiny/README.md) | ✔️ | ✔️ | ✔️ +| [ConvNext-Tiny](https://aihub.qualcomm.com/models/convnext_tiny) | [qai_hub_models.models.convnext_tiny](qai_hub_models/models/convnext_tiny/README.md) | ✔️ | ✔️ | ✔️ +| [MobileNet-v2-Quantized](https://aihub.qualcomm.com/models/mobilenet_v2_quantized) | [qai_hub_models.models.mobilenet_v2_quantized](qai_hub_models/models/mobilenet_v2_quantized/README.md) | ✔️ | ✔️ | ✔️ | | | | | | **Image Editing** | [LaMa-Dilated](https://aihub.qualcomm.com/models/lama_dilated) | [qai_hub_models.models.lama_dilated](qai_hub_models/models/lama_dilated/README.md) | ✔️ | ✔️ | ✔️ +| [AOT-GAN](https://aihub.qualcomm.com/models/aotgan) | [qai_hub_models.models.aotgan](qai_hub_models/models/aotgan/README.md) | ✔️ | ✔️ | ✔️ | | | | | | **Image Generation** | [StyleGAN2](https://aihub.qualcomm.com/models/stylegan2) | [qai_hub_models.models.stylegan2](qai_hub_models/models/stylegan2/README.md) | ✔️ | ✔️ | ✔️ | | | | | | **Super Resolution** -| [QuickSRNetLarge](https://aihub.qualcomm.com/models/quicksrnetlarge) | [qai_hub_models.models.quicksrnetlarge](qai_hub_models/models/quicksrnetlarge/README.md) | ✔️ | ✔️ | ✔️ -| [ESRGAN](https://aihub.qualcomm.com/models/esrgan) | [qai_hub_models.models.esrgan](qai_hub_models/models/esrgan/README.md) | ✔️ | ✔️ | ✔️ -| [Real-ESRGAN-x4plus](https://aihub.qualcomm.com/models/real_esrgan_x4plus) | [qai_hub_models.models.real_esrgan_x4plus](qai_hub_models/models/real_esrgan_x4plus/README.md) | ✔️ | 
✔️ | ✔️ | [XLSR-Quantized](https://aihub.qualcomm.com/models/xlsr_quantized) | [qai_hub_models.models.xlsr_quantized](qai_hub_models/models/xlsr_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [QuickSRNetMedium](https://aihub.qualcomm.com/models/quicksrnetmedium) | [qai_hub_models.models.quicksrnetmedium](qai_hub_models/models/quicksrnetmedium/README.md) | ✔️ | ✔️ | ✔️ +| [QuickSRNetLarge-Quantized](https://aihub.qualcomm.com/models/quicksrnetlarge_quantized) | [qai_hub_models.models.quicksrnetlarge_quantized](qai_hub_models/models/quicksrnetlarge_quantized/README.md) | ✔️ | ✔️ | ✔️ +| [QuickSRNetMedium-Quantized](https://aihub.qualcomm.com/models/quicksrnetmedium_quantized) | [qai_hub_models.models.quicksrnetmedium_quantized](qai_hub_models/models/quicksrnetmedium_quantized/README.md) | ✔️ | ✔️ | ✔️ +| [Real-ESRGAN-x4plus](https://aihub.qualcomm.com/models/real_esrgan_x4plus) | [qai_hub_models.models.real_esrgan_x4plus](qai_hub_models/models/real_esrgan_x4plus/README.md) | ✔️ | ✔️ | ✔️ | [Real-ESRGAN-General-x4v3](https://aihub.qualcomm.com/models/real_esrgan_general_x4v3) | [qai_hub_models.models.real_esrgan_general_x4v3](qai_hub_models/models/real_esrgan_general_x4v3/README.md) | ✔️ | ✔️ | ✔️ -| [SESR-M5-Quantized](https://aihub.qualcomm.com/models/sesr_m5_quantized) | [qai_hub_models.models.sesr_m5_quantized](qai_hub_models/models/sesr_m5_quantized/README.md) | ✔️ | ✔️ | ✔️ +| [QuickSRNetMedium](https://aihub.qualcomm.com/models/quicksrnetmedium) | [qai_hub_models.models.quicksrnetmedium](qai_hub_models/models/quicksrnetmedium/README.md) | ✔️ | ✔️ | ✔️ +| [ESRGAN](https://aihub.qualcomm.com/models/esrgan) | [qai_hub_models.models.esrgan](qai_hub_models/models/esrgan/README.md) | ✔️ | ✔️ | ✔️ | [QuickSRNetSmall](https://aihub.qualcomm.com/models/quicksrnetsmall) | [qai_hub_models.models.quicksrnetsmall](qai_hub_models/models/quicksrnetsmall/README.md) | ✔️ | ✔️ | ✔️ | [SESR-M5](https://aihub.qualcomm.com/models/sesr_m5) | [qai_hub_models.models.sesr_m5](qai_hub_models/models/sesr_m5/README.md) | ✔️ | ✔️ | ✔️ +| [SESR-M5-Quantized](https://aihub.qualcomm.com/models/sesr_m5_quantized) | [qai_hub_models.models.sesr_m5_quantized](qai_hub_models/models/sesr_m5_quantized/README.md) | ✔️ | ✔️ | ✔️ +| [QuickSRNetSmall-Quantized](https://aihub.qualcomm.com/models/quicksrnetsmall_quantized) | [qai_hub_models.models.quicksrnetsmall_quantized](qai_hub_models/models/quicksrnetsmall_quantized/README.md) | ✔️ | ✔️ | ✔️ | [XLSR](https://aihub.qualcomm.com/models/xlsr) | [qai_hub_models.models.xlsr](qai_hub_models/models/xlsr/README.md) | ✔️ | ✔️ | ✔️ +| [QuickSRNetLarge](https://aihub.qualcomm.com/models/quicksrnetlarge) | [qai_hub_models.models.quicksrnetlarge](qai_hub_models/models/quicksrnetlarge/README.md) | ✔️ | ✔️ | ✔️ | | | | | | **Semantic Segmentation** -| [Yolo-v8-Segmentation](https://aihub.qualcomm.com/models/yolov8_seg) | [qai_hub_models.models.yolov8_seg](qai_hub_models/models/yolov8_seg/README.md) | ✔️ | ✔️ | ✔️ -| [SINet](https://aihub.qualcomm.com/models/sinet) | [qai_hub_models.models.sinet](qai_hub_models/models/sinet/README.md) | ✔️ | ✔️ | ✔️ -| [Unet-Segmentation](https://aihub.qualcomm.com/models/unet_segmentation) | [qai_hub_models.models.unet_segmentation](qai_hub_models/models/unet_segmentation/README.md) | ✔️ | ✔️ | ✔️ -| [FCN_ResNet50](https://aihub.qualcomm.com/models/fcn_resnet50) | [qai_hub_models.models.fcn_resnet50](qai_hub_models/models/fcn_resnet50/README.md) | ✔️ | ✔️ | ✔️ -| [DDRNet23-Slim](https://aihub.qualcomm.com/models/ddrnet23_slim) | 
[qai_hub_models.models.ddrnet23_slim](qai_hub_models/models/ddrnet23_slim/README.md) | ✔️ | ✔️ | ✔️ -| [FastSam-S](https://aihub.qualcomm.com/models/fastsam_s) | [qai_hub_models.models.fastsam_s](qai_hub_models/models/fastsam_s/README.md) | ✔️ | ✔️ | ✔️ -| [FFNet-122NS-LowRes](https://aihub.qualcomm.com/models/ffnet_122ns_lowres) | [qai_hub_models.models.ffnet_122ns_lowres](qai_hub_models/models/ffnet_122ns_lowres/README.md) | ✔️ | ✔️ | ✔️ -| [FFNet-78S-Quantized](https://aihub.qualcomm.com/models/ffnet_78s_quantized) | [qai_hub_models.models.ffnet_78s_quantized](qai_hub_models/models/ffnet_78s_quantized/README.md) | ✔️ | ✔️ | ✔️ +| [FFNet-54S-Quantized](https://aihub.qualcomm.com/models/ffnet_54s_quantized) | [qai_hub_models.models.ffnet_54s_quantized](qai_hub_models/models/ffnet_54s_quantized/README.md) | ✔️ | ✔️ | ✔️ | [FFNet-40S-Quantized](https://aihub.qualcomm.com/models/ffnet_40s_quantized) | [qai_hub_models.models.ffnet_40s_quantized](qai_hub_models/models/ffnet_40s_quantized/README.md) | ✔️ | ✔️ | ✔️ +| [FCN_ResNet50](https://aihub.qualcomm.com/models/fcn_resnet50) | [qai_hub_models.models.fcn_resnet50](qai_hub_models/models/fcn_resnet50/README.md) | ✔️ | ✔️ | ✔️ +| [FastSam-X](https://aihub.qualcomm.com/models/fastsam_x) | [qai_hub_models.models.fastsam_x](qai_hub_models/models/fastsam_x/README.md) | ✔️ | ✔️ | ✔️ | [MediaPipe-Selfie-Segmentation](https://aihub.qualcomm.com/models/mediapipe_selfie) | [qai_hub_models.models.mediapipe_selfie](qai_hub_models/models/mediapipe_selfie/README.md) | ✔️ | ✔️ | ✔️ +| [Segment-Anything-Model](https://aihub.qualcomm.com/models/sam) | [qai_hub_models.models.sam](qai_hub_models/models/sam/README.md) | ✔️ | ✔️ | ✔️ +| [Unet-Segmentation](https://aihub.qualcomm.com/models/unet_segmentation) | [qai_hub_models.models.unet_segmentation](qai_hub_models/models/unet_segmentation/README.md) | ✔️ | ✔️ | ✔️ +| [FFNet-40S](https://aihub.qualcomm.com/models/ffnet_40s) | [qai_hub_models.models.ffnet_40s](qai_hub_models/models/ffnet_40s/README.md) | ✔️ | ✔️ | ✔️ +| [DDRNet23-Slim](https://aihub.qualcomm.com/models/ddrnet23_slim) | [qai_hub_models.models.ddrnet23_slim](qai_hub_models/models/ddrnet23_slim/README.md) | ✔️ | ✔️ | ✔️ | [DeepLabV3-ResNet50](https://aihub.qualcomm.com/models/deeplabv3_resnet50) | [qai_hub_models.models.deeplabv3_resnet50](qai_hub_models/models/deeplabv3_resnet50/README.md) | ✔️ | ✔️ | ✔️ -| [FastSam-X](https://aihub.qualcomm.com/models/fastsam_x) | [qai_hub_models.models.fastsam_x](qai_hub_models/models/fastsam_x/README.md) | ✔️ | ✔️ | ✔️ +| [SINet](https://aihub.qualcomm.com/models/sinet) | [qai_hub_models.models.sinet](qai_hub_models/models/sinet/README.md) | ✔️ | ✔️ | ✔️ +| [FFNet-78S-Quantized](https://aihub.qualcomm.com/models/ffnet_78s_quantized) | [qai_hub_models.models.ffnet_78s_quantized](qai_hub_models/models/ffnet_78s_quantized/README.md) | ✔️ | ✔️ | ✔️ | [FFNet-54S](https://aihub.qualcomm.com/models/ffnet_54s) | [qai_hub_models.models.ffnet_54s](qai_hub_models/models/ffnet_54s/README.md) | ✔️ | ✔️ | ✔️ +| [FFNet-122NS-LowRes](https://aihub.qualcomm.com/models/ffnet_122ns_lowres) | [qai_hub_models.models.ffnet_122ns_lowres](qai_hub_models/models/ffnet_122ns_lowres/README.md) | ✔️ | ✔️ | ✔️ | [FFNet-78S-LowRes](https://aihub.qualcomm.com/models/ffnet_78s_lowres) | [qai_hub_models.models.ffnet_78s_lowres](qai_hub_models/models/ffnet_78s_lowres/README.md) | ✔️ | ✔️ | ✔️ -| [Segment-Anything-Model](https://aihub.qualcomm.com/models/sam) | [qai_hub_models.models.sam](qai_hub_models/models/sam/README.md) | ✔️ | ✔️ | ✔️ +| 
[YOLOv8-Segmentation](https://aihub.qualcomm.com/models/yolov8_seg) | [qai_hub_models.models.yolov8_seg](qai_hub_models/models/yolov8_seg/README.md) | ✔️ | ✔️ | ✔️ | [FFNet-78S](https://aihub.qualcomm.com/models/ffnet_78s) | [qai_hub_models.models.ffnet_78s](qai_hub_models/models/ffnet_78s/README.md) | ✔️ | ✔️ | ✔️ -| [FFNet-40S](https://aihub.qualcomm.com/models/ffnet_40s) | [qai_hub_models.models.ffnet_40s](qai_hub_models/models/ffnet_40s/README.md) | ✔️ | ✔️ | ✔️ -| [FFNet-54S-Quantized](https://aihub.qualcomm.com/models/ffnet_54s_quantized) | [qai_hub_models.models.ffnet_54s_quantized](qai_hub_models/models/ffnet_54s_quantized/README.md) | ✔️ | ✔️ | ✔️ +| [FastSam-S](https://aihub.qualcomm.com/models/fastsam_s) | [qai_hub_models.models.fastsam_s](qai_hub_models/models/fastsam_s/README.md) | ✔️ | ✔️ | ✔️ | | | | | | **Object Detection** -| [MediaPipe-Hand-Detection](https://aihub.qualcomm.com/models/mediapipe_hand) | [qai_hub_models.models.mediapipe_hand](qai_hub_models/models/mediapipe_hand/README.md) | ✔️ | ✔️ | ✔️ -| [DETR-ResNet50-DC5](https://aihub.qualcomm.com/models/detr_resnet50_dc5) | [qai_hub_models.models.detr_resnet50_dc5](qai_hub_models/models/detr_resnet50_dc5/README.md) | ✔️ | ✔️ | ✔️ -| [DETR-ResNet101-DC5](https://aihub.qualcomm.com/models/detr_resnet101_dc5) | [qai_hub_models.models.detr_resnet101_dc5](qai_hub_models/models/detr_resnet101_dc5/README.md) | ✔️ | ✔️ | ✔️ -| [Yolo-v8-Detection](https://aihub.qualcomm.com/models/yolov8_det) | [qai_hub_models.models.yolov8_det](qai_hub_models/models/yolov8_det/README.md) | ✔️ | ✔️ | ✔️ | [DETR-ResNet101](https://aihub.qualcomm.com/models/detr_resnet101) | [qai_hub_models.models.detr_resnet101](qai_hub_models/models/detr_resnet101/README.md) | ✔️ | ✔️ | ✔️ -| [DETR-ResNet50](https://aihub.qualcomm.com/models/detr_resnet50) | [qai_hub_models.models.detr_resnet50](qai_hub_models/models/detr_resnet50/README.md) | ✔️ | ✔️ | ✔️ +| [MediaPipe-Face-Detection](https://aihub.qualcomm.com/models/mediapipe_face) | [qai_hub_models.models.mediapipe_face](qai_hub_models/models/mediapipe_face/README.md) | ✔️ | ✔️ | ✔️ +| [DETR-ResNet50-DC5](https://aihub.qualcomm.com/models/detr_resnet50_dc5) | [qai_hub_models.models.detr_resnet50_dc5](qai_hub_models/models/detr_resnet50_dc5/README.md) | ✔️ | ✔️ | ✔️ | [Yolo-v7](https://aihub.qualcomm.com/models/yolov7) | [qai_hub_models.models.yolov7](qai_hub_models/models/yolov7/README.md) | ✔️ | ✔️ | ✔️ +| [YOLOv8-Detection](https://aihub.qualcomm.com/models/yolov8_det) | [qai_hub_models.models.yolov8_det](qai_hub_models/models/yolov8_det/README.md) | ✔️ | ✔️ | ✔️ | [Yolo-v6](https://aihub.qualcomm.com/models/yolov6) | [qai_hub_models.models.yolov6](qai_hub_models/models/yolov6/README.md) | ✔️ | ✔️ | ✔️ -| [MediaPipe-Face-Detection](https://aihub.qualcomm.com/models/mediapipe_face) | [qai_hub_models.models.mediapipe_face](qai_hub_models/models/mediapipe_face/README.md) | ✔️ | ✔️ | ✔️ +| [DETR-ResNet101-DC5](https://aihub.qualcomm.com/models/detr_resnet101_dc5) | [qai_hub_models.models.detr_resnet101_dc5](qai_hub_models/models/detr_resnet101_dc5/README.md) | ✔️ | ✔️ | ✔️ +| [DETR-ResNet50](https://aihub.qualcomm.com/models/detr_resnet50) | [qai_hub_models.models.detr_resnet50](qai_hub_models/models/detr_resnet50/README.md) | ✔️ | ✔️ | ✔️ +| [MediaPipe-Hand-Detection](https://aihub.qualcomm.com/models/mediapipe_hand) | [qai_hub_models.models.mediapipe_hand](qai_hub_models/models/mediapipe_hand/README.md) | ✔️ | ✔️ | ✔️ | | | | | | **Pose Estimation** -| 
[HRNetPoseQuantized](https://aihub.qualcomm.com/models/hrnet_pose_quantized) | [qai_hub_models.models.hrnet_pose_quantized](qai_hub_models/models/hrnet_pose_quantized/README.md) | ✔️ | ✔️ | ✔️ | [MediaPipe-Pose-Estimation](https://aihub.qualcomm.com/models/mediapipe_pose) | [qai_hub_models.models.mediapipe_pose](qai_hub_models/models/mediapipe_pose/README.md) | ✔️ | ✔️ | ✔️ +| [OpenPose](https://aihub.qualcomm.com/models/openpose) | [qai_hub_models.models.openpose](qai_hub_models/models/openpose/README.md) | ✔️ | ✔️ | ✔️ | [LiteHRNet](https://aihub.qualcomm.com/models/litehrnet) | [qai_hub_models.models.litehrnet](qai_hub_models/models/litehrnet/README.md) | ✔️ | ✔️ | ✔️ | [HRNetPose](https://aihub.qualcomm.com/models/hrnet_pose) | [qai_hub_models.models.hrnet_pose](qai_hub_models/models/hrnet_pose/README.md) | ✔️ | ✔️ | ✔️ -| [OpenPose](https://aihub.qualcomm.com/models/openpose) | [qai_hub_models.models.openpose](qai_hub_models/models/openpose/README.md) | ✔️ | ✔️ | ✔️ +| [HRNetPoseQuantized](https://aihub.qualcomm.com/models/hrnet_pose_quantized) | [qai_hub_models.models.hrnet_pose_quantized](qai_hub_models/models/hrnet_pose_quantized/README.md) | ✔️ | ✔️ | ✔️ ### Audio @@ -346,7 +360,9 @@ For any issues, please contact us at ai-hub-support@qti.qualcomm.com. | -- | -- | -- | -- | -- | | | | | | **Speech Recognition** -| [Whisper-Base](https://aihub.qualcomm.com/models/whisper_asr) | [qai_hub_models.models.whisper_asr](qai_hub_models/models/whisper_asr/README.md) | ✔️ | ✔️ | ✔️ +| [Whisper-Small-En](https://aihub.qualcomm.com/models/whisper_small_en) | [qai_hub_models.models.whisper_small_en](qai_hub_models/models/whisper_small_en/README.md) | ✔️ | ✔️ | ✔️ +| [Whisper-Tiny-En](https://aihub.qualcomm.com/models/whisper_tiny_en) | [qai_hub_models.models.whisper_tiny_en](qai_hub_models/models/whisper_tiny_en/README.md) | ✔️ | ✔️ | ✔️ +| [Whisper-Base-En](https://aihub.qualcomm.com/models/whisper_base_en) | [qai_hub_models.models.whisper_base_en](qai_hub_models/models/whisper_base_en/README.md) | ✔️ | ✔️ | ✔️ | [HuggingFace-WavLM-Base-Plus](https://aihub.qualcomm.com/models/huggingface_wavlm_base_plus) | [qai_hub_models.models.huggingface_wavlm_base_plus](qai_hub_models/models/huggingface_wavlm_base_plus/README.md) | ✔️ | ✔️ | ✔️ | | | | | | **Audio Enhancement** @@ -366,8 +382,8 @@ For any issues, please contact us at ai-hub-support@qti.qualcomm.com. | -- | -- | -- | -- | -- | | | | | | **Image Generation** -| [Stable-Diffusion](https://aihub.qualcomm.com/models/stable_diffusion_quantized) | [qai_hub_models.models.stable_diffusion_quantized](qai_hub_models/models/stable_diffusion_quantized/README.md) | ✔️ | ✔️ | ✔️ | [ControlNet](https://aihub.qualcomm.com/models/controlnet_quantized) | [qai_hub_models.models.controlnet_quantized](qai_hub_models/models/controlnet_quantized/README.md) | ✔️ | ✔️ | ✔️ +| [Stable-Diffusion](https://aihub.qualcomm.com/models/stable_diffusion_quantized) | [qai_hub_models.models.stable_diffusion_quantized](qai_hub_models/models/stable_diffusion_quantized/README.md) | ✔️ | ✔️ | ✔️ | | | | | | **Text Generation** | [Llama-v2-7B-Chat](https://aihub.qualcomm.com/models/llama_v2_7b_chat_quantized) | [qai_hub_models.models.llama_v2_7b_chat_quantized](qai_hub_models/models/llama_v2_7b_chat_quantized/README.md) | ✔️ | ✔️ | ✔️ diff --git a/apps/android/ImageClassification/README.md b/apps/android/ImageClassification/README.md new file mode 100644 index 00000000..84a83be4 --- /dev/null +++ b/apps/android/ImageClassification/README.md @@ -0,0 +1,72 @@ +### Requirements + +1. 
Java, android-sdk and sdkmanager are already set up at the user's end +2. User should have the Linux QNN SDK on their local machine. + + +## Info +Right now we use the mobilenet_v3_small.tflite model, which takes a 224x224 image as input and gives an array of 1000 class scores as output. You can replace it with any tflite classification model, but you have to change the pre-processing, post-processing and dimensions in the app code based on the model parameters. + + +## Preprocessing + + +``` + for (int x = 0; x < input_dims1; x++) { + for (int y = 0; y < input_dims2; y++) { + int pixel = inputBitmap.getPixel(x, y); + List<Float> rgb = Arrays.asList((float)Color.red(pixel), (float)Color.green(pixel), (float)Color.blue(pixel)); + for(int z = 0;z<3; z++){ + floatinputarray[0][z][x][y] = (float)((rgb.get(z))-ImageMean.get(z))/ImageStd.get(z); + } + } + } +``` + + +## PostProcessing + + +``` + public static List<Integer> findTop3Indices(float[] arr) { + List<Integer> topIndices = new ArrayList<>(); + + for (int i = 0; i < 3; i++) { + int maxIndex = 0; + float maxValue = arr[0]; + + for (int j = 1; j < arr.length; j++) { + if (arr[j] > maxValue && !topIndices.contains(j)) { + maxValue = arr[j]; + maxIndex = j; + } + } + + topIndices.add(maxIndex); + } + + return topIndices; + } +``` + +### Build App: + +You have to run build_apk.py for Image Classification. It will generate classification-debug.apk and install it on the connected device. + + + build_apk.py [-h] -q QNNSDK (-m MODEL_PATH | -e MODEL_NAME) + + + +### Example + +Here, with -m, give your tflite model path, i.e. the full path to the `*.tflite` file, and it will copy the model file to the assets folder to build the Android app. +``` + python build_apk.py -q "<QNN_SDK_PATH>" -m "Path\to\TFLITE\Model" +``` + +Also, you can use the AI-Hub model name as mentioned in the models directory, to directly export the model from AI-Hub and copy it to the app assets. + +``` + python build_apk.py -q "<QNN_SDK_PATH>" -e <model_name> +``` diff --git a/apps/android/ImageClassification/build.gradle b/apps/android/ImageClassification/build.gradle new file mode 100644 index 00000000..798f7515 --- /dev/null +++ b/apps/android/ImageClassification/build.gradle @@ -0,0 +1,10 @@ + +// Top-level build file where you can add configuration options common to all sub-projects/modules. +plugins { + id 'com.android.application' version '7.2.1' apply false + id 'com.android.library' version '7.2.1' apply false +} + +task clean(type: Delete) { + delete rootProject.buildDir +} diff --git a/apps/android/ImageClassification/build_apk.py b/apps/android/ImageClassification/build_apk.py new file mode 100644 index 00000000..66e6cd0d --- /dev/null +++ b/apps/android/ImageClassification/build_apk.py @@ -0,0 +1,163 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +import argparse +import glob +import os +import shutil +import subprocess +import sys +from enum import Enum + + +class MODELNAME(Enum): + mobilenet_v3_large = 1 + resnet50 = 2 + resnext50 = 3 + inception_v3 = 4 + + +def printmenu(): + print("*****************************") + print("* TYPE OF MODEL *") + print("*****************************") + for m in MODELNAME: + print(str(m.value) + ". 
" + m.name) + print("*****************************") + + +## Initialize parser +parser = argparse.ArgumentParser() +parser.add_argument("-q", "--qnnsdk", required=True, help="Give path of QNN SDK") + +parser.add_argument("-m", "--model_name", type=str, help="Model Name") + + +# group = parser.add_mutually_exclusive_group() +# group.add_argument('-stopdownload', '--stopdownload', action = "store_true", help = "Do NOT Download Model from AI HUB") +parser.add_argument("-path", "--model_path", type=str, help="TFLITE model file") + +args = parser.parse_args() + + +##based on this pre-post can be decided +if not args.model_name: + printmenu() + inp_model_name = int(input("Please select one: ")) + args.model_name = MODELNAME(inp_model_name).name + + +destAsset = os.path.join(".", "classification", "src", "main", "assets") +if not os.path.exists(destAsset): + os.makedirs(destAsset) + + +## MODEL PATH NOT MENTIONED, add information into model_path +if not args.model_path: + exportstatus = input("Do you want us to download the model from AI hub (y/n)") + + ##DOWNLAOD USING EXPORT.PY + if exportstatus.lower().startswith("y"): + print("EXPORT form path") + pathtomodel = os.path.join( + "..", + "..", + "..", + "", + "qai_hub_models", + "models", + args.model_name, + "export.py", + ) + if not os.path.exists(pathtomodel): + print("PATH DO NOT EXIST: " + pathtomodel) + exit() + subprocess.run(["python", pathtomodel, "--skip-inferencing"]) + tflite_file = glob.glob( + "build" + os.sep + args.model_name + os.sep + "*.tflite", recursive=True + ) + args.model_path = tflite_file[0] + # shutil.copy(tflite_file[0], destAsset+os.sep+"superresmodel.tflite") + + ##GET USER TO GIVE PATH + else: + args.model_path = input("Give model File as input") + # if not os.path.exists(tflite_file): + # print("PATH DO NOT EXIST: "+tflite_file) + # exit() + # shutil.copy(tflite_file, destAsset+os.sep+"superresmodel.tflite") + + +if args.model_path: + print(args.model_path) + if not os.path.exists(args.model_path): + print("PATH DO NOT EXIST: " + args.model_path) + exit() + shutil.copy(args.model_path, destAsset + os.sep + "classification.tflite") + + +## COPYING REQUIRED FILES FROM QNN SDK +destJNI = os.path.join(".", "classification", "src", "main", "jniLibs", "arm64-v8a") +if not os.path.exists(destJNI): + os.makedirs(destJNI) + +# copy *.so from $qnn_sdk/libs/aarch64-android to $jni_lib_dir +qnnbasiclibs = os.path.join(args.qnnsdk, "lib", "aarch64-android") +shutil.copytree(qnnbasiclibs, destJNI, dirs_exist_ok=True) + +# copy $qnn_sdk/lib/hexagon-v**/unsigned/libQnnHtpV**Skel.so to $jni_lib_dir +skelstubfiles = os.path.join(args.qnnsdk, "lib", "hexagon-v**", "unsigned", "*.so") +for file in glob.glob(skelstubfiles): + shutil.copy(file, destJNI) + +# copy qtld-release.aar to $test_app_root/Application/ +destaar = os.path.join(".", "classification", "libs") +if not os.path.exists(destaar): + os.makedirs(destaar) +aarfile = os.path.join(args.qnnsdk, "lib", "android", "qtld-release.aar") +shutil.copy(aarfile, destaar) + + +## BUILDING APK +if sys.platform.startswith("win"): + print("Detected platform is windows") + gradleoutput = subprocess.run(["gradlew.bat", "assembleDebug"], cwd=".") +elif sys.platform.startswith("darwin"): + print("Detected platform is MAC") + gradleoutput = subprocess.run(["./gradlew", "assembleDebug"], cwd=".") +else: + print("Detected platform is Linux") + gradleoutput = subprocess.run(["./gradlew", "assembleDebug"], cwd=".") + + +## COPYING APK TO CWD +ApkPath = os.path.join( + os.getcwd(), + 
"classification", + "build", + "outputs", + "apk", + "debug", + "classification-debug.apk", +) +print("APK Is copied at current Working Directory") +shutil.copy(ApkPath, ".") + + +install_perm = input("Do you want to install this apk in connected device") +## INSTALLING AND RUNNING APK +if install_perm.lower().startswith("y"): + command_to_install = ["adb", "install", "classification-debug.apk"] + subprocess.run(command_to_install, cwd=".") + command_to_run = [ + "adb", + "shell", + "am", + "start", + "-a", + "com.example.ACTION_NAME", + "-n", + "com.qcom.imagesuperres/com.qcom.imagesuperres.QNNActivity", + ] + subprocess.run(command_to_run, cwd=".") diff --git a/apps/android/ImageClassification/classification/build.gradle b/apps/android/ImageClassification/classification/build.gradle new file mode 100644 index 00000000..5f289662 --- /dev/null +++ b/apps/android/ImageClassification/classification/build.gradle @@ -0,0 +1,63 @@ + +plugins { + id 'com.android.application' +} + + + +android { + compileSdk 32 + + defaultConfig { + applicationId "com.qcom.imageclassification" + minSdk 26 + targetSdk 32 + versionCode 1 + versionName "1.0" + + testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner" + externalNativeBuild { + cmake { + cppFlags '' + } + } + + } + aaptOptions { + noCompress "tflite" + } + + buildTypes { + release { + minifyEnabled false + proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro' + } + } + compileOptions { + sourceCompatibility JavaVersion.VERSION_1_8 + targetCompatibility JavaVersion.VERSION_1_8 + } + + packagingOptions + { + doNotStrip "**/*.so" + } +} +project.ext.LIB_DIR = projectDir.toString() + '/libs/' +project.ext.ASSET_DIR = projectDir.toString() + '/src/main/assets' + +dependencies { + + implementation 'androidx.appcompat:appcompat:1.4.2' + implementation 'com.google.android.material:material:1.6.1' + implementation 'androidx.constraintlayout:constraintlayout:2.1.4' + testImplementation 'junit:junit:4.13.2' + androidTestImplementation 'androidx.test.ext:junit:1.1.3' + androidTestImplementation 'androidx.test.espresso:espresso-core:3.4.0' + implementation 'org.tensorflow:tensorflow-lite:2.13.0' + implementation 'org.tensorflow:tensorflow-lite-select-tf-ops:2.9.0' + implementation 'org.tensorflow:tensorflow-lite-support:0.4.3' + implementation fileTree(dir: "libs", include: ["*.aar"]) + implementation files('libs/qtld-release') + +} diff --git a/apps/android/ImageClassification/classification/proguard-rules.pro b/apps/android/ImageClassification/classification/proguard-rules.pro new file mode 100644 index 00000000..36e00091 --- /dev/null +++ b/apps/android/ImageClassification/classification/proguard-rules.pro @@ -0,0 +1,21 @@ +# Add project specific ProGuard rules here. +# You can control the set of applied configuration files using the +# proguardFiles setting in build.gradle. +# +# For more details, see +# http://developer.android.com/guide/developing/tools/proguard.html + +# If your project uses WebView with JS, uncomment the following +# and specify the fully qualified class name to the JavaScript interface +# class: +#-keepclassmembers class fqcn.of.javascript.interface.for.webview { +# public *; +#} + +# Uncomment this to preserve the line number information for +# debugging stack traces. +#-keepattributes SourceFile,LineNumberTable + +# If you keep the line number information, uncomment this to +# hide the original source file name. 
+#-renamesourcefileattribute SourceFile diff --git a/apps/android/ImageClassification/classification/src/main/AndroidManifest.xml b/apps/android/ImageClassification/classification/src/main/AndroidManifest.xml new file mode 100644 index 00000000..598795bf --- /dev/null +++ b/apps/android/ImageClassification/classification/src/main/AndroidManifest.xml @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + + + + + + + diff --git a/apps/android/ImageClassification/classification/src/main/assets/Sample1.png b/apps/android/ImageClassification/classification/src/main/assets/Sample1.png new file mode 100644 index 00000000..24969fbd --- /dev/null +++ b/apps/android/ImageClassification/classification/src/main/assets/Sample1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8649d7ebb60c7e52ef496739616111a1ecd9797e7ecec3e1881504449a648077 +size 3155242 diff --git a/apps/android/ImageClassification/classification/src/main/assets/Sample2.png b/apps/android/ImageClassification/classification/src/main/assets/Sample2.png new file mode 100644 index 00000000..eed3f738 --- /dev/null +++ b/apps/android/ImageClassification/classification/src/main/assets/Sample2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d4a1bdf3ca10aa7d7c03fdceccda5ef3f6811bcbe3cb0968a8d5c606e572529 +size 1284181 diff --git a/apps/android/ImageClassification/classification/src/main/assets/Sample3.png b/apps/android/ImageClassification/classification/src/main/assets/Sample3.png new file mode 100644 index 00000000..319ca063 --- /dev/null +++ b/apps/android/ImageClassification/classification/src/main/assets/Sample3.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c18dc2c8ef40fddddf2a68367ba2a434427518beae80f83777036d08e9c04e1 +size 1335289 diff --git a/apps/android/ImageClassification/classification/src/main/assets/Sample4.png b/apps/android/ImageClassification/classification/src/main/assets/Sample4.png new file mode 100644 index 00000000..24969fbd --- /dev/null +++ b/apps/android/ImageClassification/classification/src/main/assets/Sample4.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8649d7ebb60c7e52ef496739616111a1ecd9797e7ecec3e1881504449a648077 +size 3155242 diff --git a/apps/android/ImageClassification/classification/src/main/assets/Sample5.png b/apps/android/ImageClassification/classification/src/main/assets/Sample5.png new file mode 100644 index 00000000..eed3f738 --- /dev/null +++ b/apps/android/ImageClassification/classification/src/main/assets/Sample5.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d4a1bdf3ca10aa7d7c03fdceccda5ef3f6811bcbe3cb0968a8d5c606e572529 +size 1284181 diff --git a/apps/android/ImageClassification/classification/src/main/assets/labels.txt b/apps/android/ImageClassification/classification/src/main/assets/labels.txt new file mode 100644 index 00000000..a85dcd85 --- /dev/null +++ b/apps/android/ImageClassification/classification/src/main/assets/labels.txt @@ -0,0 +1,1001 @@ +background +tench +goldfish +great white shark +tiger shark +hammerhead +electric ray +stingray +cock +hen +ostrich +brambling +goldfinch +house finch +junco +indigo bunting +robin +bulbul +jay +magpie +chickadee +water ouzel +kite +bald eagle +vulture +great grey owl +European fire salamander +common newt +eft +spotted salamander +axolotl +bullfrog +tree frog +tailed frog +loggerhead +leatherback turtle +mud turtle +terrapin +box turtle +banded gecko +common iguana +American chameleon +whiptail +agama +frilled 
lizard +alligator lizard +Gila monster +green lizard +African chameleon +Komodo dragon +African crocodile +American alligator +triceratops +thunder snake +ringneck snake +hognose snake +green snake +king snake +garter snake +water snake +vine snake +night snake +boa constrictor +rock python +Indian cobra +green mamba +sea snake +horned viper +diamondback +sidewinder +trilobite +harvestman +scorpion +black and gold garden spider +barn spider +garden spider +black widow +tarantula +wolf spider +tick +centipede +black grouse +ptarmigan +ruffed grouse +prairie chicken +peacock +quail +partridge +African grey +macaw +sulphur-crested cockatoo +lorikeet +coucal +bee eater +hornbill +hummingbird +jacamar +toucan +drake +red-breasted merganser +goose +black swan +tusker +echidna +platypus +wallaby +koala +wombat +jellyfish +sea anemone +brain coral +flatworm +nematode +conch +snail +slug +sea slug +chiton +chambered nautilus +Dungeness crab +rock crab +fiddler crab +king crab +American lobster +spiny lobster +crayfish +hermit crab +isopod +white stork +black stork +spoonbill +flamingo +little blue heron +American egret +bittern +crane +limpkin +European gallinule +American coot +bustard +ruddy turnstone +red-backed sandpiper +redshank +dowitcher +oystercatcher +pelican +king penguin +albatross +grey whale +killer whale +dugong +sea lion +Chihuahua +Japanese spaniel +Maltese dog +Pekinese +Shih-Tzu +Blenheim spaniel +papillon +toy terrier +Rhodesian ridgeback +Afghan hound +basset +beagle +bloodhound +bluetick +black-and-tan coonhound +Walker hound +English foxhound +redbone +borzoi +Irish wolfhound +Italian greyhound +whippet +Ibizan hound +Norwegian elkhound +otterhound +Saluki +Scottish deerhound +Weimaraner +Staffordshire bullterrier +American Staffordshire terrier +Bedlington terrier +Border terrier +Kerry blue terrier +Irish terrier +Norfolk terrier +Norwich terrier +Yorkshire terrier +wire-haired fox terrier +Lakeland terrier +Sealyham terrier +Airedale +cairn +Australian terrier +Dandie Dinmont +Boston bull +miniature schnauzer +giant schnauzer +standard schnauzer +Scotch terrier +Tibetan terrier +silky terrier +soft-coated wheaten terrier +West Highland white terrier +Lhasa +flat-coated retriever +curly-coated retriever +golden retriever +Labrador retriever +Chesapeake Bay retriever +German short-haired pointer +vizsla +English setter +Irish setter +Gordon setter +Brittany spaniel +clumber +English springer +Welsh springer spaniel +cocker spaniel +Sussex spaniel +Irish water spaniel +kuvasz +schipperke +groenendael +malinois +briard +kelpie +komondor +Old English sheepdog +Shetland sheepdog +collie +Border collie +Bouvier des Flandres +Rottweiler +German shepherd +Doberman +miniature pinscher +Greater Swiss Mountain dog +Bernese mountain dog +Appenzeller +EntleBucher +boxer +bull mastiff +Tibetan mastiff +French bulldog +Great Dane +Saint Bernard +Eskimo dog +malamute +Siberian husky +dalmatian +affenpinscher +basenji +pug +Leonberg +Newfoundland +Great Pyrenees +Samoyed +Pomeranian +chow +keeshond +Brabancon griffon +Pembroke +Cardigan +toy poodle +miniature poodle +standard poodle +Mexican hairless +timber wolf +white wolf +red wolf +coyote +dingo +dhole +African hunting dog +hyena +red fox +kit fox +Arctic fox +grey fox +tabby +tiger cat +Persian cat +Siamese cat +Egyptian cat +cougar +lynx +leopard +snow leopard +jaguar +lion +tiger +cheetah +brown bear +American black bear +ice bear +sloth bear +mongoose +meerkat +tiger beetle +ladybug +ground beetle +long-horned beetle +leaf beetle 
+dung beetle +rhinoceros beetle +weevil +fly +bee +ant +grasshopper +cricket +walking stick +cockroach +mantis +cicada +leafhopper +lacewing +dragonfly +damselfly +admiral +ringlet +monarch +cabbage butterfly +sulphur butterfly +lycaenid +starfish +sea urchin +sea cucumber +wood rabbit +hare +Angora +hamster +porcupine +fox squirrel +marmot +beaver +guinea pig +sorrel +zebra +hog +wild boar +warthog +hippopotamus +ox +water buffalo +bison +ram +bighorn +ibex +hartebeest +impala +gazelle +Arabian camel +llama +weasel +mink +polecat +black-footed ferret +otter +skunk +badger +armadillo +three-toed sloth +orangutan +gorilla +chimpanzee +gibbon +siamang +guenon +patas +baboon +macaque +langur +colobus +proboscis monkey +marmoset +capuchin +howler monkey +titi +spider monkey +squirrel monkey +Madagascar cat +indri +Indian elephant +African elephant +lesser panda +giant panda +barracouta +eel +coho +rock beauty +anemone fish +sturgeon +gar +lionfish +puffer +abacus +abaya +academic gown +accordion +acoustic guitar +aircraft carrier +airliner +airship +altar +ambulance +amphibian +analog clock +apiary +apron +ashcan +assault rifle +backpack +bakery +balance beam +balloon +ballpoint +Band Aid +banjo +bannister +barbell +barber chair +barbershop +barn +barometer +barrel +barrow +baseball +basketball +bassinet +bassoon +bathing cap +bath towel +bathtub +beach wagon +beacon +beaker +bearskin +beer bottle +beer glass +bell cote +bib +bicycle-built-for-two +bikini +binder +binoculars +birdhouse +boathouse +bobsled +bolo tie +bonnet +bookcase +bookshop +bottlecap +bow +bow tie +brass +brassiere +breakwater +breastplate +broom +bucket +buckle +bulletproof vest +bullet train +butcher shop +cab +caldron +candle +cannon +canoe +can opener +cardigan +car mirror +carousel +carpenter's kit +carton +car wheel +cash machine +cassette +cassette player +castle +catamaran +CD player +cello +cellular telephone +chain +chainlink fence +chain mail +chain saw +chest +chiffonier +chime +china cabinet +Christmas stocking +church +cinema +cleaver +cliff dwelling +cloak +clog +cocktail shaker +coffee mug +coffeepot +coil +combination lock +computer keyboard +confectionery +container ship +convertible +corkscrew +cornet +cowboy boot +cowboy hat +cradle +crane +crash helmet +crate +crib +Crock Pot +croquet ball +crutch +cuirass +dam +desk +desktop computer +dial telephone +diaper +digital clock +digital watch +dining table +dishrag +dishwasher +disk brake +dock +dogsled +dome +doormat +drilling platform +drum +drumstick +dumbbell +Dutch oven +electric fan +electric guitar +electric locomotive +entertainment center +envelope +espresso maker +face powder +feather boa +file +fireboat +fire engine +fire screen +flagpole +flute +folding chair +football helmet +forklift +fountain +fountain pen +four-poster +freight car +French horn +frying pan +fur coat +garbage truck +gasmask +gas pump +goblet +go-kart +golf ball +golfcart +gondola +gong +gown +grand piano +greenhouse +grille +grocery store +guillotine +hair slide +hair spray +half track +hammer +hamper +hand blower +hand-held computer +handkerchief +hard disc +harmonica +harp +harvester +hatchet +holster +home theater +honeycomb +hook +hoopskirt +horizontal bar +horse cart +hourglass +iPod +iron +jack-o'-lantern +jean +jeep +jersey +jigsaw puzzle +jinrikisha +joystick +kimono +knee pad +knot +lab coat +ladle +lampshade +laptop +lawn mower +lens cap +letter opener +library +lifeboat +lighter +limousine +liner +lipstick +Loafer +lotion +loudspeaker +loupe +lumbermill +magnetic 
compass +mailbag +mailbox +maillot +maillot +manhole cover +maraca +marimba +mask +matchstick +maypole +maze +measuring cup +medicine chest +megalith +microphone +microwave +military uniform +milk can +minibus +miniskirt +minivan +missile +mitten +mixing bowl +mobile home +Model T +modem +monastery +monitor +moped +mortar +mortarboard +mosque +mosquito net +motor scooter +mountain bike +mountain tent +mouse +mousetrap +moving van +muzzle +nail +neck brace +necklace +nipple +notebook +obelisk +oboe +ocarina +odometer +oil filter +organ +oscilloscope +overskirt +oxcart +oxygen mask +packet +paddle +paddlewheel +padlock +paintbrush +pajama +palace +panpipe +paper towel +parachute +parallel bars +park bench +parking meter +passenger car +patio +pay-phone +pedestal +pencil box +pencil sharpener +perfume +Petri dish +photocopier +pick +pickelhaube +picket fence +pickup +pier +piggy bank +pill bottle +pillow +ping-pong ball +pinwheel +pirate +pitcher +plane +planetarium +plastic bag +plate rack +plow +plunger +Polaroid camera +pole +police van +poncho +pool table +pop bottle +pot +potter's wheel +power drill +prayer rug +printer +prison +projectile +projector +puck +punching bag +purse +quill +quilt +racer +racket +radiator +radio +radio telescope +rain barrel +recreational vehicle +reel +reflex camera +refrigerator +remote control +restaurant +revolver +rifle +rocking chair +rotisserie +rubber eraser +rugby ball +rule +running shoe +safe +safety pin +saltshaker +sandal +sarong +sax +scabbard +scale +school bus +schooner +scoreboard +screen +screw +screwdriver +seat belt +sewing machine +shield +shoe shop +shoji +shopping basket +shopping cart +shovel +shower cap +shower curtain +ski +ski mask +sleeping bag +slide rule +sliding door +slot +snorkel +snowmobile +snowplow +soap dispenser +soccer ball +sock +solar dish +sombrero +soup bowl +space bar +space heater +space shuttle +spatula +speedboat +spider web +spindle +sports car +spotlight +stage +steam locomotive +steel arch bridge +steel drum +stethoscope +stole +stone wall +stopwatch +stove +strainer +streetcar +stretcher +studio couch +stupa +submarine +suit +sundial +sunglass +sunglasses +sunscreen +suspension bridge +swab +sweatshirt +swimming trunks +swing +switch +syringe +table lamp +tank +tape player +teapot +teddy +television +tennis ball +thatch +theater curtain +thimble +thresher +throne +tile roof +toaster +tobacco shop +toilet seat +torch +totem pole +tow truck +toyshop +tractor +trailer truck +tray +trench coat +tricycle +trimaran +tripod +triumphal arch +trolleybus +trombone +tub +turnstile +typewriter keyboard +umbrella +unicycle +upright +vacuum +vase +vault +velvet +vending machine +vestment +viaduct +violin +volleyball +waffle iron +wall clock +wallet +wardrobe +warplane +washbasin +washer +water bottle +water jug +water tower +whiskey jug +whistle +wig +window screen +window shade +Windsor tie +wine bottle +wing +wok +wooden spoon +wool +worm fence +wreck +yawl +yurt +web site +comic book +crossword puzzle +street sign +traffic light +book jacket +menu +plate +guacamole +consomme +hot pot +trifle +ice cream +ice lolly +French loaf +bagel +pretzel +cheeseburger +hotdog +mashed potato +head cabbage +broccoli +cauliflower +zucchini +spaghetti squash +acorn squash +butternut squash +cucumber +artichoke +bell pepper +cardoon +mushroom +Granny Smith +strawberry +orange +lemon +fig +pineapple +banana +jackfruit +custard apple +pomegranate +hay +carbonara +chocolate sauce +dough +meat loaf +pizza +potpie +burrito +red wine +espresso 
+cup +eggnog +alp +bubble +cliff +coral reef +geyser +lakeside +promontory +sandbar +seashore +valley +volcano +ballplayer +groom +scuba diver +rapeseed +daisy +yellow lady's slipper +corn +acorn +hip +buckeye +coral fungus +agaric +gyromitra +stinkhorn +earthstar +hen-of-the-woods +bolete +ear +toilet tissue diff --git a/apps/android/ImageClassification/classification/src/main/java/com/qcom/imageclassification/ImageClassification.java b/apps/android/ImageClassification/classification/src/main/java/com/qcom/imageclassification/ImageClassification.java new file mode 100644 index 00000000..50adfcf8 --- /dev/null +++ b/apps/android/ImageClassification/classification/src/main/java/com/qcom/imageclassification/ImageClassification.java @@ -0,0 +1,214 @@ +// --------------------------------------------------------------------- +// Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------------- +package com.qcom.imageclassification; + +import android.content.Context; +import android.graphics.Bitmap; +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.MappedByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import com.qualcomm.qti.QnnDelegate; +import org.tensorflow.lite.Interpreter; +import android.content.res.AssetFileDescriptor; +import android.content.res.AssetManager; +import android.util.Log; +import android.widget.Toast; +import java.io.FileInputStream; +import java.nio.channels.FileChannel; + +public class ImageClassification { + + private Context context; + private MappedByteBuffer tfLiteModel; + private Interpreter tfLite; + private Interpreter tfLite_QNN; + private QnnDelegate qnnDelegate = null; + private static final String TAG = "Sahin"; + private static final float IMAGE_MEAN = 127.7f; + private static final float IMAGE_STD =128f; + private List labelList; + private static final String LABEL_PATH = "labels.txt"; + boolean model_loaded = false; + public boolean getBuildStatus() + { + return model_loaded; + } + public void close() + { + if(qnnDelegate!=null) { + qnnDelegate.close(); + } + + if(tfLite != null){ + tfLite.close(); + } + + if(tfLiteModel!=null) + tfLiteModel.clear(); + + if(labelList!=null) + labelList.clear(); + } + public boolean initializeModel(Context context,String TFLITE_FILE) throws IOException { + + this.context = context; + + try { + tfLiteModel = loadModelFile(context.getApplicationContext().getAssets(), TFLITE_FILE); + Log.i(TAG, "MODEL LOADED"); + Interpreter.Options tfLiteOptions = new Interpreter.Options(); + tfLiteOptions.setNumThreads(4); + tfLiteOptions.setUseXNNPACK(true); + + tfLite = new Interpreter(tfLiteModel, tfLiteOptions); + + QnnDelegate.Options options = new QnnDelegate.Options(); + options.setBackendType(QnnDelegate.Options.BackendType.HTP_BACKEND); + options.setHtpPerformanceMode(QnnDelegate.Options.HtpPerformanceMode.HTP_PERFORMANCE_BURST); + options.setHtpPrecision(QnnDelegate.Options.HtpPrecision.HTP_PRECISION_FP16); + + Log.i(TAG, "NATIVE LIB PATH: " + context.getApplicationInfo().nativeLibraryDir); + options.setSkelLibraryDir(context.getApplicationInfo().nativeLibraryDir); + + qnnDelegate = new QnnDelegate(options); + tfLiteOptions.addDelegate(qnnDelegate); + tfLite_QNN = new Interpreter(tfLiteModel,tfLiteOptions); + Log.i(TAG, "QnnDelegate Option Added"); + 
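+            // Note (added comment): two Interpreter instances are kept on purpose. tfLite runs on the CPU
+            // (XNNPACK enabled), while tfLite_QNN wraps the same mapped model with the QNN HTP delegate,
+            // so inference() can pick a backend per call without reloading or re-mapping the model file.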
model_loaded= true; + Log.d(TAG,"Label list Loaded Successfully"); + labelList =loadLabelList(LABEL_PATH); + return true; + + } catch (IOException e) { + Log.e(TAG,"TFLite Model Loading Unsuccessfull"); + e.printStackTrace(); + return false; + } + } + + + public static List findTop3Indices(float[] arr) { + List topIndices = new ArrayList<>(); + + for (int i = 0; i < 3; i++) { + int maxIndex = 0; + float maxValue = arr[0]; + + for (int j = 1; j < arr.length; j++) { + if (arr[j] > maxValue && !topIndices.contains(j)) { + maxValue = arr[j]; + maxIndex = j; + } + } + + topIndices.add(maxIndex); + } + + return topIndices; + } + + public static MappedByteBuffer loadModelFile(AssetManager assets, String modelFilename) + throws IOException { + AssetFileDescriptor fileDescriptor = assets.openFd(modelFilename); + FileInputStream inputStream = new FileInputStream(fileDescriptor.getFileDescriptor()); + FileChannel fileChannel = inputStream.getChannel(); + long startOffset = fileDescriptor.getStartOffset(); + long declaredLength = fileDescriptor.getDeclaredLength(); + return fileChannel.map(FileChannel.MapMode.READ_ONLY, startOffset, declaredLength); + } + + public Result inference(Bitmap[] images, String backend) { + System.out.println("Processing %d images %dx%d."+ images.length+ images[0].getWidth()+ images[0].getHeight()); + + try { + + long Preprocessing_StartTime = System.nanoTime(); + Log.d(TAG,"Image Preprocessing"); + + Utils util = new Utils(); + List img_mean = Arrays.asList(IMAGE_MEAN, IMAGE_MEAN, IMAGE_MEAN); + List img_std = Arrays.asList(IMAGE_STD, IMAGE_STD, IMAGE_STD); + + int[] arr = tfLite.getInputTensor(0).shape(); //FOR VISION MODEL - input is normally like (B,H,W,C) + int channel = arr[3]; + int input_dims1 = arr[1]; + int input_dims2 = arr[2]; + + Bitmap scaledBitmap = Bitmap.createScaledBitmap(images[0],input_dims1,input_dims2,true); + + float[][][][] floatinputarray = new float[1][input_dims1][input_dims1][channel]; + util.PreProcess(scaledBitmap, input_dims1, input_dims2, floatinputarray, img_mean, img_std); + + long Preprocessing_EndTime = System.nanoTime(); + long Preporccsing_TimeDiff=Preprocessing_EndTime-Preprocessing_StartTime; + + Log.d(TAG,"Preprocessing Time: "+Preporccsing_TimeDiff/1000000+"ms"); + + Object[] inputArray = {floatinputarray}; + float[][] floatoutputarray = new float[1][1000]; + Map outputMap = new HashMap<>(); + outputMap.put(0, floatoutputarray); + + long inferenceStartTime = System.nanoTime(); + + if (backend.equals("NPU") && tfLite_QNN != null) { + System.out.println("NPU BACKEND"); + tfLite_QNN.runForMultipleInputsOutputs(inputArray, outputMap); + } + else if (backend.equals("CPU") && tfLite != null) { + System.out.println("TFLITE BACKEND"); + tfLite.runForMultipleInputsOutputs(inputArray, outputMap); + } + else + { + System.out.println("Sycronisation issue"); + } + + Log.i(TAG, "MODEL EXECUTED"); + long inferenceEndTime = System.nanoTime(); + long TimeDiff=inferenceEndTime-inferenceStartTime; + + Toast.makeText(context,"Inference Time: "+TimeDiff/1000000+"ms",Toast.LENGTH_SHORT).show(); + Log.i(TAG,"Inference Completed"); + + String res=""; + List indexList = findTop3Indices(floatoutputarray[0]); + + for(int i=0;i<3;i++){ + res+=labelList.get(indexList.get(i)+1)+", "; + } + + res = res.substring(0, res.length() - 2); //Removing comma from last + + ImageClassificationResult result = new ImageClassificationResult(indexList, res); + + return new Result<>(result, + (inferenceEndTime - inferenceStartTime) / 1000000); + + } catch (Exception ex) { + 
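findTop3Indices and the label lookup in inference() are plain Java, so they are easy to sanity-check off-device. The following minimal check is an editorial sketch, not part of the patch: `Top3Check` is an invented name, and it assumes it is compiled inside the classification module (for example as a unit test) so that ImageClassification resolves.

```
package com.qcom.imageclassification;

import java.util.List;

// Hypothetical sanity check for the top-3 selection above; not part of the patch.
public class Top3Check {
    public static void main(String[] args) {
        // Indices 3, 1 and 4 hold the three largest scores.
        float[] scores = {0.10f, 0.70f, 0.05f, 0.90f, 0.30f};
        List top3 = ImageClassification.findTop3Indices(scores);
        System.out.println(top3); // [3, 1, 4]
        // inference() then looks up labelList.get(index + 1), i.e. labels.txt
        // is read with a one-row offset.
        // Caveat: maxIndex restarts at 0 on every pass without consulting
        // topIndices, so an input whose largest score sits at index 0 is
        // reported as [0, 0, 0].
    }
}
```

If that duplicate-index edge case matters in practice, seeding maxIndex/maxValue from the first index not already in topIndices would close it.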
ex.printStackTrace(); + return null; + + } + } + + private List loadLabelList(String labelPath) throws IOException { + List labelList = new ArrayList<>(); + AssetManager assetManager= context.getAssets(); + BufferedReader reader = new BufferedReader(new InputStreamReader(assetManager.open(labelPath))); + String line; + while ((line = reader.readLine()) != null) { + labelList.add(line); + } + reader.close(); + return labelList; + } +} diff --git a/apps/android/ImageClassification/classification/src/main/java/com/qcom/imageclassification/ImageClassificationResult.java b/apps/android/ImageClassification/classification/src/main/java/com/qcom/imageclassification/ImageClassificationResult.java new file mode 100644 index 00000000..51c797fa --- /dev/null +++ b/apps/android/ImageClassification/classification/src/main/java/com/qcom/imageclassification/ImageClassificationResult.java @@ -0,0 +1,32 @@ +// --------------------------------------------------------------------- +// Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------------- +package com.qcom.imageclassification; + +import android.graphics.Bitmap; +import android.media.Image; + +import java.util.ArrayList; +import java.util.List; + +public class ImageClassificationResult { + + private List topindices; + private String ResultString; + + public ImageClassificationResult(List customlist, String res) + { + this.topindices = customlist; + this.ResultString = res; + } + + public List getIndices() + { + return topindices; + } + public String getResultString() + { + return ResultString; + } +} diff --git a/apps/android/ImageClassification/classification/src/main/java/com/qcom/imageclassification/QNNActivity.java b/apps/android/ImageClassification/classification/src/main/java/com/qcom/imageclassification/QNNActivity.java new file mode 100644 index 00000000..38ba98ea --- /dev/null +++ b/apps/android/ImageClassification/classification/src/main/java/com/qcom/imageclassification/QNNActivity.java @@ -0,0 +1,177 @@ +// --------------------------------------------------------------------- +// Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------------- +package com.qcom.imageclassification; + +import android.graphics.Bitmap; +import android.graphics.BitmapFactory; +import android.os.Bundle; +import android.view.View; +import android.widget.AdapterView; +import android.widget.ArrayAdapter; +import android.widget.ImageView; +import android.widget.RadioButton; +import android.widget.RadioGroup; +import android.widget.Spinner; +import android.widget.TextView; +import android.widget.Toast; + +import androidx.appcompat.app.AppCompatActivity; + +import java.io.IOException; +import java.io.InputStream; + +public class QNNActivity extends AppCompatActivity { + + public static InputStream originalFile = null; + ImageClassification imageClassification; + private final static String TFLITE_FILE = "classification.tflite"; + + //creating objects for UI element used in layout files (activity_classification.xml) + RadioButton rb1, rb2, rb3; + + String prev_runtime = ""; + ImageView imageView; + RadioGroup radioGroup; + TextView predicted_view; + Bitmap bmps = null; + private boolean spinInitialized = false; + private boolean radioGroupInitialized = false; + public static Result result = null; + Spinner spin; + private static final String TAG="Image_Classification"; + + String[] options = {"No Selection","Sample1.png","Sample2.png","Sample3.png"}; //Image filenames on which model inference is made + protected void executeRadioButton(int checkedId) { + switch (checkedId) { + case R.id.rb1: + // set text for your textview here + System.out.println("CPU instance running"); + result = process(bmps, "CPU"); + break; + case R.id.rb2: + // set text for your textview here + System.out.println("NPU instance running"); + System.out.println("Device runtime " + "NPU"); + result = process(bmps, "NPU"); + break; + default: + System.out.println("Do Nothing"); + } + } + + @Override + protected void onCreate(Bundle savedInstanceState) { + super.onCreate(savedInstanceState); + + //Initialization + setContentView(R.layout.activity_classification); + rb1 = (RadioButton) findViewById(R.id.rb1); + rb2 = (RadioButton) findViewById(R.id.rb2); + imageView = (ImageView) findViewById(R.id.im1); + radioGroup = (RadioGroup) findViewById(R.id.rg1); + spin = (Spinner) findViewById((R.id.spinner)); + predicted_view=(TextView)findViewById(R.id.textView4); + + predicted_view.setVisibility(View.INVISIBLE); + + imageClassification = new ImageClassification(); + + ArrayAdapter ad = new ArrayAdapter(this, android.R.layout.simple_spinner_item, options); + ad.setDropDownViewResource(android.R.layout.simple_spinner_dropdown_item); + spin.setAdapter(ad); + + + radioGroup.setOnCheckedChangeListener(new RadioGroup.OnCheckedChangeListener() { + @Override + public void onCheckedChanged(RadioGroup group, int checkedId) { + if (originalFile!=null && bmps!=null){ + executeRadioButton(checkedId); + } + else{ + if(radioGroupInitialized) { + Toast.makeText(getApplicationContext(), "Please select image first", Toast.LENGTH_SHORT).show(); + } + else + { + radioGroupInitialized = true; + } + } + } + }); + + spin.setOnItemSelectedListener(new AdapterView.OnItemSelectedListener() { + @Override + public void onItemSelected(AdapterView parent, View view, int position, long id) { + + // loading picture from assets... 
+ if (!parent.getItemAtPosition(position).equals("No Selection")) { + try { + originalFile = getAssets().open((String) parent.getItemAtPosition(position)); + } catch (IOException e) { + e.printStackTrace(); + } + + // Convert input image to Bitmap + bmps = BitmapFactory.decodeStream(originalFile); + + //Scaling the image size to show it on the ImageView + Bitmap scaled1 = Bitmap.createScaledBitmap(bmps, 512, 512, true); + try { + // Set the input image in UI view + imageView.setImageBitmap(scaled1); + } catch (Exception e) { + e.printStackTrace(); + } + //Taking the Runtime Environment input from Radio Button + int checkedID_RB = radioGroup.getCheckedRadioButtonId(); + if (originalFile!=null && bmps!=null && checkedID_RB !=-1){ + executeRadioButton(checkedID_RB); + } + } + else{ + + originalFile=null; + bmps=null; + imageView.setImageResource(R.drawable.ic_launcher_background); + radioGroup.clearCheck(); + + if(spinInitialized){ + Toast.makeText(getApplicationContext(), "Please select image first", Toast.LENGTH_SHORT).show(); + } + else + { + spinInitialized = true; + } + } + } + @Override + public void onNothingSelected(AdapterView parent) { + System.out.println("Nothing"); + } + }); + } + + public Result process(Bitmap bmps, String run_time){ + + Result result = null; + try { + if(imageClassification.getBuildStatus()==false) + imageClassification.initializeModel(this, TFLITE_FILE); + + result = imageClassification.inference(new Bitmap[] {bmps}, run_time); + } catch (IOException e) { + throw new RuntimeException(e); + } + predicted_view.setVisibility(View.VISIBLE); + predicted_view.setText(result.getResults().getResultString()); + return result; + } + + @Override + protected void onDestroy() { + super.onDestroy(); + imageClassification.close(); + } +} diff --git a/apps/android/ImageClassification/classification/src/main/java/com/qcom/imageclassification/Result.java b/apps/android/ImageClassification/classification/src/main/java/com/qcom/imageclassification/Result.java new file mode 100644 index 00000000..5428bbb9 --- /dev/null +++ b/apps/android/ImageClassification/classification/src/main/java/com/qcom/imageclassification/Result.java @@ -0,0 +1,27 @@ +// --------------------------------------------------------------------- +// Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------------- +package com.qcom.imageclassification; +import java.util.List; + +public class Result { + + private final E results; + private final long inferenceTime; + public Result(E results, long inferenceTime) { + + this.results = results; + this.inferenceTime = inferenceTime; + } + + public E getResults() { + return results; + } + + + public long getInferenceTime() { + return inferenceTime; + } + +} diff --git a/apps/android/ImageClassification/classification/src/main/java/com/qcom/imageclassification/Utils.java b/apps/android/ImageClassification/classification/src/main/java/com/qcom/imageclassification/Utils.java new file mode 100644 index 00000000..c3f8eadf --- /dev/null +++ b/apps/android/ImageClassification/classification/src/main/java/com/qcom/imageclassification/Utils.java @@ -0,0 +1,27 @@ +// --------------------------------------------------------------------- +// Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------------- +package com.qcom.imageclassification; + + +import android.graphics.Bitmap; +import android.graphics.Color; +import java.util.Arrays; +import java.util.List; + +public class Utils { + + //PRE PROCESSING Model Input + public void PreProcess(Bitmap inputBitmap, int input_dims1, int input_dims2, float[][][][] floatinputarray, List ImageMean, List ImageStd){ + for (int x = 0; x < input_dims1; x++) { + for (int y = 0; y < input_dims2; y++) { + int pixel = inputBitmap.getPixel(x, y); + List rgb = Arrays.asList((float)Color.red(pixel), (float)Color.green(pixel), (float)Color.blue(pixel)); + for(int z = 0;z<3; z++){ + floatinputarray[0][x][y][z] = (float)((rgb.get(z))-ImageMean.get(z))/ImageStd.get(z); + } + } + } + } +} diff --git a/apps/android/ImageClassification/classification/src/main/res/drawable-v24/ic_launcher_foreground.xml b/apps/android/ImageClassification/classification/src/main/res/drawable-v24/ic_launcher_foreground.xml new file mode 100644 index 00000000..1ff1154f --- /dev/null +++ b/apps/android/ImageClassification/classification/src/main/res/drawable-v24/ic_launcher_foreground.xml @@ -0,0 +1,30 @@ + + + + + + + + + + + diff --git a/apps/android/ImageClassification/classification/src/main/res/drawable/ic_launcher_background.xml b/apps/android/ImageClassification/classification/src/main/res/drawable/ic_launcher_background.xml new file mode 100644 index 00000000..a4f78de5 --- /dev/null +++ b/apps/android/ImageClassification/classification/src/main/res/drawable/ic_launcher_background.xml @@ -0,0 +1,170 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/apps/android/ImageClassification/classification/src/main/res/drawable/image_classification_icon.png b/apps/android/ImageClassification/classification/src/main/res/drawable/image_classification_icon.png new file mode 100644 index 00000000..23528a78 --- /dev/null +++ b/apps/android/ImageClassification/classification/src/main/res/drawable/image_classification_icon.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d97af3b197c294062cbb9f35bd277438befcd158eb6b47fa67552c00053dce49 +size 26951 diff --git a/apps/android/ImageClassification/classification/src/main/res/layout/activity_classification.xml b/apps/android/ImageClassification/classification/src/main/res/layout/activity_classification.xml new file mode 100644 index 00000000..6df6e740 --- /dev/null +++ b/apps/android/ImageClassification/classification/src/main/res/layout/activity_classification.xml @@ -0,0 +1,93 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/apps/android/ImageClassification/classification/src/main/res/mipmap-anydpi-v26/ic_launcher.xml b/apps/android/ImageClassification/classification/src/main/res/mipmap-anydpi-v26/ic_launcher.xml new file mode 100644 index 00000000..3564f5b0 --- /dev/null +++ b/apps/android/ImageClassification/classification/src/main/res/mipmap-anydpi-v26/ic_launcher.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/apps/android/ImageClassification/classification/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml b/apps/android/ImageClassification/classification/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml new file mode 100644 index 00000000..0351084b --- /dev/null +++ b/apps/android/ImageClassification/classification/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml @@ -0,0 +1,6 @@ + + + + + + diff --git 
a/apps/android/ImageClassification/classification/src/main/res/mipmap-hdpi/ic_launcher.png b/apps/android/ImageClassification/classification/src/main/res/mipmap-hdpi/ic_launcher.png new file mode 100644 index 00000000..1ae43951 --- /dev/null +++ b/apps/android/ImageClassification/classification/src/main/res/mipmap-hdpi/ic_launcher.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ee6ca2903e3094d64110dee90aea432f9eb9bc747c7e5c134496b8f7feff3b8 +size 3593 diff --git a/apps/android/ImageClassification/classification/src/main/res/mipmap-hdpi/ic_launcher_round.png b/apps/android/ImageClassification/classification/src/main/res/mipmap-hdpi/ic_launcher_round.png new file mode 100644 index 00000000..e898edde --- /dev/null +++ b/apps/android/ImageClassification/classification/src/main/res/mipmap-hdpi/ic_launcher_round.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:876e35db6919f81d28dbe042d8535414b2b3bb13d1139c16265f652ca5df65ac +size 5339 diff --git a/apps/android/ImageClassification/classification/src/main/res/mipmap-mdpi/ic_launcher.png b/apps/android/ImageClassification/classification/src/main/res/mipmap-mdpi/ic_launcher.png new file mode 100644 index 00000000..10afb4e6 --- /dev/null +++ b/apps/android/ImageClassification/classification/src/main/res/mipmap-mdpi/ic_launcher.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62c4db47abd938c35f4926ea0b7b31b9d6c41bef1ddacb2c7685b5c6ea0890e5 +size 2636 diff --git a/apps/android/ImageClassification/classification/src/main/res/mipmap-mdpi/ic_launcher_round.png b/apps/android/ImageClassification/classification/src/main/res/mipmap-mdpi/ic_launcher_round.png new file mode 100644 index 00000000..dd1c781b --- /dev/null +++ b/apps/android/ImageClassification/classification/src/main/res/mipmap-mdpi/ic_launcher_round.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9bc0ce206c8715db22eacfbceddff820f9b8a0ef3a519bbb88f7b6e65806d71 +size 3388 diff --git a/apps/android/ImageClassification/classification/src/main/res/mipmap-xhdpi/ic_launcher.png b/apps/android/ImageClassification/classification/src/main/res/mipmap-xhdpi/ic_launcher.png new file mode 100644 index 00000000..cc082f9a --- /dev/null +++ b/apps/android/ImageClassification/classification/src/main/res/mipmap-xhdpi/ic_launcher.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a5e85ebec8c77c18f5c34d762949674373c7a95de57a8a82ce165c8db8cedbb +size 4926 diff --git a/apps/android/ImageClassification/classification/src/main/res/mipmap-xhdpi/ic_launcher_round.png b/apps/android/ImageClassification/classification/src/main/res/mipmap-xhdpi/ic_launcher_round.png new file mode 100644 index 00000000..3910511a --- /dev/null +++ b/apps/android/ImageClassification/classification/src/main/res/mipmap-xhdpi/ic_launcher_round.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81b31d4cabd3e4358db2d99e6c31474b82ae6379dc06ea0c00267f41da1bd599 +size 7472 diff --git a/apps/android/ImageClassification/classification/src/main/res/mipmap-xxhdpi/ic_launcher.png b/apps/android/ImageClassification/classification/src/main/res/mipmap-xxhdpi/ic_launcher.png new file mode 100644 index 00000000..db7bbe05 --- /dev/null +++ b/apps/android/ImageClassification/classification/src/main/res/mipmap-xxhdpi/ic_launcher.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01cec2d4d6cc59f250e4bbfa445042c3a3b9ebf9788b9f34796e85c0af874da4 +size 7909 diff --git 
a/apps/android/ImageClassification/classification/src/main/res/mipmap-xxhdpi/ic_launcher_round.png b/apps/android/ImageClassification/classification/src/main/res/mipmap-xxhdpi/ic_launcher_round.png new file mode 100644 index 00000000..17327017 --- /dev/null +++ b/apps/android/ImageClassification/classification/src/main/res/mipmap-xxhdpi/ic_launcher_round.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf24682fa050affbc3bdaae15cd5532c13758a5a0e6d5305cfad52ddefc4d571 +size 11873 diff --git a/apps/android/ImageClassification/classification/src/main/res/mipmap-xxxhdpi/ic_launcher.png b/apps/android/ImageClassification/classification/src/main/res/mipmap-xxxhdpi/ic_launcher.png new file mode 100644 index 00000000..6413db3b --- /dev/null +++ b/apps/android/ImageClassification/classification/src/main/res/mipmap-xxxhdpi/ic_launcher.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2f8a8d39587eb912f8d217f2160c4ba4e21a60fd7a6f84849c102bcac725975 +size 10652 diff --git a/apps/android/ImageClassification/classification/src/main/res/mipmap-xxxhdpi/ic_launcher_round.png b/apps/android/ImageClassification/classification/src/main/res/mipmap-xxxhdpi/ic_launcher_round.png new file mode 100644 index 00000000..5d1c9ba8 --- /dev/null +++ b/apps/android/ImageClassification/classification/src/main/res/mipmap-xxxhdpi/ic_launcher_round.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7ed7010aa67cb843af6df357ff1a74ec1598d2407e4884d12633aa0f11e3a4b +size 16570 diff --git a/apps/android/ImageClassification/classification/src/main/res/values-night/themes.xml b/apps/android/ImageClassification/classification/src/main/res/values-night/themes.xml new file mode 100644 index 00000000..a9e583cb --- /dev/null +++ b/apps/android/ImageClassification/classification/src/main/res/values-night/themes.xml @@ -0,0 +1,17 @@ + + + + + diff --git a/apps/android/ImageClassification/classification/src/main/res/values/colors.xml b/apps/android/ImageClassification/classification/src/main/res/values/colors.xml new file mode 100644 index 00000000..977bb9f2 --- /dev/null +++ b/apps/android/ImageClassification/classification/src/main/res/values/colors.xml @@ -0,0 +1,11 @@ + + + + #FFBB86FC + #FF6200EE + #FF3700B3 + #FF03DAC5 + #FF018786 + #FF000000 + #FFFFFFFF + diff --git a/apps/android/ImageClassification/classification/src/main/res/values/strings.xml b/apps/android/ImageClassification/classification/src/main/res/values/strings.xml new file mode 100644 index 00000000..2967480c --- /dev/null +++ b/apps/android/ImageClassification/classification/src/main/res/values/strings.xml @@ -0,0 +1,4 @@ + + + Image_Classification + diff --git a/apps/android/ImageClassification/classification/src/main/res/values/themes.xml b/apps/android/ImageClassification/classification/src/main/res/values/themes.xml new file mode 100644 index 00000000..038239bb --- /dev/null +++ b/apps/android/ImageClassification/classification/src/main/res/values/themes.xml @@ -0,0 +1,18 @@ + + + + + + diff --git a/apps/android/ImageClassification/gradle.properties b/apps/android/ImageClassification/gradle.properties new file mode 100644 index 00000000..08e95206 --- /dev/null +++ b/apps/android/ImageClassification/gradle.properties @@ -0,0 +1,20 @@ +# Project-wide Gradle settings. +# IDE (e.g. Android Studio) users: +# Gradle settings configured through the IDE *will override* +# any settings specified in this file. 
+# For more details on how to configure your build environment visit +# http://www.gradle.org/docs/current/userguide/build_environment.html +# Specifies the JVM arguments used for the daemon process. +# The setting is particularly useful for tweaking memory settings. +org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8 +# When configured, Gradle will run in incubating parallel mode. +# This option should only be used with decoupled projects. More details, visit +# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects +# org.gradle.parallel=true +# AndroidX package structure to make it clearer which packages are bundled with the +# Android operating system, and which are packaged with your app"s APK +# https://developer.android.com/topic/libraries/support-library/androidx-rn +android.useAndroidX=true +# Automatically convert third-party libraries to use AndroidX +#android.enableJetifier=true +android.nonTransitiveRClass=true diff --git a/apps/android/ImageClassification/gradle/wrapper/gradle-wrapper.jar b/apps/android/ImageClassification/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 00000000..c4868dfc --- /dev/null +++ b/apps/android/ImageClassification/gradle/wrapper/gradle-wrapper.jar @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33ad4583fd7ee156f533778736fa1b4940bd83b433934d1cc4e9f608e99a6a89 +size 59536 diff --git a/apps/android/ImageClassification/gradle/wrapper/gradle-wrapper.properties b/apps/android/ImageClassification/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 00000000..f79f7cc5 --- /dev/null +++ b/apps/android/ImageClassification/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,6 @@ +#Wed Feb 07 17:55:01 IST 2024 +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-7.4-bin.zip +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists diff --git a/apps/android/ImageClassification/gradlew b/apps/android/ImageClassification/gradlew new file mode 100644 index 00000000..744e882e --- /dev/null +++ b/apps/android/ImageClassification/gradlew @@ -0,0 +1,185 @@ +#!/usr/bin/env sh + +# +# Copyright 2015 the original author or authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +############################################################################## +## +## Gradle start up script for UN*X +## +############################################################################## + +# Attempt to set APP_HOME +# Resolve links: $0 may be a link +PRG="$0" +# Need this for relative symlinks. +while [ -h "$PRG" ] ; do + ls=`ls -ld "$PRG"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "$PRG"`"/$link" + fi +done +SAVED="`pwd`" +cd "`dirname \"$PRG\"`/" >/dev/null +APP_HOME="`pwd -P`" +cd "$SAVED" >/dev/null + +APP_NAME="Gradle" +APP_BASE_NAME=`basename "$0"` + +# Add default JVM options here. 
You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD="maximum" + +warn () { + echo "$*" +} + +die () { + echo + echo "$*" + echo + exit 1 +} + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "`uname`" in + CYGWIN* ) + cygwin=true + ;; + Darwin* ) + darwin=true + ;; + MSYS* | MINGW* ) + msys=true + ;; + NONSTOP* ) + nonstop=true + ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD="$JAVA_HOME/jre/sh/java" + else + JAVACMD="$JAVA_HOME/bin/java" + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD="java" + which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." +fi + +# Increase the maximum file descriptors if we can. +if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then + MAX_FD_LIMIT=`ulimit -H -n` + if [ $? -eq 0 ] ; then + if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then + MAX_FD="$MAX_FD_LIMIT" + fi + ulimit -n $MAX_FD + if [ $? -ne 0 ] ; then + warn "Could not set maximum file descriptor limit: $MAX_FD" + fi + else + warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" + fi +fi + +# For Darwin, add options to specify how the application appears in the dock +if $darwin; then + GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" +fi + +# For Cygwin or MSYS, switch paths to Windows format before running java +if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then + APP_HOME=`cygpath --path --mixed "$APP_HOME"` + CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` + + JAVACMD=`cygpath --unix "$JAVACMD"` + + # We build the pattern for arguments to be converted via cygpath + ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` + SEP="" + for dir in $ROOTDIRSRAW ; do + ROOTDIRS="$ROOTDIRS$SEP$dir" + SEP="|" + done + OURCYGPATTERN="(^($ROOTDIRS))" + # Add a user-defined pattern to the cygpath arguments + if [ "$GRADLE_CYGPATTERN" != "" ] ; then + OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" + fi + # Now convert the arguments - kludge to limit ourselves to /bin/sh + i=0 + for arg in "$@" ; do + CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` + CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option + + if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition + eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` + else + eval `echo args$i`="\"$arg\"" + fi + i=`expr $i + 1` + done + case $i in + 0) set -- ;; + 1) set -- "$args0" ;; + 2) set -- "$args0" "$args1" ;; + 3) set -- "$args0" "$args1" "$args2" ;; + 4) set -- "$args0" "$args1" "$args2" "$args3" ;; + 5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; + 6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; + 7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; + 8) set -- "$args0" "$args1" "$args2" 
"$args3" "$args4" "$args5" "$args6" "$args7" ;; + 9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; + esac +fi + +# Escape application args +save () { + for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done + echo " " +} +APP_ARGS=`save "$@"` + +# Collect all arguments for the java command, following the shell quoting and substitution rules +eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" + +exec "$JAVACMD" "$@" diff --git a/apps/android/ImageClassification/gradlew.bat b/apps/android/ImageClassification/gradlew.bat new file mode 100644 index 00000000..ac1b06f9 --- /dev/null +++ b/apps/android/ImageClassification/gradlew.bat @@ -0,0 +1,89 @@ +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. +@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem + +@if "%DEBUG%" == "" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%" == "" set DIRNAME=. +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Resolve any "." and ".." in APP_HOME to make it shorter. +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if "%ERRORLEVEL%" == "0" goto execute + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto execute + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* + +:end +@rem End local scope for the variables with windows NT shell +if "%ERRORLEVEL%"=="0" goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! 
+if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 +exit /b 1 + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/apps/android/ImageClassification/settings.gradle b/apps/android/ImageClassification/settings.gradle new file mode 100644 index 00000000..2f5039bf --- /dev/null +++ b/apps/android/ImageClassification/settings.gradle @@ -0,0 +1,29 @@ +pluginManagement { + repositories { + gradlePluginPortal() + google() + mavenCentral() + } +} + +dependencyResolutionManagement { + repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS) + repositories { + google() + mavenCentral() + maven { // add this repo to use snapshots + name 'ossrh-snapshot' + url 'https://oss.sonatype.org/content/repositories/snapshots' + + } + flatDir { + dirs 'libs' + } + } +} + + + +//include ':snpe-release' +rootProject.name = "classification" +include ':classification' diff --git a/apps/android/ImageSuperResolution/README.md b/apps/android/ImageSuperResolution/README.md new file mode 100644 index 00000000..70658c0e --- /dev/null +++ b/apps/android/ImageSuperResolution/README.md @@ -0,0 +1,66 @@ +### Requirements + +1. Java, android-sdk and sdkmanager is already set at user's end +2. User should have Linux QNN SDK in local machine. +3. ANDROID_HOME is set to android-sdk path +4. AI-Hub is properly configured with user token. + + +## Info +Please execute build_apk.py. This script will compile and download a model from AI-Hub and paste it in your Android Proect and Generate superresolution-debug.apk + +This app takes model with image of size 128x128 as input and gives 512x512 as output. If you want, you can replace the model with any superesolution tflite model, but you have to change the pre-processing, post-processing and dimensions in the app code based on model parameters. + + +## Preprocessing + + +``` + public void PreProcess(Bitmap inputBitmap, int input_dims1, int input_dims2, float[][][][] floatinputarray){ + for (int x = 0; x < input_dims1; x++) { + for (int y = 0; y < input_dims2; y++) { + int pixel = inputBitmap.getPixel(x, y); + // Normalize channel values to [-1.0, 1.0]. Here, pixel values + // are positive so the effective range will be [0.0, 1.0] + floatinputarray[0][x][y][0] = (Color.red(pixel))/255.0f; + floatinputarray[0][x][y][1] = (Color.green(pixel))/255.0f; + floatinputarray[0][x][y][2] = (Color.blue(pixel))/255.0f; + } + } + } +``` + + +## PostProcessing + + +``` + public void PostProcess(Bitmap outbmp, int output_dims1, int output_dims2, float[][][][] floatoutputarray) { + for (int x = 0; x < output_dims1; x++) { + for (int y = 0; y < output_dims2; y++) { + int red = (int) (Math.max(0, Math.min(255, floatoutputarray[0][x][y][0] * 255))); + int green = (int) (Math.max(0, Math.min(255, floatoutputarray[0][x][y][1] * 255))); + int blue = (int) (Math.max(0, Math.min(255, floatoutputarray[0][x][y][2] * 255))); + int color = Color.argb(255, red, green, blue); + outbmp.setPixel(x, y, color); + } + } + } +``` + +### Build App: + +You have to run build_apk.py for Image Classification. It will generate classification-debug.apk and install it in connected device. 
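Before the command-line options listed next, here is a hedged sketch of how the PreProcess and PostProcess helpers shown above would typically be wired around a TFLite run for the 128x128 to 512x512 model this README describes. The `interpreter`, `utils`, and `inputBitmap` names are assumptions standing in for the objects the app code sets up elsewhere in this patch; only standard android.graphics.Bitmap and org.tensorflow.lite.Interpreter calls are used.

```
// Illustrative only: end-to-end use of the helpers above (assumed names).
Bitmap lowRes = Bitmap.createScaledBitmap(inputBitmap, 128, 128, true);

float[][][][] input = new float[1][128][128][3];
utils.PreProcess(lowRes, 128, 128, input);            // scale RGB to [0, 1]

float[][][][] output = new float[1][512][512][3];
interpreter.run(input, output);                       // float NHWC model

Bitmap highRes = Bitmap.createBitmap(512, 512, Bitmap.Config.ARGB_8888);
utils.PostProcess(highRes, 512, 512, output);         // clamp and scale back to [0, 255]
```

Swapping in a different super-resolution model means adjusting these dimensions and the two helpers to that model's expected layout, as noted in the Info section above.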
+ + + build_apk.py [-h] -q QNNSDK [-m MODEL_NAME] [-path MODEL_PATH] + +``` +options: + + -h, --help show this help message and exit + -q QNNSDK, --qnnsdk QNNSDK Give path of QNN SDK (REQUIRED) + -m MODEL_NAME, --model_name MODEL_NAME Model Name (Optional) + -path MODEL_PATH, --model_path MODEL_PATH Model Path (Optional) + +``` diff --git a/apps/android/ImageSuperResolution/build.gradle b/apps/android/ImageSuperResolution/build.gradle new file mode 100644 index 00000000..798f7515 --- /dev/null +++ b/apps/android/ImageSuperResolution/build.gradle @@ -0,0 +1,10 @@ + +// Top-level build file where you can add configuration options common to all sub-projects/modules. +plugins { + id 'com.android.application' version '7.2.1' apply false + id 'com.android.library' version '7.2.1' apply false +} + +task clean(type: Delete) { + delete rootProject.buildDir +} diff --git a/apps/android/ImageSuperResolution/build.properties b/apps/android/ImageSuperResolution/build.properties new file mode 100644 index 00000000..0ed786f4 --- /dev/null +++ b/apps/android/ImageSuperResolution/build.properties @@ -0,0 +1,2 @@ +MODELTYPE=XLSR +APPTYPE=SUPERRES diff --git a/apps/android/ImageSuperResolution/build_apk.py b/apps/android/ImageSuperResolution/build_apk.py new file mode 100644 index 00000000..28deae76 --- /dev/null +++ b/apps/android/ImageSuperResolution/build_apk.py @@ -0,0 +1,182 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +import argparse +import glob +import os +import shutil +import subprocess +import sys +from enum import Enum + + +class MODELNAME(Enum): + xlsr = 1 + esrgan = 2 + real_esrgan_general_x4v3 = 3 + # real_esrgan_x4plus = 4 + sesr_m5 = 5 + # quicksrnetsmall = 6 + # QuickSRNetMedium = 7 + # QuickSRNetLarge = 8 + # sesr_m5_quantized = 9 + # xlsr_quantized = 10 + + +def printmenu(): + print("*****************************") + print("* TYPE OF MODEL *") + print("*****************************") + for m in MODELNAME: + print(str(m.value) + ". 
" + m.name) + print("*****************************") + + +## Initialize parser +parser = argparse.ArgumentParser() +parser.add_argument("-q", "--qnnsdk", required=True, help="Give path of QNN SDK") + +parser.add_argument("-m", "--model_name", type=str, help="Model Name") + + +# group = parser.add_mutually_exclusive_group() +# group.add_argument('-stopdownload', '--stopdownload', action = "store_true", help = "Do NOT Download Model from AI HUB") +parser.add_argument("-path", "--model_path", type=str, help="TFLITE model file") + +args = parser.parse_args() + + +##based on this pre-post can be decided +if not args.model_name: + printmenu() + inp_model_name = int(input("Please select one: ")) + args.model_name = MODELNAME(inp_model_name).name + + +destAsset = os.path.join(".", "superresolution", "src", "main", "assets") +if not os.path.exists(destAsset): + os.makedirs(destAsset) + + +## MODEL PATH NOT MENTIONED, add information into model_path +if not args.model_path: + exportstatus = input("Do you want us to download the model from AI hub (y/n)") + + ##DOWNLAOD USING EXPORT.PY + if exportstatus.lower().startswith("y"): + print("EXPORT form path") + pathtomodel = os.path.join( + "..", + "..", + "..", + "", + "qai_hub_models", + "models", + args.model_name, + "export.py", + ) + if not os.path.exists(pathtomodel): + print("PATH DO NOT EXIST: " + pathtomodel) + exit() + subprocess.run(["python", pathtomodel, "--skip-inferencing"]) + tflite_file = glob.glob( + "build" + os.sep + args.model_name + os.sep + "*.tflite", recursive=True + ) + args.model_path = tflite_file[0] + # shutil.copy(tflite_file[0], destAsset+os.sep+"superresmodel.tflite") + + ##GET USER TO GIVE PATH + else: + args.model_path = input("Give model File as input") + # if not os.path.exists(tflite_file): + # print("PATH DO NOT EXIST: "+tflite_file) + # exit() + # shutil.copy(tflite_file, destAsset+os.sep+"superresmodel.tflite") + + +if args.model_path: + print(args.model_path) + if not os.path.exists(args.model_path): + print("PATH DO NOT EXIST: " + args.model_path) + exit() + shutil.copy(args.model_path, destAsset + os.sep + "superresmodel.tflite") + + +## GETTING PRE/POST PROCESSS BASED ON MODEL NAME +ProPostModel = "XLSR" +AppType = "SUPERRES" + +if args.model_name: + if "esrgan" in args.model_name.lower(): + print("MODEL IS ESRAGAN") + ProPostModel = "ESRGAN" + +with open("build.properties", "w") as f: + f.write(f"MODELTYPE={ProPostModel}\n") + f.write(f"APPTYPE={AppType}\n") + +## COPYING REQUIRED FILES FROM QNN SDK +destJNI = os.path.join(".", "superresolution", "src", "main", "jniLibs", "arm64-v8a") +if not os.path.exists(destJNI): + os.makedirs(destJNI) + +# copy *.so from $qnn_sdk/libs/aarch64-android to $jni_lib_dir +qnnbasiclibs = os.path.join(args.qnnsdk, "lib", "aarch64-android") +shutil.copytree(qnnbasiclibs, destJNI, dirs_exist_ok=True) + +# copy $qnn_sdk/lib/hexagon-v**/unsigned/libQnnHtpV**Skel.so to $jni_lib_dir +skelstubfiles = os.path.join(args.qnnsdk, "lib", "hexagon-v**", "unsigned", "*.so") +for file in glob.glob(skelstubfiles): + shutil.copy(file, destJNI) + +# copy qtld-release.aar to $test_app_root/Application/ +destaar = os.path.join(".", "superresolution", "libs") +if not os.path.exists(destaar): + os.makedirs(destaar) +aarfile = os.path.join(args.qnnsdk, "lib", "android", "qtld-release.aar") +shutil.copy(aarfile, destaar) + + +## BUILDING APK +if sys.platform.startswith("win"): + print("Detected platform is windows") + gradleoutput = subprocess.run(["gradlew.bat", "assembleDebug"], cwd=".") +elif 
sys.platform.startswith("darwin"): + print("Detected platform is MAC") + gradleoutput = subprocess.run(["./gradlew", "assembleDebug"], cwd=".") +else: + print("Detected platform is Linux") + gradleoutput = subprocess.run(["./gradlew", "assembleDebug"], cwd=".") + + +## COPYING APK TO CWD +ApkPath = os.path.join( + os.getcwd(), + "superresolution", + "build", + "outputs", + "apk", + "debug", + "superresolution-debug.apk", +) +print("APK Is copied at current Working Directory") +shutil.copy(ApkPath, ".") + + +install_perm = input("Do you want to install this apk in connected device") +## INSTALLING AND RUNNING APK +if install_perm.lower().startswith("y"): + command_to_install = ["adb", "install", "superresolution-debug.apk"] + subprocess.run(command_to_install, cwd=".") + command_to_run = [ + "adb", + "shell", + "am", + "start", + "-a", + "com.example.ACTION_NAME", + "-n", + "com.qcom.imagesuperres/com.qcom.imagesuperres.QNNActivity", + ] + subprocess.run(command_to_run, cwd=".") diff --git a/apps/android/ImageSuperResolution/gradle.properties b/apps/android/ImageSuperResolution/gradle.properties new file mode 100644 index 00000000..08e95206 --- /dev/null +++ b/apps/android/ImageSuperResolution/gradle.properties @@ -0,0 +1,20 @@ +# Project-wide Gradle settings. +# IDE (e.g. Android Studio) users: +# Gradle settings configured through the IDE *will override* +# any settings specified in this file. +# For more details on how to configure your build environment visit +# http://www.gradle.org/docs/current/userguide/build_environment.html +# Specifies the JVM arguments used for the daemon process. +# The setting is particularly useful for tweaking memory settings. +org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8 +# When configured, Gradle will run in incubating parallel mode. +# This option should only be used with decoupled projects. 
More details, visit +# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects +# org.gradle.parallel=true +# AndroidX package structure to make it clearer which packages are bundled with the +# Android operating system, and which are packaged with your app"s APK +# https://developer.android.com/topic/libraries/support-library/androidx-rn +android.useAndroidX=true +# Automatically convert third-party libraries to use AndroidX +#android.enableJetifier=true +android.nonTransitiveRClass=true diff --git a/apps/android/ImageSuperResolution/gradle/wrapper/gradle-wrapper.jar b/apps/android/ImageSuperResolution/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 00000000..c4868dfc --- /dev/null +++ b/apps/android/ImageSuperResolution/gradle/wrapper/gradle-wrapper.jar @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33ad4583fd7ee156f533778736fa1b4940bd83b433934d1cc4e9f608e99a6a89 +size 59536 diff --git a/apps/android/ImageSuperResolution/gradle/wrapper/gradle-wrapper.properties b/apps/android/ImageSuperResolution/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 00000000..4a0ccd62 --- /dev/null +++ b/apps/android/ImageSuperResolution/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,6 @@ +#Fri Sep 09 10:14:39 IST 2022 +distributionBase=GRADLE_USER_HOME +distributionUrl=https\://services.gradle.org/distributions/gradle-7.4-bin.zip +distributionPath=wrapper/dists +zipStorePath=wrapper/dists +zipStoreBase=GRADLE_USER_HOME diff --git a/apps/android/ImageSuperResolution/gradlew b/apps/android/ImageSuperResolution/gradlew new file mode 100644 index 00000000..744e882e --- /dev/null +++ b/apps/android/ImageSuperResolution/gradlew @@ -0,0 +1,185 @@ +#!/usr/bin/env sh + +# +# Copyright 2015 the original author or authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +############################################################################## +## +## Gradle start up script for UN*X +## +############################################################################## + +# Attempt to set APP_HOME +# Resolve links: $0 may be a link +PRG="$0" +# Need this for relative symlinks. +while [ -h "$PRG" ] ; do + ls=`ls -ld "$PRG"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "$PRG"`"/$link" + fi +done +SAVED="`pwd`" +cd "`dirname \"$PRG\"`/" >/dev/null +APP_HOME="`pwd -P`" +cd "$SAVED" >/dev/null + +APP_NAME="Gradle" +APP_BASE_NAME=`basename "$0"` + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD="maximum" + +warn () { + echo "$*" +} + +die () { + echo + echo "$*" + echo + exit 1 +} + +# OS specific support (must be 'true' or 'false'). 
+cygwin=false +msys=false +darwin=false +nonstop=false +case "`uname`" in + CYGWIN* ) + cygwin=true + ;; + Darwin* ) + darwin=true + ;; + MSYS* | MINGW* ) + msys=true + ;; + NONSTOP* ) + nonstop=true + ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD="$JAVA_HOME/jre/sh/java" + else + JAVACMD="$JAVA_HOME/bin/java" + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD="java" + which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." +fi + +# Increase the maximum file descriptors if we can. +if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then + MAX_FD_LIMIT=`ulimit -H -n` + if [ $? -eq 0 ] ; then + if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then + MAX_FD="$MAX_FD_LIMIT" + fi + ulimit -n $MAX_FD + if [ $? -ne 0 ] ; then + warn "Could not set maximum file descriptor limit: $MAX_FD" + fi + else + warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" + fi +fi + +# For Darwin, add options to specify how the application appears in the dock +if $darwin; then + GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" +fi + +# For Cygwin or MSYS, switch paths to Windows format before running java +if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then + APP_HOME=`cygpath --path --mixed "$APP_HOME"` + CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` + + JAVACMD=`cygpath --unix "$JAVACMD"` + + # We build the pattern for arguments to be converted via cygpath + ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` + SEP="" + for dir in $ROOTDIRSRAW ; do + ROOTDIRS="$ROOTDIRS$SEP$dir" + SEP="|" + done + OURCYGPATTERN="(^($ROOTDIRS))" + # Add a user-defined pattern to the cygpath arguments + if [ "$GRADLE_CYGPATTERN" != "" ] ; then + OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" + fi + # Now convert the arguments - kludge to limit ourselves to /bin/sh + i=0 + for arg in "$@" ; do + CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` + CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option + + if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition + eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` + else + eval `echo args$i`="\"$arg\"" + fi + i=`expr $i + 1` + done + case $i in + 0) set -- ;; + 1) set -- "$args0" ;; + 2) set -- "$args0" "$args1" ;; + 3) set -- "$args0" "$args1" "$args2" ;; + 4) set -- "$args0" "$args1" "$args2" "$args3" ;; + 5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; + 6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; + 7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; + 8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; + 9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; + esac +fi + +# Escape application args +save () { + for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done + echo " " +} +APP_ARGS=`save "$@"` + +# Collect all arguments 
for the java command, following the shell quoting and substitution rules +eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" + +exec "$JAVACMD" "$@" diff --git a/apps/android/ImageSuperResolution/gradlew.bat b/apps/android/ImageSuperResolution/gradlew.bat new file mode 100644 index 00000000..ac1b06f9 --- /dev/null +++ b/apps/android/ImageSuperResolution/gradlew.bat @@ -0,0 +1,89 @@ +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. +@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem + +@if "%DEBUG%" == "" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%" == "" set DIRNAME=. +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Resolve any "." and ".." in APP_HOME to make it shorter. +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if "%ERRORLEVEL%" == "0" goto execute + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto execute + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* + +:end +@rem End local scope for the variables with windows NT shell +if "%ERRORLEVEL%"=="0" goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! 
+if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 +exit /b 1 + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/apps/android/ImageSuperResolution/settings.gradle b/apps/android/ImageSuperResolution/settings.gradle new file mode 100644 index 00000000..667b31b5 --- /dev/null +++ b/apps/android/ImageSuperResolution/settings.gradle @@ -0,0 +1,27 @@ +pluginManagement { + repositories { + gradlePluginPortal() + google() + mavenCentral() + } +} + +dependencyResolutionManagement { + repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS) + repositories { + google() + mavenCentral() + maven { // add this repo to use snapshots + name 'ossrh-snapshot' + url 'https://oss.sonatype.org/content/repositories/snapshots' + + } + flatDir { + dirs 'libs' + } + } +} + + +rootProject.name = "superresolution" +include ':superresolution' diff --git a/apps/android/ImageSuperResolution/superresolution/build.gradle b/apps/android/ImageSuperResolution/superresolution/build.gradle new file mode 100644 index 00000000..115fe986 --- /dev/null +++ b/apps/android/ImageSuperResolution/superresolution/build.gradle @@ -0,0 +1,71 @@ + +plugins { + id 'com.android.application' +} + + + +android { + compileSdk 32 + Properties properties = new Properties() + properties.load(project.rootProject.file("build.properties").newDataInputStream()) + + defaultConfig { + applicationId "com.qcom.imagesuperres" + minSdk 26 + targetSdk 32 + versionCode 1 + versionName "1.0" + + testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner" + externalNativeBuild { + cmake { + cppFlags '' + } + } + + resValue "string", "modeltype", properties.getProperty("MODELTYPE", "") + resValue "string", "apptype", properties.getProperty("APPTYPE", "") + + } + aaptOptions { + noCompress "tflite" + } + + buildTypes { + release { + minifyEnabled false + proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro' + } + } + compileOptions { + sourceCompatibility JavaVersion.VERSION_1_8 + targetCompatibility JavaVersion.VERSION_1_8 + } + + packagingOptions + { + doNotStrip "**/*.so" + } +} +project.ext.LIB_DIR = projectDir.toString() + '/libs/' +project.ext.ASSET_DIR = projectDir.toString() + '/src/main/assets' + +dependencies { + + implementation 'androidx.appcompat:appcompat:1.4.2' + implementation 'com.google.android.material:material:1.6.1' + implementation 'androidx.constraintlayout:constraintlayout:2.1.4' + testImplementation 'junit:junit:4.13.2' + androidTestImplementation 'androidx.test.ext:junit:1.1.3' + androidTestImplementation 'androidx.test.espresso:espresso-core:3.4.0' + implementation 'org.tensorflow:tensorflow-lite:2.13.0' + implementation 'org.tensorflow:tensorflow-lite-select-tf-ops:2.9.0' + implementation 'org.tensorflow:tensorflow-lite-support:0.4.3' + + implementation fileTree(dir: "libs", include: ["*.aar"]) + implementation files('libs/qtld-release') +// implementation 'org.pytorch:pytorch_android_lite:1.10.0' + + // implementation fileTree(dir: "libs", include: ["*.aar"]) +} diff --git a/apps/android/ImageSuperResolution/superresolution/proguard-rules.pro b/apps/android/ImageSuperResolution/superresolution/proguard-rules.pro new file mode 100644 index 00000000..36e00091 --- /dev/null +++ b/apps/android/ImageSuperResolution/superresolution/proguard-rules.pro @@ -0,0 +1,21 @@ +# Add project specific ProGuard rules here. +# You can control the set of applied configuration files using the +# proguardFiles setting in build.gradle. 
+# +# For more details, see +# http://developer.android.com/guide/developing/tools/proguard.html + +# If your project uses WebView with JS, uncomment the following +# and specify the fully qualified class name to the JavaScript interface +# class: +#-keepclassmembers class fqcn.of.javascript.interface.for.webview { +# public *; +#} + +# Uncomment this to preserve the line number information for +# debugging stack traces. +#-keepattributes SourceFile,LineNumberTable + +# If you keep the line number information, uncomment this to +# hide the original source file name. +#-renamesourcefileattribute SourceFile diff --git a/apps/android/ImageSuperResolution/superresolution/src/main/AndroidManifest.xml b/apps/android/ImageSuperResolution/superresolution/src/main/AndroidManifest.xml new file mode 100644 index 00000000..e4b778c7 --- /dev/null +++ b/apps/android/ImageSuperResolution/superresolution/src/main/AndroidManifest.xml @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + + + + diff --git a/apps/android/ImageSuperResolution/superresolution/src/main/assets/Sample1.jpg b/apps/android/ImageSuperResolution/superresolution/src/main/assets/Sample1.jpg new file mode 100644 index 00000000..7a647f20 --- /dev/null +++ b/apps/android/ImageSuperResolution/superresolution/src/main/assets/Sample1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:811f0c08ff16ef506de4855ca2b609ba6c67622f5d65550896cdd7e60c200db2 +size 17244 diff --git a/apps/android/ImageSuperResolution/superresolution/src/main/assets/Sample2.jpg b/apps/android/ImageSuperResolution/superresolution/src/main/assets/Sample2.jpg new file mode 100644 index 00000000..84a8f5a3 --- /dev/null +++ b/apps/android/ImageSuperResolution/superresolution/src/main/assets/Sample2.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1916e20918eb069c147377b581647c0d86277fdf01ba8e2ed07cf995e2a86cee +size 13864 diff --git a/apps/android/ImageSuperResolution/superresolution/src/main/java/com/qcom/imagesuperres/QNNActivity.java b/apps/android/ImageSuperResolution/superresolution/src/main/java/com/qcom/imagesuperres/QNNActivity.java new file mode 100644 index 00000000..6f6fa9fb --- /dev/null +++ b/apps/android/ImageSuperResolution/superresolution/src/main/java/com/qcom/imagesuperres/QNNActivity.java @@ -0,0 +1,243 @@ +// --------------------------------------------------------------------- +// Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------------- +package com.qcom.imagesuperres; + +import android.graphics.Bitmap; +import android.graphics.BitmapFactory; +import android.os.Bundle; +import android.view.MotionEvent; +import android.view.View; +import android.view.WindowManager; +import android.widget.AdapterView; +import android.widget.ArrayAdapter; +import android.widget.ImageView; +import android.widget.ProgressBar; +import android.widget.RadioGroup; +import android.widget.Spinner; +import android.widget.TextView; +import android.widget.Toast; +import androidx.appcompat.app.AppCompatActivity; +import java.io.IOException; +import java.io.InputStream; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +public class QNNActivity extends AppCompatActivity { + public static final String MODEL_FILE_NAME = "superresmodel.tflite"; //Model file name + public static InputStream originalFile = null; + + private boolean spinInitialized = false; + private boolean radioGroupInitialized = false; + SuperResolution superResolution; + + String prev_runtime = ""; + //creating objects for UI element used in layout files (activity_superres.xml) + TextView txt_stat, tx_pr, tx_out, tx_sug; + private static int input_dims1 = 128; + private static int input_dims2 = 128; + ImageView imageView, imageView2; + RadioGroup radioGroup; + Bitmap bmps = null; + public static Result result = null; + Spinner spin; + String[] options = {"No Selection","Sample1.jpg","Sample2.jpg"}; //Image filenames on which model inference is made + protected void executeRadioButton(int checkedId) { + + ProgressBar progressBar; + progressBar = findViewById(R.id.indeterminateBar); + ExecutorService service = Executors.newSingleThreadExecutor(); + progressBar.setVisibility(View.VISIBLE); + getWindow().setFlags(WindowManager.LayoutParams.FLAG_NOT_TOUCHABLE, + WindowManager.LayoutParams.FLAG_NOT_TOUCHABLE); + + service.execute(new Runnable() { + @Override + public void run() { + try { + switch (checkedId) { + case R.id.rb1: + // set text for your textview here + System.out.println("CPU instance running"); + result = process(bmps, "TFLITE"); + break; + + case R.id.rb3: + System.out.println("NPU instance running"); + result = process(bmps, "QNNDELEGATE"); + break; + default: + System.out.println("Do Nothing"); + } + boolean final_status = result.getStatus(); + final String final_timestr = "INFERENCE TIME: "+ String.valueOf(result.getInferenceTime())+" ms"; + runOnUiThread(new Runnable() { + @Override + public void run() { + txt_stat.setText(final_timestr); + progressBar.setVisibility(View.INVISIBLE); + getWindow().clearFlags(WindowManager.LayoutParams.FLAG_NOT_TOUCHABLE); + if (final_status == true) { + String remark = result.getRemarks(); + if(!remark.equals("")) + Toast.makeText(getApplicationContext(),remark,Toast.LENGTH_LONG).show(); + imageView2.setImageBitmap(result.getResults().getHighResolutionImages()[0]); + imageView2.setVisibility(View.VISIBLE); + System.out.println("result displayed"); + txt_stat.setVisibility(View.VISIBLE); + tx_pr.setVisibility(View.INVISIBLE); + tx_out.setVisibility(View.VISIBLE); + tx_sug.setVisibility(View.VISIBLE); + } + } + }); + } catch (Exception e) { + runOnUiThread(new Runnable() { + @Override + public void run() { + getWindow().clearFlags(WindowManager.LayoutParams.FLAG_NOT_TOUCHABLE); + e.printStackTrace(); + } + }); + } + } + }); + } + @Override + protected void onCreate(Bundle savedInstanceState) { + 
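+        // onCreate wires up the views declared in activity_superres.xml (sample-image spinner,
+        // runtime radio buttons, input/output ImageViews, status text) and keeps the output
+        // views hidden until an inference has produced a result.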
super.onCreate(savedInstanceState); + setContentView(R.layout.activity_superres); + spin = (Spinner) findViewById((R.id.spinner)); + txt_stat = findViewById(R.id.textView4); + imageView = findViewById(R.id.im1); + imageView2 = findViewById(R.id.im2); + radioGroup = findViewById(R.id.rg1); + tx_pr = findViewById(R.id.textView); + tx_out = findViewById(R.id.textView2); + tx_sug = findViewById(R.id.textView_suggest); + imageView2.setVisibility(View.INVISIBLE); + tx_out.setVisibility(View.INVISIBLE); + tx_sug.setVisibility(View.INVISIBLE); + + superResolution = new SuperResolution(); + + imageView2.setOnTouchListener((view, motionEvent) -> { + switch (motionEvent.getAction()) { + case MotionEvent.ACTION_DOWN: { + imageView2.setVisibility(view.INVISIBLE); + tx_out.setVisibility(view.INVISIBLE); + tx_pr.setVisibility(view.VISIBLE); + break; + } + case MotionEvent.ACTION_UP: { + imageView2.setVisibility(view.VISIBLE); + tx_out.setVisibility(view.VISIBLE); + tx_pr.setVisibility(view.INVISIBLE); + break; + } + } + return false; + }); + + ArrayAdapter ad = new ArrayAdapter(this, android.R.layout.simple_spinner_item, options); + ad.setDropDownViewResource(android.R.layout.simple_spinner_dropdown_item); + spin.setAdapter(ad); + spin.setOnItemSelectedListener(new AdapterView.OnItemSelectedListener() { + @Override + public void onItemSelected(AdapterView parent, View view, int position, long id) { + // loading picture from assets... + if (!parent.getItemAtPosition(position).equals("No Selection")) { + imageView2.setImageResource(R.drawable.ic_launcher_background); + txt_stat.setText("Stats"); + try { + originalFile = getAssets().open((String) parent.getItemAtPosition(position)); + } catch (IOException e) { + e.printStackTrace(); + } + + // Convert input image to Bitmap + bmps = BitmapFactory.decodeStream(originalFile); + Bitmap scaled1 = Bitmap.createScaledBitmap(bmps, input_dims1, input_dims2, true); + try { + // Set the input image in UI view + imageView.setImageBitmap(scaled1); + + } catch (Exception e) { + e.printStackTrace(); + } + int checkedID_RB = radioGroup.getCheckedRadioButtonId(); + if (originalFile!=null && bmps!=null && checkedID_RB !=-1){ + executeRadioButton(checkedID_RB); + } + + } + else{ + originalFile=null; + bmps=null; + imageView.setImageResource(R.drawable.ic_launcher_background); + imageView2.setImageResource(R.drawable.ic_launcher_background); + imageView2.setVisibility(view.INVISIBLE); + txt_stat.setText("Stats"); + radioGroup.clearCheck(); + + if(spinInitialized){ + Toast.makeText(getApplicationContext(), "Please select image first", Toast.LENGTH_SHORT).show(); + } + else + { + spinInitialized = true; + } + } + } + @Override + public void onNothingSelected(AdapterView parent) { + System.out.println("Nothing"); + } + }); + + radioGroup.setOnCheckedChangeListener(new RadioGroup.OnCheckedChangeListener() { + @Override + public void onCheckedChanged(RadioGroup group, int checkedId) { + if (originalFile!=null && bmps!=null){ + executeRadioButton(checkedId); + } + else{ + if(radioGroupInitialized) { + Toast.makeText(getApplicationContext(), "Please select image first", Toast.LENGTH_SHORT).show(); + } + else + { + radioGroupInitialized = true; + } + } + } + }); + } + + public Result process(Bitmap bmps, String run_time) { + + Result result; + try { + + if(superResolution.getBuildStatus()==false) + superResolution.initializeModel(this, MODEL_FILE_NAME); + + //INFERENCING ON MODEL + result = superResolution.inference(new Bitmap[]{bmps}, run_time); + return result; + + } catch 
(Exception e) { + e.printStackTrace(); + return null; + } + + } + + @Override + protected void onDestroy() + { + super.onDestroy(); + superResolution.close(); + } +} diff --git a/apps/android/ImageSuperResolution/superresolution/src/main/java/com/qcom/imagesuperres/Result.java b/apps/android/ImageSuperResolution/superresolution/src/main/java/com/qcom/imagesuperres/Result.java new file mode 100644 index 00000000..17c5f909 --- /dev/null +++ b/apps/android/ImageSuperResolution/superresolution/src/main/java/com/qcom/imagesuperres/Result.java @@ -0,0 +1,38 @@ +// --------------------------------------------------------------------- +// Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------------- +package com.qcom.imagesuperres; +import java.util.List; + +public class Result { + + private final E results; + private final long inferenceTime; + private final String remarks; + private boolean status = false; + public Result(E results, long inferenceTime,String remarks) { + + this.results = results; + this.inferenceTime = inferenceTime; + this.remarks = remarks; + + + if(inferenceTime>0) this.status = true; + } + + public E getResults() { + return results; + } + + public String getRemarks() { + return remarks; + } + + public long getInferenceTime() { + return inferenceTime; + } + + public boolean getStatus(){return status; } + +} diff --git a/apps/android/ImageSuperResolution/superresolution/src/main/java/com/qcom/imagesuperres/SuperResolution.java b/apps/android/ImageSuperResolution/superresolution/src/main/java/com/qcom/imagesuperres/SuperResolution.java new file mode 100644 index 00000000..3b96a77f --- /dev/null +++ b/apps/android/ImageSuperResolution/superresolution/src/main/java/com/qcom/imagesuperres/SuperResolution.java @@ -0,0 +1,165 @@ +// --------------------------------------------------------------------- +// Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------------- +package com.qcom.imagesuperres; + + +import android.content.Context; +import android.graphics.Bitmap; +import java.io.IOException; +import java.nio.MappedByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import com.qualcomm.qti.QnnDelegate; +import org.tensorflow.lite.Interpreter; +import android.content.res.AssetFileDescriptor; +import android.content.res.AssetManager; +import android.util.Log; +import android.widget.ImageView; +import java.io.FileInputStream; +import java.nio.channels.FileChannel; + +public class SuperResolution { + private MappedByteBuffer tfLiteModel; + private Interpreter tfLite; + private Interpreter tfLite_QNN; + + private boolean model_loaded= false; + + private QnnDelegate qnnDelegate = null; + private static final String TAG = "SUPERRES"; + private static Utils util = new Utils(); + + + private static MappedByteBuffer loadModelFile(AssetManager assets, String modelFilename) + throws IOException { + AssetFileDescriptor fileDescriptor = assets.openFd(modelFilename); + FileInputStream inputStream = new FileInputStream(fileDescriptor.getFileDescriptor()); + FileChannel fileChannel = inputStream.getChannel(); + long startOffset = fileDescriptor.getStartOffset(); + long declaredLength = fileDescriptor.getDeclaredLength(); + return fileChannel.map(FileChannel.MapMode.READ_ONLY, startOffset, declaredLength); + } + + public void close() + { + if(qnnDelegate!=null) { + qnnDelegate.close(); + } + + if(tfLite != null){ + tfLite.close(); + } + + if(tfLiteModel!=null) + tfLiteModel.clear(); + } + + public boolean getBuildStatus() + { + return model_loaded; + } + public boolean initializeModel(Context context, String tflitemodelfileName) { + + //If modeltype is in Red, it will resolved after building the app + String kk = context.getString(R.string.modeltype); + Log.i(TAG,"MY STRING FROM GetProperty is : "+kk); + + if(kk == "ESRGAN") + util = new UtilsESRGAN(); + + try { + tfLiteModel = loadModelFile(context.getApplicationContext().getAssets(), tflitemodelfileName); + Log.i(TAG, "MODEL LOADED"); + Interpreter.Options tfLiteOptions = new Interpreter.Options(); + tfLiteOptions.setNumThreads(4); + tfLiteOptions.setUseXNNPACK(true); + tfLite = new Interpreter(tfLiteModel, tfLiteOptions); + + QnnDelegate.Options options = new QnnDelegate.Options(); + options.setBackendType(QnnDelegate.Options.BackendType.HTP_BACKEND); + options.setHtpPerformanceMode(QnnDelegate.Options.HtpPerformanceMode.HTP_PERFORMANCE_BURST); + options.setHtpPrecision(QnnDelegate.Options.HtpPrecision.HTP_PRECISION_FP16); + + Log.i(TAG, "NATIVE LIB PATH: " + context.getApplicationInfo().nativeLibraryDir); + options.setSkelLibraryDir(context.getApplicationInfo().nativeLibraryDir); + qnnDelegate = new QnnDelegate(options); + tfLiteOptions.addDelegate(qnnDelegate); + tfLite_QNN = new Interpreter(tfLiteModel,tfLiteOptions); + Log.i(TAG, "QnnDelegate Option Added "); + model_loaded= true; + return true; + } + catch (Exception e) + { + e.printStackTrace(); + return false; + } + + } + + public Result inference(Bitmap[] images, String backend) { + System.out.println("Processing %d images %dx%d."+ images.length+ images[0].getWidth()+ images[0].getHeight()); + String remarks = ""; + try{ + int[] arr = tfLite.getInputTensor(0).shape(); + int input_dims1 = arr[1]; + int input_dims2 = arr[2]; + + 
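+            // The demo assumes a square (1:1) model input; anything else is flagged via the
+            // remarks string so the UI can show a warning alongside the result.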
if(input_dims1!=input_dims2) + { + remarks = "THIS APP IS DESIGNED FOR 1:1 ASPECT RATIO"; + } + //PREPROCESSING INPUT to Model input Shape and Normalizing data + Bitmap scaledBitmap = Bitmap.createScaledBitmap(images[0],input_dims1,input_dims2,true); + float[][][][] floatinputarray = new float[1][input_dims1][input_dims2][3]; + util.PreProcess(scaledBitmap,input_dims1,input_dims2,floatinputarray); + + Object[] inputArray = {floatinputarray}; + int[] out_arr = tfLite.getOutputTensor(0).shape(); + int output_dims1 = out_arr[1]; + int output_dims2 = out_arr[2]; + + float[][][][] floatoutputarray = new float[1][output_dims1][output_dims2][3]; + Map outputMap = new HashMap<>(); + outputMap.put(0, floatoutputarray); + + Log.i(TAG, "inputTensor shape"+ Arrays.toString(tfLite.getInputTensor(0).shape())); + long inferenceStartTime = System.nanoTime(); + if (backend.equals("QNNDELEGATE") && tfLite_QNN != null) { + System.out.println("QNN BACKEND"); + tfLite_QNN.runForMultipleInputsOutputs(inputArray, outputMap); + } + else if (backend.equals("TFLITE") && tfLite != null) { + System.out.println("TFLITE BACKEND"); + tfLite.runForMultipleInputsOutputs(inputArray, outputMap); + } + else + { + System.out.println("PROBLEM WITH Model Iinitilization"); + } + long inferenceEndTime = System.nanoTime(); + Log.i(TAG,"MODEL EXECUTED"); + System.out.println("Inference time: "+ (inferenceEndTime - inferenceStartTime) / 1000);// calculated inference time + + + Bitmap outbmp = Bitmap.createBitmap(output_dims1, output_dims2, Bitmap.Config.ARGB_8888); + util.PostProcess(outbmp, output_dims1, output_dims2, floatoutputarray); + + Bitmap[] finalProcessedImages = new Bitmap[images.length]; + finalProcessedImages[0] = outbmp; + + SuperResolutionResult result = new SuperResolutionResult(finalProcessedImages); + return new Result<>(result, + (inferenceEndTime - inferenceStartTime) / 1000000, remarks); + + } catch (Exception ex) { + ex.printStackTrace(); + } + return null; + } +} diff --git a/apps/android/ImageSuperResolution/superresolution/src/main/java/com/qcom/imagesuperres/SuperResolutionResult.java b/apps/android/ImageSuperResolution/superresolution/src/main/java/com/qcom/imagesuperres/SuperResolutionResult.java new file mode 100644 index 00000000..6e825468 --- /dev/null +++ b/apps/android/ImageSuperResolution/superresolution/src/main/java/com/qcom/imagesuperres/SuperResolutionResult.java @@ -0,0 +1,19 @@ +// --------------------------------------------------------------------- +// Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------------- +package com.qcom.imagesuperres; + +import android.graphics.Bitmap; + +public class SuperResolutionResult { + private final Bitmap[] highResolutionImages; + + public SuperResolutionResult(Bitmap[] highResolutionImages) { + this.highResolutionImages = highResolutionImages; + } + + public Bitmap[] getHighResolutionImages() { + return highResolutionImages; + } +} diff --git a/apps/android/ImageSuperResolution/superresolution/src/main/java/com/qcom/imagesuperres/Utils.java b/apps/android/ImageSuperResolution/superresolution/src/main/java/com/qcom/imagesuperres/Utils.java new file mode 100644 index 00000000..8aa6a2f8 --- /dev/null +++ b/apps/android/ImageSuperResolution/superresolution/src/main/java/com/qcom/imagesuperres/Utils.java @@ -0,0 +1,36 @@ +// --------------------------------------------------------------------- +// Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------------- +package com.qcom.imagesuperres; + +import android.graphics.Bitmap; +import android.graphics.Color; + +public class Utils { + + public void PreProcess(Bitmap inputBitmap, int input_dims1, int input_dims2, float[][][][] floatinputarray){ + for (int x = 0; x < input_dims1; x++) { + for (int y = 0; y < input_dims2; y++) { + int pixel = inputBitmap.getPixel(x, y); + // Normalize channel values to [-1.0, 1.0]. Here, pixel values + // are positive so the effective range will be [0.0, 1.0] + floatinputarray[0][x][y][0] = (Color.red(pixel))/255.0f; + floatinputarray[0][x][y][1] = (Color.green(pixel))/255.0f; + floatinputarray[0][x][y][2] = (Color.blue(pixel))/255.0f; + } + } + } + + public void PostProcess(Bitmap outbmp, int output_dims1, int output_dims2, float[][][][] floatoutputarray) { + for (int x = 0; x < output_dims1; x++) { + for (int y = 0; y < output_dims2; y++) { + int red = (int) (Math.max(0, Math.min(255, floatoutputarray[0][x][y][0] * 255))); + int green = (int) (Math.max(0, Math.min(255, floatoutputarray[0][x][y][1] * 255))); + int blue = (int) (Math.max(0, Math.min(255, floatoutputarray[0][x][y][2] * 255))); + int color = Color.argb(255, red, green, blue); + outbmp.setPixel(x, y, color); + } + } + } +} diff --git a/apps/android/ImageSuperResolution/superresolution/src/main/java/com/qcom/imagesuperres/UtilsESRGAN.java b/apps/android/ImageSuperResolution/superresolution/src/main/java/com/qcom/imagesuperres/UtilsESRGAN.java new file mode 100644 index 00000000..80f3f877 --- /dev/null +++ b/apps/android/ImageSuperResolution/superresolution/src/main/java/com/qcom/imagesuperres/UtilsESRGAN.java @@ -0,0 +1,35 @@ +// --------------------------------------------------------------------- +// Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------------- +package com.qcom.imagesuperres; + +import android.graphics.Bitmap; +import android.graphics.Color; +import android.util.Log; + +public class UtilsESRGAN extends Utils{ + + public void PreProcess(Bitmap inputBitmap, int input_dims1, int input_dims2, float[][][][] floatinputarray){ + for (int x = 0; x < input_dims1; x++) { + for (int y = 0; y < input_dims2; y++) { + int pixel = inputBitmap.getPixel(x, y); + floatinputarray[0][x][y][0] = Color.red(pixel); + floatinputarray[0][x][y][1] = Color.green(pixel); + floatinputarray[0][x][y][2] = Color.blue(pixel); + } + } + } + + public void PostProcess(Bitmap outbmp, int output_dims1, int output_dims2, float[][][][] floatoutputarray) { + for (int x = 0; x < output_dims1; x++) { + for (int y = 0; y < output_dims2; y++) { + int red = (int) (Math.max(0, Math.min(255, floatoutputarray[0][x][y][0]))); + int green = (int) (Math.max(0, Math.min(255, floatoutputarray[0][x][y][1]))); + int blue = (int) (Math.max(0, Math.min(255, floatoutputarray[0][x][y][2]))); + int color = Color.argb(255, red, green, blue); + outbmp.setPixel(x, y, color); + } + } + } +} diff --git a/apps/android/ImageSuperResolution/superresolution/src/main/res/drawable-v24/ic_launcher_foreground.xml b/apps/android/ImageSuperResolution/superresolution/src/main/res/drawable-v24/ic_launcher_foreground.xml new file mode 100644 index 00000000..1ff1154f --- /dev/null +++ b/apps/android/ImageSuperResolution/superresolution/src/main/res/drawable-v24/ic_launcher_foreground.xml @@ -0,0 +1,30 @@ + + + + + + + + + + + diff --git a/apps/android/ImageSuperResolution/superresolution/src/main/res/drawable/ic_launcher_background.xml b/apps/android/ImageSuperResolution/superresolution/src/main/res/drawable/ic_launcher_background.xml new file mode 100644 index 00000000..a4f78de5 --- /dev/null +++ b/apps/android/ImageSuperResolution/superresolution/src/main/res/drawable/ic_launcher_background.xml @@ -0,0 +1,170 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/apps/android/ImageSuperResolution/superresolution/src/main/res/layout/activity_superres.xml b/apps/android/ImageSuperResolution/superresolution/src/main/res/layout/activity_superres.xml new file mode 100644 index 00000000..11ccc7fe --- /dev/null +++ b/apps/android/ImageSuperResolution/superresolution/src/main/res/layout/activity_superres.xml @@ -0,0 +1,140 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-anydpi-v26/ic_launcher.xml b/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-anydpi-v26/ic_launcher.xml new file mode 100644 index 00000000..3564f5b0 --- /dev/null +++ b/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-anydpi-v26/ic_launcher.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml b/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml new file mode 100644 index 00000000..0351084b --- /dev/null +++ b/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-hdpi/ic_launcher.png 
b/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-hdpi/ic_launcher.png new file mode 100644 index 00000000..1ae43951 --- /dev/null +++ b/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-hdpi/ic_launcher.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ee6ca2903e3094d64110dee90aea432f9eb9bc747c7e5c134496b8f7feff3b8 +size 3593 diff --git a/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-hdpi/ic_launcher_round.png b/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-hdpi/ic_launcher_round.png new file mode 100644 index 00000000..e898edde --- /dev/null +++ b/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-hdpi/ic_launcher_round.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:876e35db6919f81d28dbe042d8535414b2b3bb13d1139c16265f652ca5df65ac +size 5339 diff --git a/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-mdpi/ic_launcher.png b/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-mdpi/ic_launcher.png new file mode 100644 index 00000000..10afb4e6 --- /dev/null +++ b/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-mdpi/ic_launcher.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62c4db47abd938c35f4926ea0b7b31b9d6c41bef1ddacb2c7685b5c6ea0890e5 +size 2636 diff --git a/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-mdpi/ic_launcher_round.png b/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-mdpi/ic_launcher_round.png new file mode 100644 index 00000000..dd1c781b --- /dev/null +++ b/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-mdpi/ic_launcher_round.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9bc0ce206c8715db22eacfbceddff820f9b8a0ef3a519bbb88f7b6e65806d71 +size 3388 diff --git a/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-xhdpi/ic_launcher.png b/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-xhdpi/ic_launcher.png new file mode 100644 index 00000000..cc082f9a --- /dev/null +++ b/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-xhdpi/ic_launcher.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a5e85ebec8c77c18f5c34d762949674373c7a95de57a8a82ce165c8db8cedbb +size 4926 diff --git a/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-xhdpi/ic_launcher_round.png b/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-xhdpi/ic_launcher_round.png new file mode 100644 index 00000000..3910511a --- /dev/null +++ b/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-xhdpi/ic_launcher_round.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81b31d4cabd3e4358db2d99e6c31474b82ae6379dc06ea0c00267f41da1bd599 +size 7472 diff --git a/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-xxhdpi/ic_launcher.png b/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-xxhdpi/ic_launcher.png new file mode 100644 index 00000000..db7bbe05 --- /dev/null +++ b/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-xxhdpi/ic_launcher.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01cec2d4d6cc59f250e4bbfa445042c3a3b9ebf9788b9f34796e85c0af874da4 +size 7909 diff --git 
a/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-xxhdpi/ic_launcher_round.png b/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-xxhdpi/ic_launcher_round.png new file mode 100644 index 00000000..17327017 --- /dev/null +++ b/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-xxhdpi/ic_launcher_round.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf24682fa050affbc3bdaae15cd5532c13758a5a0e6d5305cfad52ddefc4d571 +size 11873 diff --git a/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-xxxhdpi/ic_launcher.png b/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-xxxhdpi/ic_launcher.png new file mode 100644 index 00000000..6413db3b --- /dev/null +++ b/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-xxxhdpi/ic_launcher.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2f8a8d39587eb912f8d217f2160c4ba4e21a60fd7a6f84849c102bcac725975 +size 10652 diff --git a/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-xxxhdpi/ic_launcher_round.png b/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-xxxhdpi/ic_launcher_round.png new file mode 100644 index 00000000..5d1c9ba8 --- /dev/null +++ b/apps/android/ImageSuperResolution/superresolution/src/main/res/mipmap-xxxhdpi/ic_launcher_round.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7ed7010aa67cb843af6df357ff1a74ec1598d2407e4884d12633aa0f11e3a4b +size 16570 diff --git a/apps/android/ImageSuperResolution/superresolution/src/main/res/values-night/themes.xml b/apps/android/ImageSuperResolution/superresolution/src/main/res/values-night/themes.xml new file mode 100644 index 00000000..a9e583cb --- /dev/null +++ b/apps/android/ImageSuperResolution/superresolution/src/main/res/values-night/themes.xml @@ -0,0 +1,17 @@ + + + + + diff --git a/apps/android/ImageSuperResolution/superresolution/src/main/res/values/colors.xml b/apps/android/ImageSuperResolution/superresolution/src/main/res/values/colors.xml new file mode 100644 index 00000000..977bb9f2 --- /dev/null +++ b/apps/android/ImageSuperResolution/superresolution/src/main/res/values/colors.xml @@ -0,0 +1,11 @@ + + + + #FFBB86FC + #FF6200EE + #FF3700B3 + #FF03DAC5 + #FF018786 + #FF000000 + #FFFFFFFF + diff --git a/apps/android/ImageSuperResolution/superresolution/src/main/res/values/strings.xml b/apps/android/ImageSuperResolution/superresolution/src/main/res/values/strings.xml new file mode 100644 index 00000000..cdac71e2 --- /dev/null +++ b/apps/android/ImageSuperResolution/superresolution/src/main/res/values/strings.xml @@ -0,0 +1,4 @@ + + + Super_Resolution + diff --git a/apps/android/ImageSuperResolution/superresolution/src/main/res/values/themes.xml b/apps/android/ImageSuperResolution/superresolution/src/main/res/values/themes.xml new file mode 100644 index 00000000..e69989a9 --- /dev/null +++ b/apps/android/ImageSuperResolution/superresolution/src/main/res/values/themes.xml @@ -0,0 +1,18 @@ + + + + + + diff --git a/qai_hub_models/_version.py b/qai_hub_models/_version.py index 9b229525..1fab5070 100644 --- a/qai_hub_models/_version.py +++ b/qai_hub_models/_version.py @@ -2,4 +2,4 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
# SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- -__version__ = "0.3.2" +__version__ = "0.4.0" diff --git a/qai_hub_models/asset_bases.yaml b/qai_hub_models/asset_bases.yaml index 1b110e24..124db058 100644 --- a/qai_hub_models/asset_bases.yaml +++ b/qai_hub_models/asset_bases.yaml @@ -1,7 +1,7 @@ store_url: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models web_asset_folder: models/{model_id}/web-assets -static_web_banner_filename: banner.png -animated_web_banner_filename: banner.mp4 +static_web_banner_filename: model_demo.png +animated_web_banner_filename: model_demo.mp4 model_asset_folder: models/{model_id}/v{version} dataset_asset_folder: datasets/{dataset_id}/v{version} repo_url: https://github.com/quic/ai-hub-models/blob/main diff --git a/qai_hub_models/conftest.py b/qai_hub_models/conftest.py index 6f57c5d0..9dd11824 100644 --- a/qai_hub_models/conftest.py +++ b/qai_hub_models/conftest.py @@ -6,3 +6,10 @@ def pytest_configure(config): config.addinivalue_line("markers", "compile: Run compile tests.") config.addinivalue_line("markers", "profile: Run profile tests.") config.addinivalue_line("markers", "inference: Run inference tests.") + config.addinivalue_line("markers", "trace: Run trace accuracy tests.") + + +def pytest_collection_modifyitems(items, config): + for item in items: + if not any(item.iter_markers()): + item.add_marker("unmarked") diff --git a/qai_hub_models/global_requirements.txt b/qai_hub_models/global_requirements.txt new file mode 100644 index 00000000..cdfa95ed --- /dev/null +++ b/qai_hub_models/global_requirements.txt @@ -0,0 +1,44 @@ +# If you: +# - Install requirements.txt +# - Run the aimet installation script +# - Then install this requirements file +# That should create an environment that works for every single model. 
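+#
+# This file itself is installed with `pip install -r qai_hub_models/global_requirements.txt`;
+# most packages below are pinned to exact versions so that the shared environment stays
+# reproducible.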
+ +PySoundFile; sys_platform == 'win32' +albumentations==0.5.2 +av==10.0.0 +basicsr==1.4.2 +click==8.0 +datasets==2.14.5 +diffusers[torch]==0.21.4 +easydict==1.10 +ffmpeg==1.4 +ftfy==6.1.1 +hydra-core==1.3.0 +imageio[ffmpeg]==2.31.5 +kornia==0.5.0 +librosa==0.10.1 +matplotlib==3.7.4 +mmcv==2.1.0 +mmdet==3.2.0 +mmpose==1.2.0 +openai-whisper==20230314 +pycocotools==2.0.7 +pytorch-lightning==1.6.0 +regex==2023.12.25 +scikit-image==0.21.0 +scikit-learn==1.1.3 +scipy==1.8.1 +seaborn==0.11.0 +sentencepiece==0.2.0 +soundfile==0.12.1 +tflite==2.10.0 +thop==0.1.1.post2209072238 +timm==0.9.11 +tensorboard==2.13.0 +torchaudio==0.13.1 +transformers==4.27.4 +tucker-conv==1.0.1 +ultralytics==8.0.193 +webdataset==0.2.86 +yacs==0.1.8 diff --git a/qai_hub_models/models/_shared/cityscapes_segmentation/demo.py b/qai_hub_models/models/_shared/cityscapes_segmentation/demo.py index 4fb3518d..25921362 100644 --- a/qai_hub_models/models/_shared/cityscapes_segmentation/demo.py +++ b/qai_hub_models/models/_shared/cityscapes_segmentation/demo.py @@ -49,7 +49,7 @@ def cityscapes_segmentation_demo( help="File path or URL to an input image to use for the demo.", ) args = parser.parse_args([] if is_test else None) - validate_on_device_demo_args(args, model_type.get_model_id()) + validate_on_device_demo_args(args, model_id) if args.image is None: image = TEST_CITYSCAPES_LIKE_IMAGE_ASSET.fetch() @@ -60,7 +60,7 @@ def cityscapes_segmentation_demo( input_spec = model_type.get_input_spec() - inference_model = demo_model_from_cli_args(model_type, args) + inference_model = demo_model_from_cli_args(model_type, model_id, args) app = CityscapesSegmentationApp(inference_model) (_, _, height, width) = input_spec["image"][0] diff --git a/qai_hub_models/models/_shared/common.py b/qai_hub_models/models/_shared/common.py index 5e2038d4..af10bca3 100644 --- a/qai_hub_models/models/_shared/common.py +++ b/qai_hub_models/models/_shared/common.py @@ -2,11 +2,33 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- -from typing import Type +from __future__ import annotations + +from typing import Callable, Type import torch +def apply_module_function_recursively( + module: torch.nn.Module, + tgt_cls: Type[torch.nn.Module], + apply_fn: Callable[torch.nn.Module, torch.nn.Module, str], + parent_module: Type[torch.nn.Module] = None, +): + """ + Recursively calls a function on all modules of a given type. + + The function `apply_fn` passes in the module, the parent module, and the + name of the module inside the parent module. 
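+
+    For example, `replace_module_recursively` below uses this helper with an `apply_fn`
+    that swaps each matching child module for a new instance via `setattr`.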
+ """ + for name, child in module.named_children(): + if isinstance(child, tgt_cls): + if parent_module is None or isinstance(module, parent_module): + apply_fn(child, module, name) + else: + apply_module_function_recursively(child, tgt_cls, apply_fn, parent_module) + + def replace_module_recursively( module: torch.nn.Module, tgt_cls: Type[torch.nn.Module], @@ -18,9 +40,8 @@ def replace_module_recursively( specified, `tgt_cls` instance must be an immediate member of `parent_module` (useful for limiting replacement scope) """ - for name, child in module.named_children(): - if isinstance(child, tgt_cls): - if parent_module is None or isinstance(module, parent_module): - setattr(module, name, new_cls(child)) - else: - replace_module_recursively(child, tgt_cls, new_cls) + + def apply_fn(child, pmodule, name): + setattr(pmodule, name, new_cls(child)) + + apply_module_function_recursively(module, tgt_cls, apply_fn, parent_module) diff --git a/qai_hub_models/models/_shared/deeplab/demo.py b/qai_hub_models/models/_shared/deeplab/demo.py index a14b7064..1b4b999d 100644 --- a/qai_hub_models/models/_shared/deeplab/demo.py +++ b/qai_hub_models/models/_shared/deeplab/demo.py @@ -21,6 +21,7 @@ def deeplabv3_demo( model_type: Type[BaseModel], + model_id: str, default_image: str | CachedWebAsset, num_classes: int, is_test: bool, @@ -35,7 +36,7 @@ def deeplabv3_demo( help="image file path or URL.", ) args = parser.parse_args([] if is_test else None) - validate_on_device_demo_args(args, model_type.get_model_id()) + validate_on_device_demo_args(args, model_id) input_spec = model_type.get_input_spec() @@ -47,7 +48,7 @@ def deeplabv3_demo( # This DeepLabV3 ResNet 50 demo comes from # https://pytorch.org/hub/pytorch_vision_deeplabv3_resnet101/ input_image = image.convert("RGB") - inference_model = demo_model_from_cli_args(model_type, args) + inference_model = demo_model_from_cli_args(model_type, model_id, args) app = DeepLabV3App(inference_model, num_classes=num_classes) # Run app diff --git a/qai_hub_models/models/_shared/detr/demo.py b/qai_hub_models/models/_shared/detr/demo.py index 0a513b0a..e0eeb66e 100644 --- a/qai_hub_models/models/_shared/detr/demo.py +++ b/qai_hub_models/models/_shared/detr/demo.py @@ -24,6 +24,7 @@ # The demo will display the predicted mask in a window. 
def detr_demo( model: Type[BaseModel], + model_id: str, default_weights: str, default_image: str | CachedWebAsset, is_test: bool = False, @@ -38,10 +39,10 @@ def detr_demo( help="test image file path or URL", ) args = parser.parse_args([] if is_test else None) - validate_on_device_demo_args(args, model.get_model_id()) + validate_on_device_demo_args(args, model_id) # Load image & model - detr = demo_model_from_cli_args(model, args) + detr = demo_model_from_cli_args(model, model_id, args) # Run app to scores, labels and boxes img = load_image(args.image) diff --git a/qai_hub_models/models/_shared/detr/model.py b/qai_hub_models/models/_shared/detr/model.py index 426186a1..9e277c0d 100644 --- a/qai_hub_models/models/_shared/detr/model.py +++ b/qai_hub_models/models/_shared/detr/model.py @@ -49,8 +49,8 @@ def forward( predictions = self.model(image, mask, return_dict=False) return predictions + @staticmethod def get_input_spec( - self, batch_size: int = 1, num_channels: int = 3, height: int = 480, diff --git a/qai_hub_models/models/_shared/fastsam/demo.py b/qai_hub_models/models/_shared/fastsam/demo.py index cc1241ff..59281888 100644 --- a/qai_hub_models/models/_shared/fastsam/demo.py +++ b/qai_hub_models/models/_shared/fastsam/demo.py @@ -17,13 +17,16 @@ get_on_device_demo_parser, validate_on_device_demo_args, ) -from qai_hub_models.utils.asset_loaders import CachedWebAsset, load_path +from qai_hub_models.utils.asset_loaders import CachedWebAsset, load_image from qai_hub_models.utils.base_model import BaseModel from qai_hub_models.utils.display import display_or_save_image def fastsam_demo( - model_type: Type[BaseModel], image_path: str | CachedWebAsset, is_test: bool + model_type: Type[BaseModel], + model_id: str, + image_path: str | CachedWebAsset, + is_test: bool, ): # Demo parameters parser = get_model_cli_parser(model_type) @@ -36,21 +39,27 @@ def fastsam_demo( ) args = parser.parse_args([] if is_test else None) - validate_on_device_demo_args(args, model_type.get_model_id()) + validate_on_device_demo_args(args, model_id) - model = demo_model_from_cli_args(model_type, args) + model = demo_model_from_cli_args(model_type, model_id, args) app = FastSAMApp(model) + image = load_image(args.image) + with tempfile.TemporaryDirectory() as tmpdir: - image_path = load_path(args.image, tmpdir) + image_path = os.path.join(tmpdir, "inp_image.jpg") + image.save(image_path) pred, prompt_process = app.segment_image(image_path) - # Store the output image - output_dirname, _ = os.path.split(image_path) - output_path = os.path.join(output_dirname, "output.jpg") - prompt_process.plot(annotations=pred, output=output_path) + # Store the output image + output_path = os.path.join(args.output_dir or tmpdir, "output.jpg") + + # Save the output + prompt_process.plot(annotations=pred, output=output_path) - # Display the output - output_image = Image.open(output_path) - if not is_test: - display_or_save_image(output_image, args.output_dir) + if is_test: + assert pred is not None + else: + display_or_save_image( + Image.open(output_path), args.output_dir, "output.jpg" + ) diff --git a/qai_hub_models/models/_shared/fastsam/model.py b/qai_hub_models/models/_shared/fastsam/model.py index 092d44e6..4342fb72 100644 --- a/qai_hub_models/models/_shared/fastsam/model.py +++ b/qai_hub_models/models/_shared/fastsam/model.py @@ -48,8 +48,8 @@ def forward(self, image: torch.Tensor): predictions[1][2], ) + @staticmethod def get_input_spec( - self, batch_size: int = 1, num_channels: int = 3, height: int = 640, diff --git 
a/qai_hub_models/models/_shared/ffnet/model.py b/qai_hub_models/models/_shared/ffnet/model.py index 16834289..21c94b67 100644 --- a/qai_hub_models/models/_shared/ffnet/model.py +++ b/qai_hub_models/models/_shared/ffnet/model.py @@ -5,6 +5,7 @@ from __future__ import annotations import os +from importlib import reload import torch @@ -105,6 +106,16 @@ def _load_ffnet_source_model(variant_name) -> torch.nn.Module: import config config.model_weights_base_path = root_weights_path + + # This repository has a top-level "models", which is common. We + # explicitly reload it in case it has been loaded and cached by another + # package (or our models when executing from qai_hub_models/). + # This reload must happen after the config fix, and before trying to + # load model_entrypoint. + import models + + reload(models) + from models.model_registry import model_entrypoint model = model_entrypoint(variant_name)().eval() diff --git a/qai_hub_models/models/_shared/imagenet_classifier/demo.py b/qai_hub_models/models/_shared/imagenet_classifier/demo.py index 41a81e70..7dad494c 100644 --- a/qai_hub_models/models/_shared/imagenet_classifier/demo.py +++ b/qai_hub_models/models/_shared/imagenet_classifier/demo.py @@ -2,7 +2,7 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- -from typing import Type +from typing import List, Type import torch @@ -26,6 +26,7 @@ load_image, load_json, ) +from qai_hub_models.utils.base_model import TargetRuntime IMAGENET_LABELS_ASSET = CachedWebModelAsset( "https://raw.githubusercontent.com/anishathalye/imagenet-simple-labels/master/imagenet-simple-labels.json", @@ -37,10 +38,20 @@ # Run Imagenet Classifier end-to-end on a sample image. # The demo will print the predicted class to terminal. -def imagenet_demo(model_cls: Type[ImagenetClassifier], is_test: bool = False): +def imagenet_demo( + model_cls: Type[ImagenetClassifier], + model_id: str, + is_test: bool = False, + available_target_runtimes: List[TargetRuntime] = list( + TargetRuntime.__members__.values() + ), +): + # Demo parameters parser = get_model_cli_parser(model_cls) - parser = get_on_device_demo_parser(parser) + parser = get_on_device_demo_parser( + parser, available_target_runtimes=available_target_runtimes + ) parser.add_argument( "--image", type=str, @@ -48,9 +59,9 @@ def imagenet_demo(model_cls: Type[ImagenetClassifier], is_test: bool = False): help="test image file path or URL", ) args = parser.parse_args([] if is_test else None) - validate_on_device_demo_args(args, model_cls.get_model_id()) + validate_on_device_demo_args(args, model_id) - model = demo_model_from_cli_args(model_cls, args) + model = demo_model_from_cli_args(model_cls, model_id, args) app = ImagenetClassifierApp(model) print("Model Loaded") diff --git a/qai_hub_models/models/_shared/imagenet_classifier/model.py b/qai_hub_models/models/_shared/imagenet_classifier/model.py index fb680725..070e1437 100644 --- a/qai_hub_models/models/_shared/imagenet_classifier/model.py +++ b/qai_hub_models/models/_shared/imagenet_classifier/model.py @@ -28,16 +28,19 @@ class ImagenetClassifier(BaseModel): def __init__( self, net: torch.nn.Module, + transform_input: bool = False, ): """ Basic initializer which takes in a pretrained classifier network. Subclasses can choose to implement their own __init__ and forward methods. 
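+
+        transform_input: If True, forward() re-normalizes inputs that were standardized
+        with the ImageNet mean/std into the (x - 0.5) / 0.5 range expected by
+        Inception-style networks (equivalent to torchvision's transform_input, which
+        should then be disabled on the wrapped net).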
""" super().__init__() + self.transform_input = transform_input self.net = net self.eval() - def forward(self, image_tensor: torch.Tensor): + # Type annotation on image_tensor causes aimet onnx export failure + def forward(self, image_tensor): """ Predict class probabilities for an input `image`. @@ -54,14 +57,22 @@ def forward(self, image_tensor: torch.Tensor): A [1, 1000] where each value is the log-likelihood of the image belonging to the corresponding Imagenet class. """ + if self.transform_input: + # This is equivalent but converts better than the built-in. + # transform_input should be turned off in torchvision model. + shape = (1, 3, 1, 1) + scale = torch.tensor([0.229 / 0.5, 0.224 / 0.5, 0.225 / 0.5]).reshape(shape) + bias = torch.tensor( + [(0.485 - 0.5) / 0.5, (0.456 - 0.5) / 0.5, (0.406 - 0.5) / 0.5] + ).reshape(shape) + image_tensor = image_tensor * scale + bias return self.net(image_tensor) def get_evaluator(self) -> BaseEvaluator: return ClassificationEvaluator() - def get_input_spec( - self, - ) -> InputSpec: + @staticmethod + def get_input_spec() -> InputSpec: """ Returns the input specification (name -> (shape, type). This can be used to submit profiling job on Qualcomm® AI Hub. diff --git a/qai_hub_models/models/_shared/imagenet_classifier/test_utils.py b/qai_hub_models/models/_shared/imagenet_classifier/test_utils.py index d8865ca5..cf3bd022 100644 --- a/qai_hub_models/models/_shared/imagenet_classifier/test_utils.py +++ b/qai_hub_models/models/_shared/imagenet_classifier/test_utils.py @@ -100,7 +100,7 @@ def run_imagenet_classifier_trace_test( model.convert_to_torchscript(check_trace=check_trace) ) else: - trace_app = ImagenetClassifierApp(model.convert_to_quantized_torchscript()) + trace_app = ImagenetClassifierApp(model.convert_to_torchscript()) probabilities = app.predict(img) trace_probs = trace_app.predict(img) assert_most_close(probabilities.numpy(), trace_probs.numpy(), diff_tol, rtol, atol) diff --git a/qai_hub_models/models/_shared/quicksrnet/common.py b/qai_hub_models/models/_shared/quicksrnet/common.py index 248c125e..3883190e 100644 --- a/qai_hub_models/models/_shared/quicksrnet/common.py +++ b/qai_hub_models/models/_shared/quicksrnet/common.py @@ -4,15 +4,10 @@ # --------------------------------------------------------------------- import torch -from qai_hub_models.utils.asset_loaders import SourceAsRoot - -QUICKSRNET_SOURCE_REPOSITORY = "https://github.com/quic/aimet-model-zoo" -QUICKSRNET_SOURCE_REPO_COMMIT = "d09d2b0404d10f71a7640a87e9d5e5257b028802" +from qai_hub_models.utils.aimet.repo import aimet_zoo_as_root def _load_quicksrnet_source_model( - model_id, - model_asset_version, scaling_factor, num_channels, num_intermediate_layers, @@ -20,22 +15,7 @@ def _load_quicksrnet_source_model( ) -> torch.nn.Module: # Load QuickSRNet model from the source repository using the given weights. 
# Returns .utils.super_resolution.models.QuickSRNetBase - with SourceAsRoot( - QUICKSRNET_SOURCE_REPOSITORY, - QUICKSRNET_SOURCE_REPO_COMMIT, - model_id, - model_asset_version, - ): - # Remove import of model_definition.py as it has an import error itself, - # but we don't need anything from that file here - with open("aimet_zoo_torch/quicksrnet/__init__.py", "r") as file: - file_content = file.read() - new_content = file_content.replace( - "from .model.model_definition import QuickSRNet", " " - ) - with open("aimet_zoo_torch/quicksrnet/__init__.py", "w") as file: - file.write(new_content) - + with aimet_zoo_as_root(): from aimet_zoo_torch.quicksrnet.model.models import QuickSRNetBase return QuickSRNetBase( diff --git a/qai_hub_models/models/_shared/repaint/app.py b/qai_hub_models/models/_shared/repaint/app.py index bb82f8b8..5a6165c1 100644 --- a/qai_hub_models/models/_shared/repaint/app.py +++ b/qai_hub_models/models/_shared/repaint/app.py @@ -4,7 +4,7 @@ # --------------------------------------------------------------------- from __future__ import annotations -from typing import Callable, List +from typing import Callable, Dict, List import numpy as np import torch @@ -36,6 +36,26 @@ def predict(self, *args, **kwargs): # See paint_mask_on_image. return self.paint_mask_on_image(*args, **kwargs) + @staticmethod + def preprocess_inputs( + pixel_values_or_image: torch.Tensor | np.ndarray | Image | List[Image], + mask_pixel_values_or_image: torch.Tensor | np.ndarray | Image, + ) -> Dict[str, torch.Tensor]: + NCHW_fp32_torch_frames = app_to_net_image_inputs(pixel_values_or_image)[1] + NCHW_fp32_torch_masks = app_to_net_image_inputs(mask_pixel_values_or_image)[1] + + # The number of input images should equal the number of input masks. + if NCHW_fp32_torch_masks.shape[0] != 1: + NCHW_fp32_torch_masks = NCHW_fp32_torch_masks.tile( + (NCHW_fp32_torch_frames.shape[0], 1, 1, 1) + ) + + # Mask input image + image_masked = ( + NCHW_fp32_torch_frames * (1 - NCHW_fp32_torch_masks) + NCHW_fp32_torch_masks + ) + return {"image": image_masked, "mask": NCHW_fp32_torch_masks} + def paint_mask_on_image( self, pixel_values_or_image: torch.Tensor | np.ndarray | Image | List[Image], @@ -65,19 +85,9 @@ def paint_mask_on_image( images: List[PIL.Image] A list of predicted images (one list element per batch). """ - NCHW_fp32_torch_frames = app_to_net_image_inputs(pixel_values_or_image)[1] - NCHW_fp32_torch_masks = app_to_net_image_inputs(mask_pixel_values_or_image)[1] - - # The number of input images should equal the number of input masks. 
- if NCHW_fp32_torch_masks.shape[0] != 1: - NCHW_fp32_torch_masks = NCHW_fp32_torch_masks.tile( - (NCHW_fp32_torch_frames.shape[0], 1, 1, 1) - ) - - # Mask input image - image_masked = ( - NCHW_fp32_torch_frames * (1 - NCHW_fp32_torch_masks) + NCHW_fp32_torch_masks + inputs = self.preprocess_inputs( + pixel_values_or_image, mask_pixel_values_or_image ) - out = self.model(image_masked, NCHW_fp32_torch_masks) + out = self.model(inputs["image"], inputs["mask"]) return [torch_tensor_to_PIL_image(img) for img in out] diff --git a/qai_hub_models/models/_shared/repaint/demo.py b/qai_hub_models/models/_shared/repaint/demo.py index f67f8270..2df57b1c 100644 --- a/qai_hub_models/models/_shared/repaint/demo.py +++ b/qai_hub_models/models/_shared/repaint/demo.py @@ -4,7 +4,7 @@ # --------------------------------------------------------------------- from __future__ import annotations -from typing import Type +from typing import List, Type from qai_hub_models.models._shared.repaint.app import RepaintMaskApp from qai_hub_models.utils.args import ( @@ -22,14 +22,18 @@ # The demo will display the predicted image in a window. def repaint_demo( model_type: Type[BaseModel], + model_id: str, default_image: str | CachedWebAsset, default_mask: str | CachedWebAsset, is_test: bool = False, + available_target_runtimes: List[TargetRuntime] = list( + TargetRuntime.__members__.values() + ), ): # Demo parameters parser = get_model_cli_parser(model_type) parser = get_on_device_demo_parser( - parser, available_target_runtimes=[TargetRuntime.TFLITE], add_output_dir=True + parser, available_target_runtimes=available_target_runtimes, add_output_dir=True ) parser.add_argument( "--image", @@ -44,10 +48,10 @@ def repaint_demo( help="test mask file path or URL", ) args = parser.parse_args([] if is_test else None) - validate_on_device_demo_args(args, model_type.get_model_id()) + validate_on_device_demo_args(args, model_id) # Load image & model - model = demo_model_from_cli_args(model_type, args) + model = demo_model_from_cli_args(model_type, model_id, args) image = load_image(args.image) mask = load_image(args.mask) print("Model Loaded") diff --git a/qai_hub_models/models/_shared/sesr/common.py b/qai_hub_models/models/_shared/sesr/common.py index eebef83c..157fdea7 100644 --- a/qai_hub_models/models/_shared/sesr/common.py +++ b/qai_hub_models/models/_shared/sesr/common.py @@ -6,22 +6,18 @@ import torch -from qai_hub_models.utils.asset_loaders import SourceAsRoot +from qai_hub_models.utils.aimet.repo import aimet_zoo_as_root # SESR original repo is here: https://github.com/ARM-software/sesr -# But this is all written in TF and Keras. Torch version is in AIMET -SESR_SOURCE_REPOSITORY = "https://github.com/quic/aimet-model-zoo" -SESR_SOURCE_REPO_COMMIT = "d09d2b0404d10f71a7640a87e9d5e5257b028802" +# But this is all written in TF and Keras. Torch version is in AIMET Zoo def _load_sesr_source_model( - model_id, model_asset_version: int | str, scaling_factor, num_channels, num_lblocks + scaling_factor, num_channels, num_lblocks ) -> torch.nn.Module: # Load SESR model from the source repository using the given weights. 
# Returns .utils.super_resolution.models.SESRRelease - with SourceAsRoot( - SESR_SOURCE_REPOSITORY, SESR_SOURCE_REPO_COMMIT, model_id, model_asset_version - ): + with aimet_zoo_as_root(): from aimet_zoo_torch.common.super_resolution.models import SESRRelease diff --git a/qai_hub_models/models/_shared/super_resolution/demo.py b/qai_hub_models/models/_shared/super_resolution/demo.py index e8d545ec..3ed3cb5d 100644 --- a/qai_hub_models/models/_shared/super_resolution/demo.py +++ b/qai_hub_models/models/_shared/super_resolution/demo.py @@ -23,6 +23,7 @@ # The demo will display both the input image and the higher resolution output. def super_resolution_demo( model_cls: Type[BaseModel], + model_id: str, default_image: str | CachedWebAsset, is_test: bool = False, available_target_runtimes: List[TargetRuntime] = list( @@ -44,7 +45,7 @@ def super_resolution_demo( ) args = parser.parse_args([] if is_test else None) - validate_on_device_demo_args(args, model_cls.get_model_id()) + validate_on_device_demo_args(args, model_id) # Load image & model image = load_image(args.image) @@ -64,6 +65,7 @@ def super_resolution_demo( inference_model = demo_model_from_cli_args( model_cls, + model_id, args, ) app = SuperResolutionApp(inference_model) diff --git a/qai_hub_models/models/_shared/video_classifier/model.py b/qai_hub_models/models/_shared/video_classifier/model.py index 6b75ea3a..6fbacf89 100644 --- a/qai_hub_models/models/_shared/video_classifier/model.py +++ b/qai_hub_models/models/_shared/video_classifier/model.py @@ -43,8 +43,8 @@ def forward(self, video: torch.Tensor): """ return self.net(video) + @staticmethod def get_input_spec( - self, num_frames: int = 16, ) -> InputSpec: """ diff --git a/qai_hub_models/models/_shared/whisper/__init__.py b/qai_hub_models/models/_shared/whisper/__init__.py new file mode 100644 index 00000000..21a22b31 --- /dev/null +++ b/qai_hub_models/models/_shared/whisper/__init__.py @@ -0,0 +1,4 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- diff --git a/qai_hub_models/models/whisper_asr/app.py b/qai_hub_models/models/_shared/whisper/app.py similarity index 98% rename from qai_hub_models/models/whisper_asr/app.py rename to qai_hub_models/models/_shared/whisper/app.py index dcdccd68..4a548b94 100644 --- a/qai_hub_models/models/whisper_asr/app.py +++ b/qai_hub_models/models/_shared/whisper/app.py @@ -11,7 +11,7 @@ import whisper # type: ignore from scipy import special as scipy_special # type: ignore -from qai_hub_models.models.whisper_asr.model import Whisper +from qai_hub_models.models._shared.whisper.model import Whisper from qai_hub_models.utils.model_adapters import TorchNumpyAdapter # hard-coded audio hyperparameters @@ -31,8 +31,8 @@ class WhisperApp: """ def __init__(self, whisper: Whisper): - decoder = whisper.decoder - encoder = whisper.encoder + decoder = whisper.decoder.to("cpu") + encoder = whisper.encoder.to("cpu") self.num_decoder_blocks = whisper.num_decoder_blocks self.attention_dim = whisper.attention_dim diff --git a/qai_hub_models/models/whisper_asr/demo.py b/qai_hub_models/models/_shared/whisper/demo.py similarity index 82% rename from qai_hub_models/models/whisper_asr/demo.py rename to qai_hub_models/models/_shared/whisper/demo.py index 9bb729c6..bd9a4fa8 100644 --- a/qai_hub_models/models/whisper_asr/demo.py +++ b/qai_hub_models/models/_shared/whisper/demo.py @@ -2,12 +2,14 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- -from qai_hub_models.models.whisper_asr.app import ( +from typing import Type + +from qai_hub_models.models._shared.whisper.app import ( WhisperApp, load_audio, load_mel_filter, ) -from qai_hub_models.models.whisper_asr.model import ( +from qai_hub_models.models._shared.whisper.model import ( MEL_FILTER_PATH, MODEL_ASSET_VERSION, MODEL_ID, @@ -20,9 +22,9 @@ ) -def main(): +def whisper_demo(model_cls: Type[Whisper]): # For other model sizes, see https://github.com/openai/whisper/blob/main/whisper/__init__.py#L17 - app = WhisperApp(Whisper.from_pretrained()) + app = WhisperApp(model_cls.from_pretrained()) TEST_AUDIO_PATH.fetch() MEL_FILTER_PATH.fetch() @@ -36,7 +38,3 @@ def main(): # Perform transcription transcription = app.transcribe(mel_input) print("Transcription:", transcription) - - -if __name__ == "__main__": - main() diff --git a/qai_hub_models/models/whisper_asr/model.py b/qai_hub_models/models/_shared/whisper/model.py similarity index 88% rename from qai_hub_models/models/whisper_asr/model.py rename to qai_hub_models/models/_shared/whisper/model.py index 6ed6a7be..50bb546b 100644 --- a/qai_hub_models/models/whisper_asr/model.py +++ b/qai_hub_models/models/_shared/whisper/model.py @@ -10,12 +10,12 @@ import whisper # type: ignore from qai_hub_models.utils.asset_loaders import CachedWebModelAsset -from qai_hub_models.utils.base_model import BaseModel, CollectionModel +from qai_hub_models.utils.base_model import BaseModel, CollectionModel, TargetRuntime from qai_hub_models.utils.input_spec import InputSpec -MAX_DECODE_LEN = 448 +MAX_DECODE_LEN = 224 -MODEL_ID = __name__.split(".")[-2] +MODEL_ID = "whisper_asr_shared" MODEL_ASSET_VERSION = 1 MEL_FILTER_PATH = CachedWebModelAsset.from_asset_store( MODEL_ID, MODEL_ASSET_VERSION, "openai_assets/mel_filters.npz" @@ -70,7 +70,8 @@ def forward(self, audio: torch.Tensor) -> 
List[torch.Tensor]: res.append(residual_block.cross_attn.value(encoder_out)) return res - def get_input_spec(self) -> InputSpec: + @staticmethod + def get_input_spec() -> InputSpec: """ Returns the input specification (name -> (shape, type). This can be used to submit profiling job on Qualcomm AI Hub. @@ -81,6 +82,22 @@ def get_input_spec(self) -> InputSpec: def from_pretrained(cls): return Whisper.from_pretrained().encoder + def get_hub_compile_options( + self, target_runtime: TargetRuntime, other_compile_options: str = "" + ) -> str: + compile_options = super().get_hub_compile_options( + target_runtime, other_compile_options + ) + return compile_options + " --compute_unit gpu" + + def get_hub_profile_options( + self, target_runtime: TargetRuntime, other_profile_options: str = "" + ) -> str: + profile_options = super().get_hub_profile_options( + target_runtime, other_profile_options + ) + return profile_options + " --max_profiler_iterations 10" + " --compute_unit gpu" + class WhisperDecoderInf(BaseModel): """ @@ -101,6 +118,7 @@ def __init__(self, model: whisper.model.TextDecoder): self.blocks = torch.nn.ModuleList( [ResidualAttentionBlockWrapper(b) for b in model.blocks] ) + self.num_blocks = len(self.blocks) for m in ["token_embedding", "ln"]: self.add_module(m, getattr(model, m)) for p in ["positional_embedding"]: @@ -144,12 +162,11 @@ def forward(self, x: torch.Tensor, *kv_cache_args, **kv_cache_kwargs): assert isinstance(self.positional_embedding, torch.nn.Parameter) # for mypy # Set up kv_cache kv_cache = {} # torch.nn.Module -> torch.Tensor - num_blocks = len(self.blocks) for i, block in enumerate(self.blocks): kv_cache.update( { - block.attn.key: kv_cache_args[2 * num_blocks + i * 2], - block.attn.value: kv_cache_args[2 * num_blocks + i * 2 + 1], + block.attn.key: kv_cache_args[2 * self.num_blocks + i * 2], + block.attn.value: kv_cache_args[2 * self.num_blocks + i * 2 + 1], block.cross_attn.key: kv_cache_args[i * 2], block.cross_attn.value: kv_cache_args[i * 2 + 1], } @@ -178,31 +195,35 @@ def forward(self, x: torch.Tensor, *kv_cache_args, **kv_cache_kwargs): # shape: [1, 1, 51864] return (logits,) + tuple(kv_cache_new) - def get_input_spec(self) -> InputSpec: + @staticmethod + def get_input_spec(num_blocks: int, attention_dim: int) -> InputSpec: """ Returns the input specification (name -> (shape, type). This can be used to submit profiling job on Qualcomm AI Hub. 
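
The refactored `WhisperDecoderInf.forward` indexes its positional `kv_cache_args` with the cross-attention caches for every block first, followed by the self-attention caches. A small sketch of assembling arguments in that order, using the `tiny.en` sizes assumed elsewhere in this patch (4 blocks, attention dim 384):

```python
# Sketch of building kv_cache positional args in the order forward() expects:
# all cross-attn (k, v) pairs first, then all self-attn (k, v) pairs.
import torch

num_blocks = 4        # decoder.num_blocks for tiny.en (assumption)
attention_dim = 384   # decoder.attention_dim for tiny.en (assumption)

# Cross-attn caches come from the encoder and have a fixed length of 1500.
cross_attn_cache = []
for _ in range(num_blocks):
    cross_attn_cache.append(torch.zeros(1, 1500, attention_dim))  # b{i}_cross_attn_k
    cross_attn_cache.append(torch.zeros(1, 1500, attention_dim))  # b{i}_cross_attn_v

# Self-attn caches start empty (sequence length 0) and grow each decode step.
self_attn_cache = []
for _ in range(num_blocks):
    self_attn_cache.append(torch.zeros(1, 0, attention_dim))      # b{i}_self_attn_k
    self_attn_cache.append(torch.zeros(1, 0, attention_dim))      # b{i}_self_attn_v

# forward() reads cross-attn entries at indices [0, 2*num_blocks) and
# self-attn entries at [2*num_blocks, 4*num_blocks):
#   decoder(tokens, *cross_attn_cache, *self_attn_cache)
kv_cache_args = cross_attn_cache + self_attn_cache
assert len(kv_cache_args) == 4 * num_blocks
```
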
""" specs = dict(x=((1, 1), "int32")) - for i in range(len(self.blocks)): - specs[f"b{i}_cross_attn_k"] = ((1, 1500, self.attention_dim), "float32") - specs[f"b{i}_cross_attn_v"] = ((1, 1500, self.attention_dim), "float32") + for i in range(num_blocks): + specs[f"b{i}_cross_attn_k"] = ((1, 1500, attention_dim), "float32") + specs[f"b{i}_cross_attn_v"] = ((1, 1500, attention_dim), "float32") # Use mean length for profiling mean_decode_len = MAX_DECODE_LEN // 2 - for i in range(len(self.blocks)): + for i in range(num_blocks): specs[f"b{i}_self_attn_k"] = ( - (1, mean_decode_len, self.attention_dim), + (1, mean_decode_len, attention_dim), "float32", ) specs[f"b{i}_self_attn_v"] = ( - (1, mean_decode_len, self.attention_dim), + (1, mean_decode_len, attention_dim), "float32", ) return specs + def _get_input_spec_for_model_instance(self) -> InputSpec: + return self.__class__.get_input_spec(len(self.blocks), self.attention_dim) + @classmethod def from_pretrained(cls): return Whisper.from_pretrained().decoder diff --git a/qai_hub_models/models/whisper_asr/test.py b/qai_hub_models/models/_shared/whisper/test_utils.py similarity index 71% rename from qai_hub_models/models/whisper_asr/test.py rename to qai_hub_models/models/_shared/whisper/test_utils.py index 7b021e37..a75dd327 100644 --- a/qai_hub_models/models/whisper_asr/test.py +++ b/qai_hub_models/models/_shared/whisper/test_utils.py @@ -3,18 +3,16 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- import numpy as np -import pytest import torch import whisper -from qai_hub_models.models.whisper_asr.app import ( +from qai_hub_models.models._shared.whisper.app import ( WhisperApp, load_audio, load_mel_filter, ) -from qai_hub_models.models.whisper_asr.demo import TEST_AUDIO_PATH -from qai_hub_models.models.whisper_asr.demo import main as demo_main -from qai_hub_models.models.whisper_asr.model import ( +from qai_hub_models.models._shared.whisper.demo import TEST_AUDIO_PATH +from qai_hub_models.models._shared.whisper.model import ( MEL_FILTER_PATH, Whisper, WhisperDecoderInf, @@ -22,20 +20,22 @@ ) -@pytest.fixture(scope="session") -def mel_input() -> np.ndarray: +def load_mel_input() -> np.ndarray: mel_filter_path = MEL_FILTER_PATH.fetch() mel_filter = load_mel_filter(mel_filter_path) audio_path = TEST_AUDIO_PATH.fetch() return load_audio(mel_filter, audio_path) -def test_numerics(mel_input): +def run_test_wrapper_numerics(whisper_version): """ - Test that wrapper classes predict logits (without post processing) that - matches with the original model's. + Test that wrapper classes, excluding the + app, predict logits (without post + processing) that matches with the + original model's. 
""" # OpenAI + mel_input = load_mel_input() with torch.no_grad(): mel_input = torch.from_numpy(mel_input) model = whisper.load_model("tiny.en") @@ -49,8 +49,8 @@ def test_numerics(mel_input): decoder = WhisperDecoderInf(model.decoder) cross_attn_cache = encoder(mel_input) - cache_tensor = np.array([], dtype=np.float32).reshape((1, 0, 384)) - self_attn_cache = [torch.from_numpy(cache_tensor)] * 2 * 4 + cache_tensor = np.array([], dtype=np.float32).reshape((1, 0, decoder.attention_dim)) + self_attn_cache = [torch.from_numpy(cache_tensor)] * 2 * decoder.num_blocks decoder_out = decoder(tokens, *cross_attn_cache, *self_attn_cache) logits = decoder_out[0].detach().numpy() @@ -58,14 +58,16 @@ def test_numerics(mel_input): np.testing.assert_allclose(logits_orig, logits) -def test_transcribe(mel_input): +def run_test_transcribe(whisper_version): """ - Test that pytorch wrappers produces end to end transcription results that + Test that WhisperApp produces end to end transcription results that matches with the original model """ + mel_input = load_mel_input() + # Run inference with OpenAI whisper with torch.no_grad(): - model = whisper.load_model("tiny.en") + model = whisper.load_model(whisper_version) options = whisper.DecodingOptions( language="en", without_timestamps=False, fp16=False ) @@ -77,7 +79,3 @@ def test_transcribe(mel_input): # Perform transcription transcription = app.transcribe(mel_input) assert transcription == text_orig - - -def test_demo(): - demo_main() diff --git a/qai_hub_models/models/_shared/yolo/demo.py b/qai_hub_models/models/_shared/yolo/demo.py index f9662848..0f6d4a1d 100644 --- a/qai_hub_models/models/_shared/yolo/demo.py +++ b/qai_hub_models/models/_shared/yolo/demo.py @@ -24,6 +24,7 @@ # The demo will display a image with the predicted bounding boxes. 
def yolo_detection_demo( model_type: Type[BaseModel], + model_id: str, app_type: Callable[..., YoloObjectDetectionApp], default_image: str | CachedWebAsset, stride_multiple: int | None = None, @@ -49,9 +50,9 @@ def yolo_detection_demo( help="Intersection over Union (IoU) threshold for NonMaximumSuppression", ) args = parser.parse_args([] if is_test else None) - validate_on_device_demo_args(args, model_type.get_model_id()) + validate_on_device_demo_args(args, model_id) - model = demo_model_from_cli_args(model_type, args) + model = demo_model_from_cli_args(model_type, model_id, args) app = app_type(model, args.score_threshold, args.iou_threshold) print("Model Loaded") diff --git a/qai_hub_models/models/_shared/yolo/utils.py b/qai_hub_models/models/_shared/yolo/utils.py index 217d1d7a..5911106f 100644 --- a/qai_hub_models/models/_shared/yolo/utils.py +++ b/qai_hub_models/models/_shared/yolo/utils.py @@ -6,8 +6,8 @@ import torch +from qai_hub_models.models.common import SampleInputsType from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image -from qai_hub_models.utils.base_model import InputsType from qai_hub_models.utils.image_processing import app_to_net_image_inputs @@ -103,7 +103,7 @@ class scores reduced to keep max score per prediction return torch.cat(max_scores, dim=-1), torch.cat(max_indices, dim=-1) -def yolo_sample_inputs() -> InputsType: +def yolo_sample_inputs() -> SampleInputsType: image_address = CachedWebModelAsset.from_asset_store( "yolov7", 1, "yolov7_demo_640.jpg" ) diff --git a/qai_hub_models/models/aotgan/README.md b/qai_hub_models/models/aotgan/README.md new file mode 100644 index 00000000..dff56fea --- /dev/null +++ b/qai_hub_models/models/aotgan/README.md @@ -0,0 +1,54 @@ +[![Qualcomm® AI Hub Models](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/quic-logo.jpg)](../../README.md) + + +# [AOT-GAN: High resolution image in-painting on-device](https://aihub.qualcomm.com/models/aotgan) + +AOT-GAN is a machine learning model that allows to erase and in-paint part of given input image. + +This is based on the implementation of AOT-GAN found +[here](https://github.com/researchmm/AOT-GAN-for-Inpainting). This repository contains scripts for optimized on-device +export suitable to run on Qualcomm® devices. More details on model performance +accross various devices, can be found [here](https://aihub.qualcomm.com/models/aotgan). + +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on +a hosted Qualcomm® device. + + +## Example & Usage + + +Once installed, run the following simple CLI demo: + +```bash +python -m qai_hub_models.models.aotgan.demo +``` +More details on the CLI tool can be found with the `--help` option. See +[demo.py](demo.py) for sample usage of the model including pre/post processing +scripts. Please refer to our [general instructions on using +models](../../../#getting-started) for more usage instructions. + +## Export for on-device deployment + +This repository contains export scripts that produce a model optimized for +on-device deployment. This can be run as follows: + +```bash +python -m qai_hub_models.models.aotgan.export +``` +Additional options are documented with the `--help` option. Note that the above +script requires access to Deployment instructions for Qualcomm® AI Hub. + +## License +- The license for the original implementation of AOT-GAN can be found + [here](https://github.com/taki0112/AttnGAN-Tensorflow/blob/master/LICENSE). 
+- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) + +## References +* [Aggregated Contextual Transformations for High-Resolution Image Inpainting](https://arxiv.org/abs/2104.01431) +* [Source Model Implementation](https://github.com/researchmm/AOT-GAN-for-Inpainting) + +## Community +* Join [our AI Hub Slack community](https://join.slack.com/t/qualcomm-ai-hub/shared_invite/zt-2dgf95loi-CXHTDRR1rvPgQWPO~ZZZJg) to collaborate, post questions and learn more about on-device AI. +* For questions or feedback please [reach out to us](mailto:ai-hub-support@qti.qualcomm.com). + + diff --git a/qai_hub_models/models/aotgan/__init__.py b/qai_hub_models/models/aotgan/__init__.py new file mode 100644 index 00000000..27effd6f --- /dev/null +++ b/qai_hub_models/models/aotgan/__init__.py @@ -0,0 +1,10 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.repaint.app import ( # noqa: F401 + RepaintMaskApp as App, +) + +from .model import AOTGAN as Model # noqa: F401 +from .model import MODEL_ID # noqa: F401 diff --git a/qai_hub_models/models/aotgan/conftest.py b/qai_hub_models/models/aotgan/conftest.py new file mode 100644 index 00000000..ce808157 --- /dev/null +++ b/qai_hub_models/models/aotgan/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.aotgan import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.aotgan.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/aotgan/demo.py b/qai_hub_models/models/aotgan/demo.py new file mode 100644 index 00000000..cc1d153d --- /dev/null +++ b/qai_hub_models/models/aotgan/demo.py @@ -0,0 +1,19 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.repaint.demo import repaint_demo +from qai_hub_models.models.aotgan.model import ( + AOTGAN, + IMAGE_ADDRESS, + MASK_ADDRESS, + MODEL_ID, +) + + +def main(is_test: bool = False): + repaint_demo(AOTGAN, MODEL_ID, IMAGE_ADDRESS, MASK_ADDRESS, is_test=is_test) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/aotgan/export.py b/qai_hub_models/models/aotgan/export.py new file mode 100644 index 00000000..1316347d --- /dev/null +++ b/qai_hub_models/models/aotgan/export.py @@ -0,0 +1,206 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. 
All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import os +import warnings +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple, cast + +import qai_hub as hub +import torch + +from qai_hub_models.models.aotgan import Model +from qai_hub_models.utils.args import ( + export_parser, + get_input_spec_kwargs, + get_model_kwargs, +) +from qai_hub_models.utils.base_model import TargetRuntime +from qai_hub_models.utils.compare import torch_inference +from qai_hub_models.utils.input_spec import make_torch_inputs +from qai_hub_models.utils.printing import ( + print_inference_metrics, + print_on_target_demo_cmd, + print_profile_metrics_from_job, +) +from qai_hub_models.utils.qai_hub_helpers import ( + can_access_qualcomm_ai_hub, + export_without_hub_access, + transpose_channel_first_to_last, + transpose_channel_last_to_first, +) + + +def export_model( + device: str = "Samsung Galaxy S23", + skip_profiling: bool = False, + skip_inferencing: bool = False, + skip_downloading: bool = False, + skip_summary: bool = False, + output_dir: Optional[str] = None, + target_runtime: TargetRuntime = TargetRuntime.TFLITE, + compile_options: str = "", + profile_options: str = "", + **additional_model_kwargs, +) -> Tuple[hub.CompileJob, Optional[hub.ProfileJob], Optional[hub.InferenceJob]] | List[ + str +]: + """ + This function accomplishes 6 main tasks: + + 1. Instantiates a PyTorch model and converts it to a traced TorchScript format. + 2. Compiles the model to an asset that can be run on device. + 3. Profiles the model performance on real devices. + 4. Inferences the model on sample inputs. + 5. Downloads the model asset to the local directory. + 6. Summarizes the results from profiling and inference. + + Each of the last four steps can be optionally skipped using the input options. + + Parameters: + device: Device for which to export the model. + Full list of available devices can be found by running `hub.get_devices()`. + Defaults to DEFAULT_DEVICE if not specified. + skip_profiling: If set, skips profiling of compiled model on real devices. + skip_inferencing: If set, skips computing on-device outputs from sample data. + skip_downloading: If set, skips downloading of compiled model. + skip_summary: If set, skips waiting for and summarizing results + from profiling and inference. + output_dir: Directory to store generated assets (e.g. compiled model). + Defaults to `/build/`. + target_runtime: Which on-device runtime to target. Default is TFLite. + compile_options: Additional options to pass when submitting the compile job. + profile_options: Additional options to pass when submitting the profile job. + **additional_model_kwargs: Additional optional kwargs used to customize + `model_cls.from_pretrained` and `model.get_input_spec` + + Returns: + A 3-tuple of: + * A CompileJob object containing metadata about the compile job submitted to hub. + * A ProfileJob containing metadata about the profile job (None if profiling skipped). + * An InferenceJob containing metadata about the inference job (None if inferencing skipped). 
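
For reference, a minimal sketch of driving the generated `export_model` entry point programmatically, assuming Qualcomm AI Hub access is configured; every argument shown is one of the documented parameters above:

```python
# Minimal sketch: compile, run sample inference, and download the asset,
# skipping on-device profiling for this run.
from qai_hub_models.models.aotgan.export import export_model

jobs = export_model(
    device="Samsung Galaxy S23",   # any device reported by hub.get_devices()
    skip_profiling=True,
    skip_inferencing=False,
    skip_downloading=False,
    output_dir="build/aotgan",
)
# With AI Hub access this returns (compile_job, profile_job, inference_job);
# profile_job is None here because profiling was skipped.
```
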
+ """ + model_name = "aotgan" + output_path = Path(output_dir or Path.cwd() / "build" / model_name) + if not can_access_qualcomm_ai_hub(): + return export_without_hub_access( + "aotgan", + "AOT-GAN", + device, + skip_profiling, + skip_inferencing, + skip_downloading, + skip_summary, + output_path, + target_runtime, + compile_options, + profile_options, + ) + + # 1. Initialize PyTorch model + model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) + input_spec = model.get_input_spec( + **get_input_spec_kwargs(model, additional_model_kwargs) + ) + + # Trace the model + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) + + # 2. Compile the model to an on-device asset + model_compile_options = model.get_hub_compile_options( + target_runtime, + compile_options + + " --force_channel_last_input image,mask" + + " --force_channel_last_output output_0", + ) + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( + model=source_model, + input_specs=input_spec, + device=hub.Device(device), + name=model_name, + options=model_compile_options, + ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) + + # 3. Profile the model asset on real devices + profile_job: Optional[hub.client.ProfileJob] = None + if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print(f"Profiling model {model_name} on a hosted device.") + submitted_profile_job = hub.submit_profile_job( + model=compile_job.get_target_model(), + device=hub.Device(device), + name=model_name, + options=profile_options_all, + ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) + + # 4. Run inference on-device with sample inputs + inference_job: Optional[hub.client.InferenceJob] = None + if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print( + f"Running inference for {model_name} on a hosted device with example inputs." + ) + sample_inputs = model.sample_inputs(input_spec) + # Convert inputs from channel first to channel last + hub_inputs = transpose_channel_first_to_last( + "image,mask", sample_inputs, target_runtime + ) + submitted_inference_job = hub.submit_inference_job( + model=compile_job.get_target_model(), + inputs=hub_inputs, + device=hub.Device(device), + name=model_name, + options=profile_options_all, + ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) + + # 5. Download the model asset to a local file + if not skip_downloading: + os.makedirs(output_path, exist_ok=True) + target_model: hub.Model = compile_job.get_target_model() # type: ignore + target_model.download(str(output_path / f"{model_name}.tflite")) + + # 6. 
Summarize the results from profiling and inference + if not skip_summary and not skip_profiling: + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore + print_profile_metrics_from_job(profile_job, profile_data) + + if not skip_summary and not skip_inferencing: + torch_out = torch_inference(model, sample_inputs) + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore + # Convert outputs from channel last to channel first + inference_result = transpose_channel_last_to_first( + "output_0", inference_result, target_runtime + ) + print_inference_metrics(inference_job, inference_result, torch_out) + + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + + return (compile_job, profile_job, inference_job) + + +def main(): + warnings.filterwarnings("ignore") + parser = export_parser(model_cls=Model) + args = parser.parse_args() + export_model(**vars(args)) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/aotgan/info.yaml b/qai_hub_models/models/aotgan/info.yaml new file mode 100644 index 00000000..18157c64 --- /dev/null +++ b/qai_hub_models/models/aotgan/info.yaml @@ -0,0 +1,31 @@ +name: AOT-GAN +id: aotgan +status: public +headline: High resolution image in-painting on-device. +domain: Computer Vision +description: AOT-GAN is a machine learning model that allows to erase and in-paint + part of given input image. +use_case: Image Editing +tags: [] +research_paper: https://arxiv.org/abs/2104.01431 +research_paper_title: Aggregated Contextual Transformations for High-Resolution Image + Inpainting +license: https://github.com/taki0112/AttnGAN-Tensorflow/blob/master/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +source_repo: https://github.com/researchmm/AOT-GAN-for-Inpainting +technical_details: + Model checkpoint: CelebAHQ + Input resolution: 512x512 + Number of parameters: 15.2M + Model size: 58.0 MB +applicable_scenarios: + - Image editing +form_factors: + - Phone + - Tablet +related_models: [] +has_static_banner: yes +has_animated_banner: yes +license_type: mit +deploy_license_type: AI Model Hub License +dataset: [] diff --git a/qai_hub_models/models/aotgan/model.py b/qai_hub_models/models/aotgan/model.py new file mode 100644 index 00000000..d5d33563 --- /dev/null +++ b/qai_hub_models/models/aotgan/model.py @@ -0,0 +1,131 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from __future__ import annotations + +import os + +import torch +import torch.nn as nn + +from qai_hub_models.models.common import SampleInputsType +from qai_hub_models.utils.asset_loaders import ( + CachedWebModelAsset, + SourceAsRoot, + load_image, +) +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.input_spec import InputSpec + +AOTGAN_SOURCE_REPOSITORY = "https://github.com/researchmm/AOT-GAN-for-Inpainting/" +AOTGAN_SOURCE_REPO_COMMIT = "418034627392289bdfc118d62bc49e6abd3bb185" +AOTGAN_SOURCE_PATCHES = [ + # Prevent overflow in layer norm (and re-use mean) + # On both on TFLite/QNN, the divider by (n - 1) ends up before the sum, so + # overflow is avoided. + os.path.abspath( + os.path.join(os.path.dirname(__file__), "patches", "layer_norm.diff") + ) +] +MODEL_ID = __name__.split(".")[-2] +SUPPORTED_PRETRAINED_MODELS = set(["celebahq", "places2"]) +DEFAULT_WEIGHTS = "celebahq" +MODEL_ASSET_VERSION = 2 + +IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, "test_images/test_input_image.png" +) +MASK_ADDRESS = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, "test_images/test_input_mask.png" +) + + +class AOTGAN(BaseModel): + """Exportable AOTGAN for Image inpainting""" + + def __init__(self, model: nn.Module) -> None: + super().__init__() + self.model = model + + @classmethod + def from_pretrained(cls, ckpt_name: str = DEFAULT_WEIGHTS): + if ckpt_name not in SUPPORTED_PRETRAINED_MODELS: + raise ValueError( + "Unsupported pre_trained model requested. Please provide either 'celeabhq' or 'places2'." + ) + downloaded_model_path = CachedWebModelAsset.from_asset_store( + MODEL_ID, + MODEL_ASSET_VERSION, + f"pretrained_models/{ckpt_name}/G0000000.pt", + ).fetch() + with SourceAsRoot( + AOTGAN_SOURCE_REPOSITORY, + AOTGAN_SOURCE_REPO_COMMIT, + MODEL_ID, + MODEL_ASSET_VERSION, + source_repo_patches=AOTGAN_SOURCE_PATCHES, + ): + from src.model.aotgan import InpaintGenerator + + # AOT-GAN InpaintGenerator uses ArgParser to + # initialize model and it uses following two parameters + # - rates: default value [1, 2, 4, 8] + # - block_num: default value 8 + # creating dummy class with default values to set the same + class InpaintArgs: + def __init__(self): + self.rates = [1, 2, 4, 8] + self.block_num = 8 + + args = InpaintArgs() + model = InpaintGenerator(args) + model.load_state_dict(torch.load(downloaded_model_path, map_location="cpu")) + return cls(model) + + def forward(self, image: torch.Tensor, mask: torch.Tensor): + """ + Run AOTGAN Inpaint Generator on `image` with given `mask` + and generates new high-resolution in-painted image. + + Parameters: + image: Pixel values pre-processed of shape [N, C, H, W] + Range: float[0, 1] + 3-channel color Space: BGR + mask: Pixel values pre-processed to have have mask values either 0. or 1. + Range: float[0, 1] and only values of 0. or 1. + 1-channel binary image. + + Returns: + In-painted image for given image and mask of shape [N, C, H, W] + Range: float[0, 1] + 3-channel color space: RGB + """ + return self.model(image, mask) + + @staticmethod + def get_input_spec( + batch_size: int = 1, + num_channels: int = 3, + height: int = 512, + width: int = 512, + ) -> InputSpec: + """ + Returns the input specification (name -> (shape, type). This can be + used to submit profiling job on Qualcomm AI Hub. 
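
A quick smoke test of the wrapper on random tensors that match the default 512x512 input spec (a sketch only; real inputs should go through the repaint app preprocessing used by `sample_inputs` below):

```python
# Assumes the AOTGAN wrapper behaves as its docstrings describe:
# image is float [0, 1] with 3 channels, mask is a binary 1-channel float map.
import torch

from qai_hub_models.models.aotgan.model import AOTGAN

model = AOTGAN.from_pretrained()                    # downloads the "celebahq" checkpoint
image = torch.rand(1, 3, 512, 512)                  # float values in [0, 1]
mask = (torch.rand(1, 1, 512, 512) > 0.5).float()   # binary 0./1. mask

with torch.no_grad():
    painted = model(image, mask)                    # in-painted image, [1, 3, 512, 512]
print(painted.shape)
```
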
+ """ + return { + "image": ((batch_size, num_channels, height, width), "float32"), + "mask": ((batch_size, 1, height, width), "float32"), + } + + def sample_inputs(self, input_spec: InputSpec | None = None) -> SampleInputsType: + """ + Provides an example image of a man with a mask over the glasses. + """ + from qai_hub_models.models._shared.repaint.app import RepaintMaskApp + + image = load_image(IMAGE_ADDRESS) + mask = load_image(MASK_ADDRESS) + torch_inputs = RepaintMaskApp.preprocess_inputs(image, mask) + return {k: [v.detach().numpy()] for k, v in torch_inputs.items()} diff --git a/qai_hub_models/models/aotgan/patches/layer_norm.diff b/qai_hub_models/models/aotgan/patches/layer_norm.diff new file mode 100644 index 00000000..e67a5ad0 --- /dev/null +++ b/qai_hub_models/models/aotgan/patches/layer_norm.diff @@ -0,0 +1,14 @@ +diff --git a/src/model/aotgan.py b/src/model/aotgan.py +index 518b76c..75d96c3 100644 +--- a/src/model/aotgan.py ++++ b/src/model/aotgan.py +@@ -80,7 +80,8 @@ class AOTBlock(nn.Module): + + def my_layer_norm(feat): + mean = feat.mean((2, 3), keepdim=True) +- std = feat.std((2, 3), keepdim=True) + 1e-9 ++ num_samples = feat.shape[2] * feat.shape[3] ++ std = torch.sqrt(torch.sum((feat - mean) ** 2 / (num_samples - 1), (2, 3), keepdim=True)) + 1e-9 + feat = 2 * (feat - mean) / std - 1 + feat = 5 * feat + return feat diff --git a/qai_hub_models/models/aotgan/perf.yaml b/qai_hub_models/models/aotgan/perf.yaml new file mode 100644 index 00000000..895bc321 --- /dev/null +++ b/qai_hub_models/models/aotgan/perf.yaml @@ -0,0 +1,108 @@ +aggregated: + supported_oses: + - Android + supported_devices: + - Google Pixel 3 + - Google Pixel 3a + - Google Pixel 3a XL + - Google Pixel 4 + - Google Pixel 4a + - Google Pixel 5a 5G + - Samsung Galaxy S21 + - Samsung Galaxy S21 Ultra + - Samsung Galaxy S21+ + - Samsung Galaxy S22 5G + - Samsung Galaxy S22 Ultra 5G + - Samsung Galaxy S22+ 5G + - Samsung Galaxy S23 + - Samsung Galaxy S23 Ultra + - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra + - Samsung Galaxy Tab S8 + - Xiaomi 12 + - Xiaomi 12 Pro + supported_chipsets: + - Snapdragon® 8 Gen 1 + - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 + - Snapdragon® 888 +models: +- name: AOT-GAN + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 172836.0 + throughput: 5.785831655442153 + estimated_peak_memory_range: + min: 3305472 + max: 6628872 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 235 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 235 + job_id: jqpyel4gy + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:15:11.791489Z' + torchscript_onnx_qnn: + inference_time: 162909.0 + throughput: 6.138396282587212 + estimated_peak_memory_range: + min: 4268032 + max: 33754568 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 275 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 275 + job_id: j1p8on8g9 + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 127366.0 + throughput: 7.851388910698303 + estimated_peak_memory_range: + min: 2334720 + max: 227053936 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 235 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 235 + job_id: j2p0ywegw + job_status: Passed + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: 
Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:17:12.829523Z' + torchscript_onnx_qnn: + inference_time: 120027.0 + throughput: 8.331458755113433 + estimated_peak_memory_range: + min: 0 + max: 140852624 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 275 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 275 + job_id: jogkz1ogd + job_status: Passed diff --git a/qai_hub_models/models/aotgan/test.py b/qai_hub_models/models/aotgan/test.py new file mode 100644 index 00000000..302df4ef --- /dev/null +++ b/qai_hub_models/models/aotgan/test.py @@ -0,0 +1,68 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +import numpy as np +import pytest + +from qai_hub_models.models._shared.repaint.app import RepaintMaskApp +from qai_hub_models.models.aotgan.demo import main as demo_main +from qai_hub_models.models.aotgan.model import ( + AOTGAN, + IMAGE_ADDRESS, + MASK_ADDRESS, + MODEL_ASSET_VERSION, + MODEL_ID, +) +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image +from qai_hub_models.utils.testing import assert_most_close, skip_clone_repo_check + +OUTPUT_ADDRESS = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, "test_images/test_output.png" +) + + +@skip_clone_repo_check +def test_task(): + app = RepaintMaskApp(AOTGAN.from_pretrained()) + + img = load_image(IMAGE_ADDRESS) + mask_image = load_image(MASK_ADDRESS) + + out_imgs = app.paint_mask_on_image(img, mask_image) + expected_out = load_image(OUTPUT_ADDRESS) + assert_most_close( + np.asarray(out_imgs[0], dtype=np.float32), + np.asarray(expected_out, dtype=np.float32), + 0.005, + rtol=0.02, + atol=1.5, + ) + + +@pytest.mark.trace +@skip_clone_repo_check +def test_trace(): + net = AOTGAN.from_pretrained() + input_spec = net.get_input_spec() + trace = net.convert_to_torchscript(input_spec) + + img = load_image(IMAGE_ADDRESS) + mask_image = load_image(MASK_ADDRESS) + app = RepaintMaskApp(trace) + + out_imgs = app.paint_mask_on_image(img, mask_image) + expected_out = load_image(OUTPUT_ADDRESS) + assert_most_close( + np.asarray(out_imgs[0], dtype=np.float32), + np.asarray(expected_out, dtype=np.float32), + 0.005, + rtol=0.02, + atol=1.5, + ) + + +@skip_clone_repo_check +def test_demo(): + # Run demo and verify it does not crash + demo_main(is_test=True) diff --git a/qai_hub_models/models/baichuan_7b_quantized/README.md b/qai_hub_models/models/baichuan_7b_quantized/README.md index 58a190d9..e7fb6962 100644 --- a/qai_hub_models/models/baichuan_7b_quantized/README.md +++ b/qai_hub_models/models/baichuan_7b_quantized/README.md @@ -10,7 +10,7 @@ This is based on the implementation of Baichuan-7B found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/baichuan_7b_quantized). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -18,7 +18,7 @@ a hosted Qualcomm® device. ## License - The license for the original implementation of Baichuan-7B can be found [here](https://github.com/baichuan-inc/Baichuan-7B/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Baichuan 2: Open Large-scale Language Models](https://arxiv.org/abs/2309.10305) diff --git a/qai_hub_models/models/baichuan_7b_quantized/info.yaml b/qai_hub_models/models/baichuan_7b_quantized/info.yaml index 4fb26cc4..9d4c7a7d 100644 --- a/qai_hub_models/models/baichuan_7b_quantized/info.yaml +++ b/qai_hub_models/models/baichuan_7b_quantized/info.yaml @@ -17,6 +17,7 @@ tags: research_paper: https://arxiv.org/abs/2309.10305 research_paper_title: "Baichuan 2: Open Large-scale Language Models" license: https://github.com/baichuan-inc/Baichuan-7B/blob/main/LICENSE +deploy_license: https://github.com/baichuan-inc/Baichuan-7B/blob/main/LICENSE source_repo: https://github.com/baichuan-inc/Baichuan-7B/ technical_details: Number of parameters: 7B @@ -40,6 +41,7 @@ form_factors: - Phone - Tablet has_static_banner: yes -has_animated_banner: yes +has_animated_banner: no license_type: apache-2.0 +deploy_license_type: apache-2.0 dataset: [] diff --git a/qai_hub_models/models/common.py b/qai_hub_models/models/common.py new file mode 100644 index 00000000..bf635e0e --- /dev/null +++ b/qai_hub_models/models/common.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from enum import Enum +from typing import Dict, List + +import numpy as np + + +class TargetRuntime(Enum): + TFLITE = 0 + QNN = 1 + + def __str__(self): + return self.name.lower() + + +class SourceModelFormat(Enum): + ONNX = 0 + TORCHSCRIPT = 1 + + +SampleInputsType = Dict[str, List[np.ndarray]] diff --git a/qai_hub_models/models/controlnet_quantized/README.md b/qai_hub_models/models/controlnet_quantized/README.md index 9c93c519..5224338a 100644 --- a/qai_hub_models/models/controlnet_quantized/README.md +++ b/qai_hub_models/models/controlnet_quantized/README.md @@ -10,7 +10,7 @@ This is based on the implementation of ControlNet found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/controlnet_quantized). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -30,7 +30,7 @@ python -m qai_hub_models.models.controlnet_quantized.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -46,7 +46,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ControlNet can be found [here](https://github.com/lllyasviel/ControlNet/blob/main/LICENSE). 
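
The new `qai_hub_models/models/common.py` module above is small but referenced throughout this patch; a short illustration of its shared types:

```python
# TargetRuntime stringifies to a lowercase runtime name, and SampleInputsType
# is the dict-of-named-numpy-batches shape used by sample_inputs() methods.
import numpy as np

from qai_hub_models.models.common import SampleInputsType, TargetRuntime

print(str(TargetRuntime.TFLITE))  # -> "tflite"
print(str(TargetRuntime.QNN))     # -> "qnn"

sample: SampleInputsType = {
    "image": [np.zeros((1, 3, 512, 512), dtype=np.float32)],
    "mask": [np.zeros((1, 1, 512, 512), dtype=np.float32)],
}
```
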
-- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Adding Conditional Control to Text-to-Image Diffusion Models](https://arxiv.org/abs/2302.05543) diff --git a/qai_hub_models/models/controlnet_quantized/export.py b/qai_hub_models/models/controlnet_quantized/export.py index b83cfb58..3bc3a0a0 100644 --- a/qai_hub_models/models/controlnet_quantized/export.py +++ b/qai_hub_models/models/controlnet_quantized/export.py @@ -9,13 +9,13 @@ import warnings from pathlib import Path -from typing import List, Mapping, Optional, Tuple +from typing import Any, Dict, List, Mapping, Optional, Tuple, cast import qai_hub as hub from qai_hub_models.models.controlnet_quantized import Model from qai_hub_models.utils.args import export_parser -from qai_hub_models.utils.base_model import TargetRuntime +from qai_hub_models.utils.base_model import BasePrecompiledModel, TargetRuntime from qai_hub_models.utils.printing import print_profile_metrics_from_job from qai_hub_models.utils.qai_hub_helpers import ( can_access_qualcomm_ai_hub, @@ -23,16 +23,16 @@ ) ALL_COMPONENTS = [ - "Text-Encoder-Quantized", - "UNet-Quantized", - "VAE-Decoder-Quantized", - "ControlNet-Quantized", + "TextEncoder_Quantized", + "UNet_Quantized", + "VAEDecoder_Quantized", + "ControlNet_Quantized", ] DEFAULT_COMPONENTS = [ - "Text-Encoder-Quantized", - "VAE-Decoder-Quantized", - "UNet-Quantized", - "ControlNet-Quantized", + "TextEncoder_Quantized", + "VAEDecoder_Quantized", + "UNet_Quantized", + "ControlNet_Quantized", ] @@ -85,9 +85,9 @@ def export_model( output_path = Path(output_dir or Path.cwd() / "build" / model_name) component_arg = components components = components or DEFAULT_COMPONENTS - for component in components: - if component not in ALL_COMPONENTS: - raise ValueError(f"Invalid component {component}.") + for component_name in components: + if component_name not in ALL_COMPONENTS: + raise ValueError(f"Invalid component {component_name}.") if not can_access_qualcomm_ai_hub(): return export_without_hub_access( "controlnet_quantized", @@ -104,18 +104,19 @@ def export_model( component_arg, ) + target_runtime = TargetRuntime.TFLITE # 1. Initialize model print("Initializing model class") model = Model.from_precompiled() - components_dict = {} - if "Text-Encoder-Quantized" in components: - components_dict["Text-Encoder-Quantized"] = model.text_encoder - if "UNet-Quantized" in components: - components_dict["UNet-Quantized"] = model.unet - if "VAE-Decoder-Quantized" in components: - components_dict["VAE-Decoder-Quantized"] = model.vae_decoder - if "ControlNet-Quantized" in components: - components_dict["ControlNet-Quantized"] = model.controlnet + components_dict: Dict[str, BasePrecompiledModel] = {} + if "TextEncoder_Quantized" in components: + components_dict["TextEncoder_Quantized"] = model.text_encoder # type: ignore + if "UNet_Quantized" in components: + components_dict["UNet_Quantized"] = model.unet # type: ignore + if "VAEDecoder_Quantized" in components: + components_dict["VAEDecoder_Quantized"] = model.vae_decoder # type: ignore + if "ControlNet_Quantized" in components: + components_dict["ControlNet_Quantized"] = model.controlnet # type: ignore # 2. Upload model assets to hub print("Uploading model assets on hub") @@ -126,39 +127,51 @@ def export_model( ) # 3. 
Profile the model assets on real devices - profile_jobs = {} + profile_jobs: Dict[str, hub.client.ProfileJob] = {} if not skip_profiling: for component_name in components: + profile_options_all = components_dict[ + component_name + ].get_hub_profile_options(target_runtime, profile_options) print(f"Profiling model {component_name} on a hosted device.") - profile_jobs[component_name] = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=uploaded_models[component_name], device=hub.Device(device), - name=f"{component_name}", - options=profile_options, + name=f"{model_name}_{component_name}", + options=profile_options_all, + ) + profile_jobs[component_name] = cast( + hub.client.ProfileJob, submitted_profile_job ) # 4. Run inference on-device with sample inputs - inference_jobs = {} + inference_jobs: Dict[str, hub.client.InferenceJob] = {} if not skip_inferencing: for component_name in components: print( f"Running inference for {component_name} on a hosted device with example inputs." ) + profile_options_all = components_dict[ + component_name + ].get_hub_profile_options(target_runtime, profile_options) sample_inputs = components_dict[component_name].sample_inputs() - inference_jobs[component_name] = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=uploaded_models[component_name], inputs=sample_inputs, device=hub.Device(device), - name=f"{component_name}", - options=profile_options, + name=f"{model_name}_{component_name}", + options=profile_options_all, + ) + inference_jobs[component_name] = cast( + hub.client.InferenceJob, submitted_inference_job ) # 5. Summarize the results from profiling if not skip_summary and not skip_profiling: for component_name in components: profile_job = profile_jobs[component_name] - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) return { diff --git a/qai_hub_models/models/controlnet_quantized/info.yaml b/qai_hub_models/models/controlnet_quantized/info.yaml index d2a7df33..1bec5406 100644 --- a/qai_hub_models/models/controlnet_quantized/info.yaml +++ b/qai_hub_models/models/controlnet_quantized/info.yaml @@ -13,6 +13,7 @@ tags: research_paper: https://arxiv.org/abs/2302.05543 research_paper_title: Adding Conditional Control to Text-to-Image Diffusion Models license: https://github.com/lllyasviel/ControlNet/blob/main/LICENSE +deploy_license: https://github.com/lllyasviel/ControlNet/blob/main/LICENSE source_repo: https://github.com/lllyasviel/ControlNet technical_details: Input: Text prompt and input image as a reference @@ -35,3 +36,4 @@ has_static_banner: yes has_animated_banner: no dataset: [] license_type: apache-2.0 +deploy_license_type: apache-2.0 diff --git a/qai_hub_models/models/controlnet_quantized/model.py b/qai_hub_models/models/controlnet_quantized/model.py index 57a882a0..0abe7574 100644 --- a/qai_hub_models/models/controlnet_quantized/model.py +++ b/qai_hub_models/models/controlnet_quantized/model.py @@ -6,8 +6,9 @@ import os +from qai_hub_models.models.protocols import FromPrecompiledProtocol from qai_hub_models.utils.asset_loaders import CachedWebModelAsset -from qai_hub_models.utils.base_model import BasePrecompiledModel +from qai_hub_models.utils.base_model import BasePrecompiledModel, CollectionModel from qai_hub_models.utils.input_spec import InputSpec 
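
With the ControlNet component identifiers renamed, only a subset of the pipeline needs to be exported per run. A sketch, assuming the controlnet export script shares the `device` and `skip_*` parameters of the other generated export scripts (its full signature is not shown in this hunk):

```python
# Export only the text encoder and ControlNet components; component names must
# come from ALL_COMPONENTS or export_model raises ValueError.
from qai_hub_models.models.controlnet_quantized.export import export_model

export_model(
    device="Samsung Galaxy S23",
    components=["TextEncoder_Quantized", "ControlNet_Quantized"],
    skip_profiling=True,  # upload + inference only for this run (assumed flag)
)
```
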
MODEL_ID = __name__.split(".")[-2] @@ -19,7 +20,7 @@ CONTROL_NET = os.path.join(QNN_SDK_PREFIX, "controlnet.serialized.bin") -class ControlNetQuantized: +class ControlNetQuantized(FromPrecompiledProtocol, CollectionModel): """ ControlNet class consists of - Text Encoder @@ -55,9 +56,6 @@ class ClipVITTextEncoder(BasePrecompiledModel): and compiled into serialized binary for Qualcomm Snapdragon Gen2+. """ - def __init__(self, target_model_path) -> None: - self.target_model_path = target_model_path - @classmethod def from_precompiled(cls) -> "ClipVITTextEncoder": text_encoder_path = CachedWebModelAsset.from_asset_store( @@ -65,10 +63,8 @@ def from_precompiled(cls) -> "ClipVITTextEncoder": ).fetch() return ClipVITTextEncoder(text_encoder_path) - def get_target_model_path(self) -> str: - return self.target_model_path - - def get_input_spec(self) -> InputSpec: + @staticmethod + def get_input_spec() -> InputSpec: return {"input_1": ((1, 77), "int32")} @@ -80,9 +76,6 @@ class Unet(BasePrecompiledModel): and compiled into serialized binary for Qualcomm Snapdragon Gen2+. """ - def __init__(self, target_model_path) -> None: - self.target_model_path = target_model_path - @classmethod def from_precompiled(cls) -> "Unet": model_path = CachedWebModelAsset.from_asset_store( @@ -90,10 +83,8 @@ def from_precompiled(cls) -> "Unet": ).fetch() return Unet(model_path) - def get_target_model_path(self) -> str: - return self.target_model_path - - def get_input_spec(self) -> InputSpec: + @staticmethod + def get_input_spec() -> InputSpec: return { "input_1": ((1, 64, 64, 4), "float32"), "input_2": ((1, 1280), "float32"), @@ -122,9 +113,6 @@ class VAEDecoder(BasePrecompiledModel): and compiled into serialized binary for Qualcomm Snapdragon Gen2+. """ - def __init__(self, target_model_path) -> None: - self.target_model_path = target_model_path - @classmethod def from_precompiled(cls) -> "VAEDecoder": model_path = CachedWebModelAsset.from_asset_store( @@ -132,10 +120,8 @@ def from_precompiled(cls) -> "VAEDecoder": ).fetch() return VAEDecoder(model_path) - def get_target_model_path(self) -> str: - return self.target_model_path - - def get_input_spec(self) -> InputSpec: + @staticmethod + def get_input_spec() -> InputSpec: return {"input_1": ((1, 64, 64, 4), "float32")} @@ -147,9 +133,6 @@ class ControlNet(BasePrecompiledModel): and compiled into serialized binary for Qualcomm Snapdragon Gen2+. 
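
Because the precompiled component classes now expose `get_input_spec` as a `staticmethod`, their input shapes can be queried without fetching any model assets, for example:

```python
# Input specs are available directly on the classes after this change.
from qai_hub_models.models.controlnet_quantized.model import ClipVITTextEncoder, Unet

print(ClipVITTextEncoder.get_input_spec())  # {"input_1": ((1, 77), "int32")}
print(Unet.get_input_spec()["input_1"])     # ((1, 64, 64, 4), "float32")
```
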
""" - def __init__(self, target_model_path) -> None: - self.target_model_path = target_model_path - @classmethod def from_precompiled(cls) -> "ControlNet": model_path = CachedWebModelAsset.from_asset_store( @@ -157,10 +140,8 @@ def from_precompiled(cls) -> "ControlNet": ).fetch() return ControlNet(model_path) - def get_target_model_path(self) -> str: - return self.target_model_path - - def get_input_spec(self) -> InputSpec: + @staticmethod + def get_input_spec() -> InputSpec: return { "input_1": ((1, 64, 64, 4), "float32"), "input_2": ((1, 1280), "float32"), diff --git a/qai_hub_models/models/controlnet_quantized/requirements.txt b/qai_hub_models/models/controlnet_quantized/requirements.txt index 8d0cd0c6..83aa3d48 100644 --- a/qai_hub_models/models/controlnet_quantized/requirements.txt +++ b/qai_hub_models/models/controlnet_quantized/requirements.txt @@ -1,3 +1,2 @@ -transformers==4.31.0 +transformers==4.27.4 diffusers[torch]==0.21.4 -opencv-python==4.8.1.78 diff --git a/qai_hub_models/models/controlnet_quantized/test.py b/qai_hub_models/models/controlnet_quantized/test.py index f192cd28..18c31392 100644 --- a/qai_hub_models/models/controlnet_quantized/test.py +++ b/qai_hub_models/models/controlnet_quantized/test.py @@ -8,6 +8,11 @@ from qai_hub_models.models.controlnet_quantized.demo import main as demo_main from qai_hub_models.models.controlnet_quantized.export import export_model +from qai_hub_models.models.controlnet_quantized.model import ControlNetQuantized + + +def test_from_precompiled(): + ControlNetQuantized.from_precompiled() @pytest.mark.skip("#105 move slow_cloud and slow tests to nightly.") diff --git a/qai_hub_models/models/convnext_tiny/README.md b/qai_hub_models/models/convnext_tiny/README.md index 836052f4..3f75d201 100644 --- a/qai_hub_models/models/convnext_tiny/README.md +++ b/qai_hub_models/models/convnext_tiny/README.md @@ -10,7 +10,7 @@ This is based on the implementation of ConvNext-Tiny found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/convnext_tiny). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.convnext_tiny.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ConvNext-Tiny can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). 
+- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [A ConvNet for the 2020s](https://arxiv.org/abs/2201.03545) diff --git a/qai_hub_models/models/convnext_tiny/conftest.py b/qai_hub_models/models/convnext_tiny/conftest.py new file mode 100644 index 00000000..b53df396 --- /dev/null +++ b/qai_hub_models/models/convnext_tiny/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.convnext_tiny import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.convnext_tiny.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/convnext_tiny/demo.py b/qai_hub_models/models/convnext_tiny/demo.py index 892edb19..42a15676 100644 --- a/qai_hub_models/models/convnext_tiny/demo.py +++ b/qai_hub_models/models/convnext_tiny/demo.py @@ -3,11 +3,11 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo -from qai_hub_models.models.convnext_tiny.model import ConvNextTiny +from qai_hub_models.models.convnext_tiny.model import MODEL_ID, ConvNextTiny def main(is_test: bool = False): - imagenet_demo(ConvNextTiny, is_test) + imagenet_demo(ConvNextTiny, MODEL_ID, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/convnext_tiny/export.py b/qai_hub_models/models/convnext_tiny/export.py index b2173781..4649e4ef 100644 --- a/qai_hub_models/models/convnext_tiny/export.py +++ b/qai_hub_models/models/convnext_tiny/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,35 +109,43 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image_tensor" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. 
Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -146,33 +154,35 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/convnext_tiny/info.yaml b/qai_hub_models/models/convnext_tiny/info.yaml index 70292699..f3e7e9e1 100644 --- a/qai_hub_models/models/convnext_tiny/info.yaml +++ b/qai_hub_models/models/convnext_tiny/info.yaml @@ -12,6 +12,7 @@ tags: [] research_paper: https://arxiv.org/abs/2201.03545 research_paper_title: A ConvNet for the 2020s license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/convnext.py technical_details: Model checkpoint: Imagenet @@ -33,6 +34,7 @@ related_models: has_static_banner: yes has_animated_banner: yes license_type: bsd-3-clause +deploy_license_type: 
AI Model Hub License dataset: - imagenet-1k - imagenet-22k diff --git a/qai_hub_models/models/convnext_tiny/model.py b/qai_hub_models/models/convnext_tiny/model.py index 8b838392..3e6079f1 100644 --- a/qai_hub_models/models/convnext_tiny/model.py +++ b/qai_hub_models/models/convnext_tiny/model.py @@ -14,6 +14,6 @@ class ConvNextTiny(ImagenetClassifier): @classmethod - def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> ImagenetClassifier: + def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> ConvNextTiny: net = tv_models.convnext_tiny(weights=weights) return cls(net) diff --git a/qai_hub_models/models/convnext_tiny/perf.yaml b/qai_hub_models/models/convnext_tiny/perf.yaml index e76fd904..d850c5bf 100644 --- a/qai_hub_models/models/convnext_tiny/perf.yaml +++ b/qai_hub_models/models/convnext_tiny/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: ConvNext-Tiny performance_metrics: - torchscript_onnx_tflite: - inference_time: 11532.0 - throughput: 86.71522719389525 + inference_time: 11538.0 + throughput: 86.67013347200555 estimated_peak_memory_range: - min: 339968 - max: 2817216 + min: 53248 + max: 2750320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,8 +43,16 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 380 - job_id: jmg9zykqp + job_id: jnp10l25q job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:26:23.235644Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -57,11 +68,41 @@ models: total_layers: 0 job_id: '' job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 8123.0 + throughput: 123.10722639418934 + estimated_peak_memory_range: + min: 40960 + max: 205818960 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 380 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 380 + job_id: jvgdw9e5j + job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:23:45.273161Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:26:23.235670Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped diff --git a/qai_hub_models/models/convnext_tiny/test.py b/qai_hub_models/models/convnext_tiny/test.py index b45cb350..5b0b2b77 100644 --- a/qai_hub_models/models/convnext_tiny/test.py +++ b/qai_hub_models/models/convnext_tiny/test.py @@ -2,6 +2,8 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
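
A consistency note on the perf.yaml entries: the reported throughput appears to be `1e6 / inference_time`, i.e. `inference_time` is in microseconds. This unit is an assumption (it is not documented in the YAML itself), but it checks out against the values listed above:

```python
# Spot-check: throughput (inferences/s) == 1e6 / inference_time (microseconds).
for inference_time_us, reported in [
    (172836.0, 5.785831655442153),    # AOT-GAN, Galaxy S23, TFLite
    (11538.0, 86.67013347200555),     # ConvNext-Tiny, Galaxy S23, TFLite
    (8123.0, 123.10722639418934),     # ConvNext-Tiny, Galaxy S24, TFLite
]:
    assert abs(1e6 / inference_time_us - reported) < 1e-6 * reported
```
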
# SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- +import pytest + from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( run_imagenet_classifier_test, run_imagenet_classifier_trace_test, @@ -14,6 +16,7 @@ def test_task(): run_imagenet_classifier_test(ConvNextTiny.from_pretrained(), MODEL_ID) +@pytest.mark.trace def test_trace(): run_imagenet_classifier_trace_test(ConvNextTiny.from_pretrained()) diff --git a/qai_hub_models/models/ddrnet23_slim/README.md b/qai_hub_models/models/ddrnet23_slim/README.md index c69a3a18..374c2f4a 100644 --- a/qai_hub_models/models/ddrnet23_slim/README.md +++ b/qai_hub_models/models/ddrnet23_slim/README.md @@ -10,7 +10,7 @@ This is based on the implementation of DDRNet23-Slim found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/ddrnet23_slim). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.ddrnet23_slim.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of DDRNet23-Slim can be found [here](https://github.com/chenjun2hao/DDRNet.pytorch/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Deep Dual-resolution Networks for Real-time and Accurate Semantic Segmentation of Road Scenes](https://arxiv.org/abs/2101.06085) diff --git a/qai_hub_models/models/ddrnet23_slim/conftest.py b/qai_hub_models/models/ddrnet23_slim/conftest.py new file mode 100644 index 00000000..311a5aa2 --- /dev/null +++ b/qai_hub_models/models/ddrnet23_slim/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.ddrnet23_slim import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. 
+ """ + mock = patch( + "qai_hub_models.models.ddrnet23_slim.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/ddrnet23_slim/demo.py b/qai_hub_models/models/ddrnet23_slim/demo.py index c97479cd..f6006e1a 100644 --- a/qai_hub_models/models/ddrnet23_slim/demo.py +++ b/qai_hub_models/models/ddrnet23_slim/demo.py @@ -36,8 +36,8 @@ def main(is_test: bool = False): help="image file path or URL", ) args = parser.parse_args([] if is_test else None) - model = demo_model_from_cli_args(DDRNet, args) - validate_on_device_demo_args(args, DDRNet.get_model_id()) + model = demo_model_from_cli_args(DDRNet, MODEL_ID, args) + validate_on_device_demo_args(args, MODEL_ID) # Load image (_, _, height, width) = DDRNet.get_input_spec()["image"][0] diff --git a/qai_hub_models/models/ddrnet23_slim/export.py b/qai_hub_models/models/ddrnet23_slim/export.py index ef4a4df7..62378aea 100644 --- a/qai_hub_models/models/ddrnet23_slim/export.py +++ b/qai_hub_models/models/ddrnet23_slim/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -110,7 +110,7 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( @@ -119,29 +119,37 @@ def export_model( + " --force_channel_last_input image" + " --force_channel_last_output output_0", ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -150,37 +158,39 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. 
Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore # Convert outputs from channel last to channel first inference_result = transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/ddrnet23_slim/info.yaml b/qai_hub_models/models/ddrnet23_slim/info.yaml index 95e0fdab..cf0776a3 100644 --- a/qai_hub_models/models/ddrnet23_slim/info.yaml +++ b/qai_hub_models/models/ddrnet23_slim/info.yaml @@ -14,6 +14,7 @@ research_paper: https://arxiv.org/abs/2101.06085 research_paper_title: Deep Dual-resolution Networks for Real-time and Accurate Semantic Segmentation of Road Scenes license: https://github.com/chenjun2hao/DDRNet.pytorch/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/chenjun2hao/DDRNet.pytorch technical_details: Model checkpoint: DDRNet23s_imagenet.pth @@ -33,5 +34,6 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: mit +deploy_license_type: AI Model Hub License dataset: - cityscapes diff --git a/qai_hub_models/models/ddrnet23_slim/perf.yaml b/qai_hub_models/models/ddrnet23_slim/perf.yaml index 1f405dd9..277e6c15 100644 --- a/qai_hub_models/models/ddrnet23_slim/perf.yaml +++ b/qai_hub_models/models/ddrnet23_slim/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: DDRNet23-Slim performance_metrics: - torchscript_onnx_tflite: - inference_time: 6736.0 - throughput: 148.45605700712588 + inference_time: 6741.0 + throughput: 148.3459427384661 estimated_peak_memory_range: - min: 991232 - max: 3246040 + min: 1024000 + max: 28696320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,8 +43,16 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 131 - job_id: jvgddqv6g + job_id: jz5wo7zp1 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: 
Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:17:36.932886Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -57,11 +68,41 @@ models: total_layers: 0 job_id: '' job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 4644.0 + throughput: 215.33161068044788 + estimated_peak_memory_range: + min: 45056 + max: 68954288 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 131 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 131 + job_id: jmg9vmq57 + job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:12:22.404643Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:17:36.932896Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped diff --git a/qai_hub_models/models/ddrnet23_slim/test.py b/qai_hub_models/models/ddrnet23_slim/test.py index 5833b4d1..bbffdb0d 100644 --- a/qai_hub_models/models/ddrnet23_slim/test.py +++ b/qai_hub_models/models/ddrnet23_slim/test.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- import numpy as np +import pytest from qai_hub_models.models.ddrnet23_slim.app import DDRNetApp from qai_hub_models.models.ddrnet23_slim.demo import INPUT_IMAGE_ADDRESS @@ -33,6 +34,7 @@ def test_task(): ) +@pytest.mark.trace @skip_clone_repo_check def test_trace(): app = DDRNetApp(DDRNet.from_pretrained().convert_to_torchscript()) diff --git a/qai_hub_models/models/deeplabv3_resnet50/README.md b/qai_hub_models/models/deeplabv3_resnet50/README.md index 75610be5..a12383b3 100644 --- a/qai_hub_models/models/deeplabv3_resnet50/README.md +++ b/qai_hub_models/models/deeplabv3_resnet50/README.md @@ -10,7 +10,7 @@ This is based on the implementation of DeepLabV3-ResNet50 found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/deeplabv3_resnet50). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.deeplabv3_resnet50.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of DeepLabV3-ResNet50 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). 
+- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Rethinking Atrous Convolution for Semantic Image Segmentation](https://arxiv.org/abs/1706.05587) diff --git a/qai_hub_models/models/deeplabv3_resnet50/conftest.py b/qai_hub_models/models/deeplabv3_resnet50/conftest.py new file mode 100644 index 00000000..bc194115 --- /dev/null +++ b/qai_hub_models/models/deeplabv3_resnet50/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.deeplabv3_resnet50 import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.deeplabv3_resnet50.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/deeplabv3_resnet50/demo.py b/qai_hub_models/models/deeplabv3_resnet50/demo.py index e182f4ae..5c5afb78 100644 --- a/qai_hub_models/models/deeplabv3_resnet50/demo.py +++ b/qai_hub_models/models/deeplabv3_resnet50/demo.py @@ -20,7 +20,9 @@ def main(is_test: bool = False): - deeplabv3_demo(DeepLabV3_ResNet50, INPUT_IMAGE_ADDRESS, NUM_CLASSES, is_test) + deeplabv3_demo( + DeepLabV3_ResNet50, MODEL_ID, INPUT_IMAGE_ADDRESS, NUM_CLASSES, is_test + ) if __name__ == "__main__": diff --git a/qai_hub_models/models/deeplabv3_resnet50/export.py b/qai_hub_models/models/deeplabv3_resnet50/export.py index 2bacc87c..ff57a6fc 100644 --- a/qai_hub_models/models/deeplabv3_resnet50/export.py +++ b/qai_hub_models/models/deeplabv3_resnet50/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,7 +109,7 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( @@ -118,29 +118,37 @@ def export_model( + " --force_channel_last_input image" + " --force_channel_last_output output_0,output_1", ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. 
Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options + " --compute_unit gpu", + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -149,30 +157,31 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options + " --compute_unit gpu", + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore # Convert outputs from channel last to channel first inference_result = transpose_channel_last_to_first( "output_0,output_1", inference_result, target_runtime diff --git a/qai_hub_models/models/deeplabv3_resnet50/info.yaml b/qai_hub_models/models/deeplabv3_resnet50/info.yaml index 2e150816..68562270 100644 --- a/qai_hub_models/models/deeplabv3_resnet50/info.yaml +++ b/qai_hub_models/models/deeplabv3_resnet50/info.yaml @@ -11,6 +11,7 @@ tags: [] research_paper: https://arxiv.org/abs/1706.05587 research_paper_title: Rethinking Atrous Convolution for Semantic Image Segmentation license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/segmentation/deeplabv3.py technical_details: @@ -32,4 +33,5 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: [] diff --git 
a/qai_hub_models/models/deeplabv3_resnet50/model.py b/qai_hub_models/models/deeplabv3_resnet50/model.py index 3514cc0d..0fc2e6b1 100644 --- a/qai_hub_models/models/deeplabv3_resnet50/model.py +++ b/qai_hub_models/models/deeplabv3_resnet50/model.py @@ -9,7 +9,7 @@ from qai_hub_models.evaluators.base_evaluators import BaseEvaluator from qai_hub_models.models._shared.deeplab.evaluator import DeepLabV3Evaluator -from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.base_model import BaseModel, TargetRuntime from qai_hub_models.utils.input_spec import InputSpec MODEL_ID = __name__.split(".")[-2] @@ -62,3 +62,19 @@ def get_input_spec( # This can be used with the qai_hub python API to declare # the model input specification upon submitting a profile job. return {"image": ((batch_size, num_channels, height, width), "float32")} + + def get_hub_compile_options( + self, target_runtime: TargetRuntime, other_compile_options: str = "" + ) -> str: + compile_options = super().get_hub_compile_options( + target_runtime, other_compile_options + ) + return compile_options + " --compute_unit gpu" + + def get_hub_profile_options( + self, target_runtime: TargetRuntime, other_profile_options: str = "" + ) -> str: + profile_options = super().get_hub_profile_options( + target_runtime, other_profile_options + ) + return profile_options + " --compute_unit gpu" diff --git a/qai_hub_models/models/deeplabv3_resnet50/perf.yaml b/qai_hub_models/models/deeplabv3_resnet50/perf.yaml index eeccef3a..1d0853f7 100644 --- a/qai_hub_models/models/deeplabv3_resnet50/perf.yaml +++ b/qai_hub_models/models/deeplabv3_resnet50/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: DeepLabV3-ResNet50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 57759.0 - throughput: 17.313319136411643 + inference_time: 57559.0 + throughput: 17.373477649021005 estimated_peak_memory_range: - min: 12288 - max: 171360368 + min: 106496 + max: 3561872 primary_compute_unit: GPU precision: fp16 layer_info: @@ -40,14 +43,22 @@ models: layers_on_gpu: 96 layers_on_cpu: 0 total_layers: 96 - job_id: jqp4ydxqp + job_id: jw5663y5o job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:49:36.627925Z' torchscript_onnx_qnn: - inference_time: 146022.0 - throughput: 6.848283135417951 + inference_time: 145372.0 + throughput: 6.878903777893955 estimated_peak_memory_range: - min: 806912 - max: 9532744 + min: 724992 + max: 17276040 primary_compute_unit: GPU precision: fp16 layer_info: @@ -55,13 +66,43 @@ models: layers_on_gpu: 82 layers_on_cpu: 0 total_layers: 82 - job_id: j0pxl67jp + job_id: jwgoy1k58 + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 40153.0 + throughput: 24.904739371902473 + estimated_peak_memory_range: + min: 4358144 + max: 29236608 + primary_compute_unit: GPU + precision: fp16 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 96 + layers_on_cpu: 0 + total_layers: 96 + job_id: j1p3k4n52 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - 
chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:31:27.279356Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:51:31.429028Z' + torchscript_onnx_qnn: + inference_time: 104457.0 + throughput: 9.573317250160352 + estimated_peak_memory_range: + min: 675840 + max: 24520160 + primary_compute_unit: GPU + precision: fp16 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 82 + layers_on_cpu: 0 + total_layers: 82 + job_id: j1pv31r5x + job_status: Passed diff --git a/qai_hub_models/models/deeplabv3_resnet50/test.py b/qai_hub_models/models/deeplabv3_resnet50/test.py index cfff53bf..72a58132 100644 --- a/qai_hub_models/models/deeplabv3_resnet50/test.py +++ b/qai_hub_models/models/deeplabv3_resnet50/test.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- import numpy as np +import pytest from qai_hub_models.models._shared.deeplab.app import DeepLabV3App from qai_hub_models.models.deeplabv3_resnet50.demo import INPUT_IMAGE_ADDRESS @@ -37,6 +38,7 @@ def test_task(): ) +@pytest.mark.trace @skip_clone_repo_check def test_trace(): image = load_image(INPUT_IMAGE_ADDRESS) diff --git a/qai_hub_models/models/densenet121/README.md b/qai_hub_models/models/densenet121/README.md index a586b0d2..2689b6a2 100644 --- a/qai_hub_models/models/densenet121/README.md +++ b/qai_hub_models/models/densenet121/README.md @@ -10,7 +10,7 @@ This is based on the implementation of DenseNet-121 found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/densenet121). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.densenet121.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of DenseNet-121 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Densely Connected Convolutional Networks](https://arxiv.org/abs/1608.06993) diff --git a/qai_hub_models/models/densenet121/conftest.py b/qai_hub_models/models/densenet121/conftest.py new file mode 100644 index 00000000..81b42e00 --- /dev/null +++ b/qai_hub_models/models/densenet121/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.densenet121 import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.densenet121.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/densenet121/demo.py b/qai_hub_models/models/densenet121/demo.py index 72ba762b..d18d86bd 100644 --- a/qai_hub_models/models/densenet121/demo.py +++ b/qai_hub_models/models/densenet121/demo.py @@ -3,11 +3,11 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo -from qai_hub_models.models.densenet121.model import DenseNet +from qai_hub_models.models.densenet121.model import MODEL_ID, DenseNet def main(is_test: bool = False): - imagenet_demo(DenseNet, is_test) + imagenet_demo(DenseNet, MODEL_ID, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/densenet121/export.py b/qai_hub_models/models/densenet121/export.py index 32e11a53..5d02faa4 100644 --- a/qai_hub_models/models/densenet121/export.py +++ b/qai_hub_models/models/densenet121/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,35 +109,43 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image_tensor" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." 
) @@ -146,33 +154,35 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/densenet121/info.yaml b/qai_hub_models/models/densenet121/info.yaml index 7eb5a937..9d1dda3b 100644 --- a/qai_hub_models/models/densenet121/info.yaml +++ b/qai_hub_models/models/densenet121/info.yaml @@ -13,6 +13,7 @@ tags: research_paper: https://arxiv.org/abs/1608.06993 research_paper_title: Densely Connected Convolutional Networks license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/densenet.py technical_details: Model checkpoint: Imagenet @@ -34,6 +35,7 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k diff --git a/qai_hub_models/models/densenet121/model.py b/qai_hub_models/models/densenet121/model.py index 79faf024..dacbe0fe 100644 --- a/qai_hub_models/models/densenet121/model.py +++ b/qai_hub_models/models/densenet121/model.py @@ -14,6 +14,6 @@ class DenseNet(ImagenetClassifier): @classmethod - def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> ImagenetClassifier: + def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> DenseNet: net = tv_models.densenet121(weights=weights) return cls(net) diff --git a/qai_hub_models/models/densenet121/perf.yaml b/qai_hub_models/models/densenet121/perf.yaml index ed91b04c..42d62485 100644 --- a/qai_hub_models/models/densenet121/perf.yaml +++ b/qai_hub_models/models/densenet121/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - 
Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: DenseNet-121 performance_metrics: - torchscript_onnx_tflite: - inference_time: 1605.0 - throughput: 623.0529595015577 + inference_time: 1603.0 + throughput: 623.8303181534623 estimated_peak_memory_range: - min: 28672 - max: 20688688 + min: 16384 + max: 20547528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,14 +43,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 310 - job_id: jlpe7w275 + job_id: jqpyen0gy job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:35:22.500705Z' torchscript_onnx_qnn: - inference_time: 1449.0 - throughput: 690.1311249137336 + inference_time: 1436.0 + throughput: 696.3788300835655 estimated_peak_memory_range: - min: 73728 - max: 209142552 + min: 618496 + max: 5887960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -55,13 +66,43 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 371 - job_id: jygzljwz5 + job_id: j1p8o6qg9 + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 1114.0 + throughput: 897.6660682226212 + estimated_peak_memory_range: + min: 12288 + max: 93424064 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 310 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 310 + job_id: j2p0yd0gw job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:21:38.990133Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:39:50.803809Z' + torchscript_onnx_qnn: + inference_time: 985.0 + throughput: 1015.2284263959391 + estimated_peak_memory_range: + min: 618496 + max: 142978448 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 371 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 371 + job_id: jn5q8ze57 + job_status: Passed diff --git a/qai_hub_models/models/densenet121/test.py b/qai_hub_models/models/densenet121/test.py index 82b584c4..647eaf92 100644 --- a/qai_hub_models/models/densenet121/test.py +++ b/qai_hub_models/models/densenet121/test.py @@ -2,6 +2,8 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- +import pytest + from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( run_imagenet_classifier_test, run_imagenet_classifier_trace_test, @@ -14,6 +16,7 @@ def test_task(): run_imagenet_classifier_test(DenseNet.from_pretrained(), MODEL_ID) +@pytest.mark.trace def test_trace(): run_imagenet_classifier_trace_test(DenseNet.from_pretrained()) diff --git a/qai_hub_models/models/detr_resnet101/README.md b/qai_hub_models/models/detr_resnet101/README.md index 54ddfe1b..58a8d4e3 100644 --- a/qai_hub_models/models/detr_resnet101/README.md +++ b/qai_hub_models/models/detr_resnet101/README.md @@ -10,7 +10,7 @@ This is based on the implementation of DETR-ResNet101 found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/detr_resnet101). 
-[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -30,7 +30,7 @@ python -m qai_hub_models.models.detr_resnet101.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -46,7 +46,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of DETR-ResNet101 can be found [here](https://github.com/facebookresearch/detr/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [End-to-End Object Detection with Transformers](https://arxiv.org/abs/2005.12872) diff --git a/qai_hub_models/models/detr_resnet101/conftest.py b/qai_hub_models/models/detr_resnet101/conftest.py new file mode 100644 index 00000000..cc6456c1 --- /dev/null +++ b/qai_hub_models/models/detr_resnet101/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.detr_resnet101 import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.detr_resnet101.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/detr_resnet101/demo.py b/qai_hub_models/models/detr_resnet101/demo.py index d9ed9cc0..d84dbdbb 100644 --- a/qai_hub_models/models/detr_resnet101/demo.py +++ b/qai_hub_models/models/detr_resnet101/demo.py @@ -19,7 +19,7 @@ # Run DETR app end-to-end on a sample image. # The demo will display the predicted mask in a window. def main(is_test: bool = False): - detr_demo(DETRResNet101, DEFAULT_WEIGHTS, IMAGE_ADDRESS, is_test) + detr_demo(DETRResNet101, MODEL_ID, DEFAULT_WEIGHTS, IMAGE_ADDRESS, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/detr_resnet101/export.py b/qai_hub_models/models/detr_resnet101/export.py index a3e36a66..f46ca4e5 100644 --- a/qai_hub_models/models/detr_resnet101/export.py +++ b/qai_hub_models/models/detr_resnet101/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,35 +109,43 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. 
Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -146,33 +154,35 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. 
Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/detr_resnet101/info.yaml b/qai_hub_models/models/detr_resnet101/info.yaml index 45c4e48e..f7bc400e 100644 --- a/qai_hub_models/models/detr_resnet101/info.yaml +++ b/qai_hub_models/models/detr_resnet101/info.yaml @@ -11,6 +11,7 @@ use_case: Object Detection research_paper: https://arxiv.org/abs/2005.12872 research_paper_title: End-to-End Object Detection with Transformers license: https://github.com/facebookresearch/detr/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/facebookresearch/detr technical_details: Model checkpoint: ResNet101 @@ -32,5 +33,6 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: apache-2.0 +deploy_license_type: AI Model Hub License dataset: - detection-datasets/coco diff --git a/qai_hub_models/models/detr_resnet101/perf.yaml b/qai_hub_models/models/detr_resnet101/perf.yaml index 701f35ef..ab365fa3 100644 --- a/qai_hub_models/models/detr_resnet101/perf.yaml +++ b/qai_hub_models/models/detr_resnet101/perf.yaml @@ -2,6 +2,12 @@ aggregated: supported_oses: - Android supported_devices: + - Google Pixel 3 + - Google Pixel 3a + - Google Pixel 3a XL + - Google Pixel 4 + - Google Pixel 4a + - Google Pixel 5a 5G - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -11,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: DETR-ResNet101 performance_metrics: - torchscript_onnx_tflite: - inference_time: 640294.0 - throughput: 1.5617825561382739 + inference_time: 563957.0 + throughput: 1.7731848350140171 estimated_peak_memory_range: - min: 107266048 - max: 111542968 + min: 102526976 + max: 112477944 primary_compute_unit: CPU precision: fp32 layer_info: @@ -34,8 +43,16 @@ models: layers_on_gpu: 0 layers_on_cpu: 957 total_layers: 957 - job_id: jz5wl39zp + job_id: jmg9v8m57 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:10:49.800332Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -51,11 +68,41 @@ models: total_layers: 
0 job_id: '' job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 489867.0 + throughput: 2.0413704127855112 + estimated_peak_memory_range: + min: 109977600 + max: 266823568 + primary_compute_unit: CPU + precision: fp32 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 957 + total_layers: 957 + job_id: jnp103n5q + job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:15:32.226652Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:10:49.800340Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped diff --git a/qai_hub_models/models/detr_resnet101/requirements.txt b/qai_hub_models/models/detr_resnet101/requirements.txt index 3582ec2c..f9dfeb47 100644 --- a/qai_hub_models/models/detr_resnet101/requirements.txt +++ b/qai_hub_models/models/detr_resnet101/requirements.txt @@ -1,2 +1,2 @@ -transformers==4.31.0 -timm==0.9.7 +transformers==4.27.4 +timm==0.9.11 diff --git a/qai_hub_models/models/detr_resnet101/test.py b/qai_hub_models/models/detr_resnet101/test.py index 7a9b8da1..3b87d3bd 100644 --- a/qai_hub_models/models/detr_resnet101/test.py +++ b/qai_hub_models/models/detr_resnet101/test.py @@ -2,6 +2,8 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- +import pytest + from qai_hub_models.models._shared.detr.app import DETRApp from qai_hub_models.models.detr_resnet101.demo import MODEL_ASSET_VERSION, MODEL_ID from qai_hub_models.models.detr_resnet101.demo import main as demo_main @@ -20,6 +22,7 @@ def test_task(): assert set(list(label.numpy())) == {75, 63, 17} +@pytest.mark.trace def test_trace(): net = DETRResNet101.from_pretrained(DEFAULT_WEIGHTS).convert_to_torchscript() img = load_image(IMAGE_ADDRESS) diff --git a/qai_hub_models/models/detr_resnet101_dc5/README.md b/qai_hub_models/models/detr_resnet101_dc5/README.md index 8e4b4c6b..68300db5 100644 --- a/qai_hub_models/models/detr_resnet101_dc5/README.md +++ b/qai_hub_models/models/detr_resnet101_dc5/README.md @@ -10,7 +10,7 @@ This is based on the implementation of DETR-ResNet101-DC5 found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/detr_resnet101_dc5). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -30,7 +30,7 @@ python -m qai_hub_models.models.detr_resnet101_dc5.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -46,7 +46,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. 
## License - The license for the original implementation of DETR-ResNet101-DC5 can be found [here](https://github.com/facebookresearch/detr/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [End-to-End Object Detection with Transformers](https://arxiv.org/abs/2005.12872) diff --git a/qai_hub_models/models/detr_resnet101_dc5/conftest.py b/qai_hub_models/models/detr_resnet101_dc5/conftest.py new file mode 100644 index 00000000..417445fd --- /dev/null +++ b/qai_hub_models/models/detr_resnet101_dc5/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.detr_resnet101_dc5 import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.detr_resnet101_dc5.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/detr_resnet101_dc5/demo.py b/qai_hub_models/models/detr_resnet101_dc5/demo.py index fd286725..c90f12b1 100644 --- a/qai_hub_models/models/detr_resnet101_dc5/demo.py +++ b/qai_hub_models/models/detr_resnet101_dc5/demo.py @@ -19,7 +19,7 @@ # Run DETR app end-to-end on a sample image. # The demo will display the predicted mask in a window. def main(is_test: bool = False): - detr_demo(DETRResNet101DC5, DEFAULT_WEIGHTS, IMAGE_ADDRESS, is_test) + detr_demo(DETRResNet101DC5, MODEL_ID, DEFAULT_WEIGHTS, IMAGE_ADDRESS, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/detr_resnet101_dc5/export.py b/qai_hub_models/models/detr_resnet101_dc5/export.py index b0b2cd5a..e8c61070 100644 --- a/qai_hub_models/models/detr_resnet101_dc5/export.py +++ b/qai_hub_models/models/detr_resnet101_dc5/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,35 +109,43 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. 
Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -146,33 +154,35 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/detr_resnet101_dc5/info.yaml b/qai_hub_models/models/detr_resnet101_dc5/info.yaml index 323c4743..e1ca9e22 100644 --- a/qai_hub_models/models/detr_resnet101_dc5/info.yaml +++ b/qai_hub_models/models/detr_resnet101_dc5/info.yaml @@ -11,6 +11,7 @@ use_case: Object Detection research_paper: https://arxiv.org/abs/2005.12872 research_paper_title: End-to-End Object Detection with Transformers license: https://github.com/facebookresearch/detr/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/facebookresearch/detr technical_details: Model checkpoint: ResNet101-DC5 @@ -32,5 +33,6 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: 
apache-2.0 +deploy_license_type: AI Model Hub License dataset: - detection-datasets/coco diff --git a/qai_hub_models/models/detr_resnet101_dc5/perf.yaml b/qai_hub_models/models/detr_resnet101_dc5/perf.yaml index ce87325d..07e3fdc8 100644 --- a/qai_hub_models/models/detr_resnet101_dc5/perf.yaml +++ b/qai_hub_models/models/detr_resnet101_dc5/perf.yaml @@ -2,6 +2,12 @@ aggregated: supported_oses: - Android supported_devices: + - Google Pixel 3 + - Google Pixel 3a + - Google Pixel 3a XL + - Google Pixel 4 + - Google Pixel 4a + - Google Pixel 5a 5G - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -11,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: DETR-ResNet101-DC5 performance_metrics: - torchscript_onnx_tflite: - inference_time: 971988.0 - throughput: 1.0288192858348046 + inference_time: 976351.0 + throughput: 1.0242218218652923 estimated_peak_memory_range: - min: 12288 - max: 291526464 + min: 168345600 + max: 171158408 primary_compute_unit: CPU precision: fp32 layer_info: @@ -34,8 +43,16 @@ models: layers_on_gpu: 0 layers_on_cpu: 958 total_layers: 958 - job_id: jlpe7w875 + job_id: jep28v6p6 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-15T00:03:43.829001Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -51,11 +68,41 @@ models: total_layers: 0 job_id: '' job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 777938.0 + throughput: 1.2854494831207628 + estimated_peak_memory_range: + min: 175112192 + max: 339555616 + primary_compute_unit: CPU + precision: fp32 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 958 + total_layers: 958 + job_id: jqpye70gy + job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:40:02.166898Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-15T00:03:43.829010Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped diff --git a/qai_hub_models/models/detr_resnet101_dc5/requirements.txt b/qai_hub_models/models/detr_resnet101_dc5/requirements.txt index 3582ec2c..f9dfeb47 100644 --- a/qai_hub_models/models/detr_resnet101_dc5/requirements.txt +++ b/qai_hub_models/models/detr_resnet101_dc5/requirements.txt @@ -1,2 +1,2 @@ -transformers==4.31.0 -timm==0.9.7 +transformers==4.27.4 +timm==0.9.11 diff --git a/qai_hub_models/models/detr_resnet101_dc5/test.py b/qai_hub_models/models/detr_resnet101_dc5/test.py index 64d63836..a0c0bfb4 100644 --- a/qai_hub_models/models/detr_resnet101_dc5/test.py +++ b/qai_hub_models/models/detr_resnet101_dc5/test.py @@ -2,6 +2,8 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
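The regenerated export.py scripts in this patch wrap each `hub.submit_*_job` call in `typing.cast` and type the optional profile/inference handles as `Optional[...]`, guarding them with `assert ... is not None` before use. A minimal sketch of that typing pattern, using a hypothetical `Job` stand-in rather than the real `qai_hub` client classes:

```python
# Illustrative only: "Job" and "submit_job" are stand-ins, not the qai_hub API.
from typing import Optional, cast


class Job:
    def wait(self) -> "Job":
        return self

    @property
    def success(self) -> bool:
        return True


def submit_job() -> object:
    # Returns an untyped handle, as the submit_*_job helpers do in export.py.
    return Job()


skip_profiling = False
profile_job: Optional[Job] = None
if not skip_profiling:
    # cast() narrows the handle so static type checkers know its concrete type.
    profile_job = cast(Job, submit_job())

# Guard before use, mirroring `assert profile_job is not None and ...` above.
if profile_job is not None:
    assert profile_job.wait().success
```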
# SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- +import pytest + from qai_hub_models.models._shared.detr.app import DETRApp from qai_hub_models.models.detr_resnet101_dc5.demo import IMAGE_ADDRESS from qai_hub_models.models.detr_resnet101_dc5.demo import main as demo_main @@ -25,6 +27,7 @@ def test_task(): assert set(list(label.numpy())) == {75, 63, 17} +@pytest.mark.trace def test_trace(): net = DETRResNet101DC5.from_pretrained(DEFAULT_WEIGHTS).convert_to_torchscript() img = load_image(IMAGE_ADDRESS) diff --git a/qai_hub_models/models/detr_resnet50/README.md b/qai_hub_models/models/detr_resnet50/README.md index 1089a7bc..3ad8f2a2 100644 --- a/qai_hub_models/models/detr_resnet50/README.md +++ b/qai_hub_models/models/detr_resnet50/README.md @@ -10,7 +10,7 @@ This is based on the implementation of DETR-ResNet50 found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/detr_resnet50). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -30,7 +30,7 @@ python -m qai_hub_models.models.detr_resnet50.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -46,7 +46,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of DETR-ResNet50 can be found [here](https://github.com/facebookresearch/detr/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [End-to-End Object Detection with Transformers](https://arxiv.org/abs/2005.12872) diff --git a/qai_hub_models/models/detr_resnet50/conftest.py b/qai_hub_models/models/detr_resnet50/conftest.py new file mode 100644 index 00000000..cf3b6330 --- /dev/null +++ b/qai_hub_models/models/detr_resnet50/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.detr_resnet50 import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. 
+ """ + mock = patch( + "qai_hub_models.models.detr_resnet50.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/detr_resnet50/demo.py b/qai_hub_models/models/detr_resnet50/demo.py index 6fcf2a7f..169895e5 100644 --- a/qai_hub_models/models/detr_resnet50/demo.py +++ b/qai_hub_models/models/detr_resnet50/demo.py @@ -19,7 +19,7 @@ # Run DETR app end-to-end on a sample image. # The demo will display the predicted mask in a window. def main(is_test: bool = False): - detr_demo(DETRResNet50, DEFAULT_WEIGHTS, IMAGE_ADDRESS, is_test) + detr_demo(DETRResNet50, MODEL_ID, DEFAULT_WEIGHTS, IMAGE_ADDRESS, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/detr_resnet50/export.py b/qai_hub_models/models/detr_resnet50/export.py index 7c6a0d6c..70838101 100644 --- a/qai_hub_models/models/detr_resnet50/export.py +++ b/qai_hub_models/models/detr_resnet50/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,35 +109,43 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -146,33 +154,35 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. 
Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/detr_resnet50/info.yaml b/qai_hub_models/models/detr_resnet50/info.yaml index 1daf3e8b..2270fe6e 100644 --- a/qai_hub_models/models/detr_resnet50/info.yaml +++ b/qai_hub_models/models/detr_resnet50/info.yaml @@ -11,6 +11,7 @@ use_case: Object Detection research_paper: https://arxiv.org/abs/2005.12872 research_paper_title: End-to-End Object Detection with Transformers license: https://github.com/facebookresearch/detr/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/facebookresearch/detr technical_details: Model checkpoint: ResNet50 @@ -32,5 +33,6 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: apache-2.0 +deploy_license_type: AI Model Hub License dataset: - detection-datasets/coco diff --git a/qai_hub_models/models/detr_resnet50/perf.yaml b/qai_hub_models/models/detr_resnet50/perf.yaml index 6f7b0bd7..d07318ba 100644 --- a/qai_hub_models/models/detr_resnet50/perf.yaml +++ b/qai_hub_models/models/detr_resnet50/perf.yaml @@ -2,6 +2,12 @@ aggregated: supported_oses: - Android supported_devices: + - Google Pixel 3 + - Google Pixel 3a + - Google Pixel 3a XL + - Google Pixel 4 + - Google Pixel 4a + - Google Pixel 5a 5G - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -11,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: DETR-ResNet50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 346284.0 - throughput: 2.887803074932714 + inference_time: 365312.0 + throughput: 2.737386124737211 estimated_peak_memory_range: - min: 109121536 - max: 112011896 + min: 109416448 + max: 444976064 primary_compute_unit: CPU precision: fp32 layer_info: @@ -34,8 +43,16 @@ models: layers_on_gpu: 0 layers_on_cpu: 889 total_layers: 889 - job_id: jvgddqrkg + job_id: j1p3k7x52 job_status: Passed + 
reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-15T00:08:31.933833Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -51,11 +68,41 @@ models: total_layers: 0 job_id: '' job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 287302.0 + throughput: 3.480657983585217 + estimated_peak_memory_range: + min: 108204032 + max: 196940032 + primary_compute_unit: CPU + precision: fp32 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 889 + total_layers: 889 + job_id: jwgoyw458 + job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:35:06.340774Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-15T00:08:31.933846Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped diff --git a/qai_hub_models/models/detr_resnet50/requirements.txt b/qai_hub_models/models/detr_resnet50/requirements.txt index 3582ec2c..f9dfeb47 100644 --- a/qai_hub_models/models/detr_resnet50/requirements.txt +++ b/qai_hub_models/models/detr_resnet50/requirements.txt @@ -1,2 +1,2 @@ -transformers==4.31.0 -timm==0.9.7 +transformers==4.27.4 +timm==0.9.11 diff --git a/qai_hub_models/models/detr_resnet50/test.py b/qai_hub_models/models/detr_resnet50/test.py index 9174f95b..634027b4 100644 --- a/qai_hub_models/models/detr_resnet50/test.py +++ b/qai_hub_models/models/detr_resnet50/test.py @@ -2,6 +2,8 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- +import pytest + from qai_hub_models.models._shared.detr.app import DETRApp from qai_hub_models.models.detr_resnet50.demo import main as demo_main from qai_hub_models.models.detr_resnet50.model import ( @@ -32,6 +34,7 @@ def test_cli_from_pretrained(): assert model_from_cli_args(DETRResNet50, args) is not None +@pytest.mark.trace def test_trace(): net = DETRResNet50.from_pretrained() input_spec = net.get_input_spec() diff --git a/qai_hub_models/models/detr_resnet50_dc5/README.md b/qai_hub_models/models/detr_resnet50_dc5/README.md index f93b2bad..59d5fed2 100644 --- a/qai_hub_models/models/detr_resnet50_dc5/README.md +++ b/qai_hub_models/models/detr_resnet50_dc5/README.md @@ -10,7 +10,7 @@ This is based on the implementation of DETR-ResNet50-DC5 found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/detr_resnet50_dc5). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -30,7 +30,7 @@ python -m qai_hub_models.models.detr_resnet50_dc5.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. 
+models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -46,7 +46,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of DETR-ResNet50-DC5 can be found [here](https://github.com/facebookresearch/detr/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [End-to-End Object Detection with Transformers](https://arxiv.org/abs/2005.12872) diff --git a/qai_hub_models/models/detr_resnet50_dc5/conftest.py b/qai_hub_models/models/detr_resnet50_dc5/conftest.py new file mode 100644 index 00000000..f2a22cab --- /dev/null +++ b/qai_hub_models/models/detr_resnet50_dc5/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.detr_resnet50_dc5 import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.detr_resnet50_dc5.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/detr_resnet50_dc5/demo.py b/qai_hub_models/models/detr_resnet50_dc5/demo.py index 0eeaee66..a0552f0e 100644 --- a/qai_hub_models/models/detr_resnet50_dc5/demo.py +++ b/qai_hub_models/models/detr_resnet50_dc5/demo.py @@ -19,7 +19,7 @@ # Run DETR app end-to-end on a sample image. # The demo will display the predicted mask in a window. def main(is_test: bool = False): - detr_demo(DETRResNet50DC5, DEFAULT_WEIGHTS, IMAGE_ADDRESS, is_test) + detr_demo(DETRResNet50DC5, MODEL_ID, DEFAULT_WEIGHTS, IMAGE_ADDRESS, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/detr_resnet50_dc5/export.py b/qai_hub_models/models/detr_resnet50_dc5/export.py index 496a68aa..09e9b406 100644 --- a/qai_hub_models/models/detr_resnet50_dc5/export.py +++ b/qai_hub_models/models/detr_resnet50_dc5/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,35 +109,43 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. 
Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -146,33 +154,35 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. 
Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/detr_resnet50_dc5/info.yaml b/qai_hub_models/models/detr_resnet50_dc5/info.yaml index 4777b6ea..16b90ef9 100644 --- a/qai_hub_models/models/detr_resnet50_dc5/info.yaml +++ b/qai_hub_models/models/detr_resnet50_dc5/info.yaml @@ -11,6 +11,7 @@ use_case: Object Detection research_paper: https://arxiv.org/abs/2005.12872 research_paper_title: End-to-End Object Detection with Transformers license: https://github.com/facebookresearch/detr/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/facebookresearch/detr technical_details: Model checkpoint: ResNet50-DC5 @@ -32,5 +33,6 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: apache-2.0 +deploy_license_type: AI Model Hub License dataset: - detection-datasets/coco diff --git a/qai_hub_models/models/detr_resnet50_dc5/perf.yaml b/qai_hub_models/models/detr_resnet50_dc5/perf.yaml index aaba6ce8..4d135c6c 100644 --- a/qai_hub_models/models/detr_resnet50_dc5/perf.yaml +++ b/qai_hub_models/models/detr_resnet50_dc5/perf.yaml @@ -2,6 +2,12 @@ aggregated: supported_oses: - Android supported_devices: + - Google Pixel 3 + - Google Pixel 3a + - Google Pixel 3a XL + - Google Pixel 4 + - Google Pixel 4a + - Google Pixel 5a 5G - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -11,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: DETR-ResNet50-DC5 performance_metrics: - torchscript_onnx_tflite: - inference_time: 760148.0 - throughput: 1.3155332908854591 + inference_time: 692168.0 + throughput: 1.4447359600559402 estimated_peak_memory_range: - min: 251318272 - max: 254954864 + min: 117583872 + max: 529905552 primary_compute_unit: CPU precision: fp32 layer_info: @@ -34,8 +43,16 @@ models: layers_on_gpu: 0 layers_on_cpu: 890 total_layers: 890 - job_id: j1pvlr7m5 + job_id: jqp4q2lgo job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:27:50.803823Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -51,11 
+68,41 @@ models: total_layers: 0 job_id: '' job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 521991.0 + throughput: 1.9157418422923 + estimated_peak_memory_range: + min: 178831360 + max: 279734112 + primary_compute_unit: CPU + precision: fp32 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 890 + total_layers: 890 + job_id: j0pxvz9g7 + job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:15:27.657498Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:27:50.803834Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped diff --git a/qai_hub_models/models/detr_resnet50_dc5/requirements.txt b/qai_hub_models/models/detr_resnet50_dc5/requirements.txt index 3582ec2c..f9dfeb47 100644 --- a/qai_hub_models/models/detr_resnet50_dc5/requirements.txt +++ b/qai_hub_models/models/detr_resnet50_dc5/requirements.txt @@ -1,2 +1,2 @@ -transformers==4.31.0 -timm==0.9.7 +transformers==4.27.4 +timm==0.9.11 diff --git a/qai_hub_models/models/detr_resnet50_dc5/test.py b/qai_hub_models/models/detr_resnet50_dc5/test.py index 28533460..04f51a81 100644 --- a/qai_hub_models/models/detr_resnet50_dc5/test.py +++ b/qai_hub_models/models/detr_resnet50_dc5/test.py @@ -2,6 +2,8 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- +import pytest + from qai_hub_models.models._shared.detr.app import DETRApp from qai_hub_models.models.detr_resnet50_dc5.demo import MODEL_ASSET_VERSION, MODEL_ID from qai_hub_models.models.detr_resnet50_dc5.demo import main as demo_main @@ -23,6 +25,7 @@ def test_task(): assert set(list(label.numpy())) == {75, 63, 17} +@pytest.mark.trace def test_trace(): net = DETRResNet50DC5.from_pretrained(DEFAULT_WEIGHTS).convert_to_torchscript() img = load_image(IMAGE_ADDRESS) diff --git a/qai_hub_models/models/efficientnet_b0/README.md b/qai_hub_models/models/efficientnet_b0/README.md index 677b5dc7..e1241088 100644 --- a/qai_hub_models/models/efficientnet_b0/README.md +++ b/qai_hub_models/models/efficientnet_b0/README.md @@ -10,7 +10,7 @@ This is based on the implementation of EfficientNet-B0 found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/efficientnet_b0). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.efficientnet_b0.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. 
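The test.py hunks above add a `@pytest.mark.trace` marker to the slow `torch.jit.trace` tests. The marker registration itself is not shown in these hunks; a minimal conftest-level sketch, assuming standard pytest marker registration, looks like:

```python
# Hypothetical registration sketch; the actual configuration used by
# qai_hub_models is not shown in these hunks.
def pytest_configure(config):
    config.addinivalue_line(
        "markers", "trace: marks slow torch.jit.trace tests"
    )
```

With a registration like this, `pytest -m "not trace"` deselects the tracing tests and `pytest -m trace` runs only them.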
## License - The license for the original implementation of EfficientNet-B0 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946) diff --git a/qai_hub_models/models/efficientnet_b0/conftest.py b/qai_hub_models/models/efficientnet_b0/conftest.py new file mode 100644 index 00000000..72ffa71c --- /dev/null +++ b/qai_hub_models/models/efficientnet_b0/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.efficientnet_b0 import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.efficientnet_b0.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/efficientnet_b0/demo.py b/qai_hub_models/models/efficientnet_b0/demo.py index 40bd18fb..9f01d294 100644 --- a/qai_hub_models/models/efficientnet_b0/demo.py +++ b/qai_hub_models/models/efficientnet_b0/demo.py @@ -3,11 +3,11 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo -from qai_hub_models.models.efficientnet_b0.model import EfficientNetB0 +from qai_hub_models.models.efficientnet_b0.model import MODEL_ID, EfficientNetB0 def main(is_test: bool = False): - imagenet_demo(EfficientNetB0, is_test) + imagenet_demo(EfficientNetB0, MODEL_ID, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/efficientnet_b0/export.py b/qai_hub_models/models/efficientnet_b0/export.py index aba81864..44abf4a7 100644 --- a/qai_hub_models/models/efficientnet_b0/export.py +++ b/qai_hub_models/models/efficientnet_b0/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,35 +109,43 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. 
Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image_tensor" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -146,33 +154,35 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. 
Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/efficientnet_b0/info.yaml b/qai_hub_models/models/efficientnet_b0/info.yaml index d66d06a5..0e74436b 100644 --- a/qai_hub_models/models/efficientnet_b0/info.yaml +++ b/qai_hub_models/models/efficientnet_b0/info.yaml @@ -14,6 +14,7 @@ research_paper: https://arxiv.org/abs/1905.11946 research_paper_title: 'EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks' license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/efficientnet.py technical_details: Model checkpoint: Imagenet @@ -35,6 +36,7 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k diff --git a/qai_hub_models/models/efficientnet_b0/model.py b/qai_hub_models/models/efficientnet_b0/model.py index 956dc29b..4667e5c9 100644 --- a/qai_hub_models/models/efficientnet_b0/model.py +++ b/qai_hub_models/models/efficientnet_b0/model.py @@ -14,6 +14,6 @@ class EfficientNetB0(ImagenetClassifier): @classmethod - def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> ImagenetClassifier: + def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> EfficientNetB0: net = tv_models.efficientnet_b0(weights=weights) return cls(net) diff --git a/qai_hub_models/models/efficientnet_b0/perf.yaml b/qai_hub_models/models/efficientnet_b0/perf.yaml index ea6a7116..bce0b849 100644 --- a/qai_hub_models/models/efficientnet_b0/perf.yaml +++ b/qai_hub_models/models/efficientnet_b0/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: EfficientNet-B0 performance_metrics: - torchscript_onnx_tflite: - inference_time: 2184.0 - throughput: 457.87545787545787 + inference_time: 2174.0 + throughput: 459.9816007359706 estimated_peak_memory_range: - min: 12288 - max: 2340896 + min: 24576 + max: 2273464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,14 +43,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: j0pxl61jp + job_id: jlpe9l8gr job_status: Passed + 
reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:37:36.573638Z' torchscript_onnx_qnn: - inference_time: 2166.0 - throughput: 461.6805170821791 + inference_time: 2173.0 + throughput: 460.1932811780948 estimated_peak_memory_range: - min: 12288 - max: 86865200 + min: 16384 + max: 87349280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -55,13 +66,43 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 242 - job_id: jo5m06zyg + job_id: jz5wo14p1 + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 1524.0 + throughput: 656.1679790026246 + estimated_peak_memory_range: + min: 12288 + max: 70874656 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 243 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 243 + job_id: jygze44g8 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:27:58.826690Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:42:10.776325Z' + torchscript_onnx_qnn: + inference_time: 1508.0 + throughput: 663.1299734748011 + estimated_peak_memory_range: + min: 618496 + max: 79231776 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 242 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 242 + job_id: jmg9vxm57 + job_status: Passed diff --git a/qai_hub_models/models/efficientnet_b0/test.py b/qai_hub_models/models/efficientnet_b0/test.py index b7808e69..5ac04c69 100644 --- a/qai_hub_models/models/efficientnet_b0/test.py +++ b/qai_hub_models/models/efficientnet_b0/test.py @@ -2,6 +2,8 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- +import pytest + from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( run_imagenet_classifier_test, run_imagenet_classifier_trace_test, @@ -14,6 +16,7 @@ def test_task(): run_imagenet_classifier_test(EfficientNetB0.from_pretrained(), MODEL_ID) +@pytest.mark.trace def test_trace(): run_imagenet_classifier_trace_test(EfficientNetB0.from_pretrained()) diff --git a/qai_hub_models/models/esrgan/README.md b/qai_hub_models/models/esrgan/README.md index 0607633f..781f6201 100644 --- a/qai_hub_models/models/esrgan/README.md +++ b/qai_hub_models/models/esrgan/README.md @@ -10,7 +10,7 @@ This is based on the implementation of ESRGAN found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/esrgan). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.esrgan.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. 
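In the perf.yaml entries above, `throughput` is the reciprocal of `inference_time`, which is reported in microseconds. A quick sanity check against the EfficientNet-B0 TFLite numbers added in this patch:

```python
# inference_time is in microseconds, so throughput (inferences/second) is 1e6 / time.
def throughput(inference_time_us: float) -> float:
    return 1e6 / inference_time_us


assert round(throughput(2174.0), 4) == 459.9816  # Samsung Galaxy S23
assert round(throughput(1524.0), 4) == 656.1680  # Samsung Galaxy S24
```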
## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ESRGAN can be found [here](https://github.com/xinntao/ESRGAN/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [ESRGAN: Enhanced Super-Resolution Generative Adversarial Networks](https://arxiv.org/abs/1809.00219) diff --git a/qai_hub_models/models/esrgan/conftest.py b/qai_hub_models/models/esrgan/conftest.py new file mode 100644 index 00000000..7e3e37cf --- /dev/null +++ b/qai_hub_models/models/esrgan/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.esrgan import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.esrgan.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/esrgan/demo.py b/qai_hub_models/models/esrgan/demo.py index 3a15c695..03c02612 100644 --- a/qai_hub_models/models/esrgan/demo.py +++ b/qai_hub_models/models/esrgan/demo.py @@ -16,6 +16,7 @@ def main(is_test: bool = False): super_resolution_demo( model_cls=ESRGAN, + model_id=MODEL_ID, default_image=IMAGE_ADDRESS, is_test=is_test, ) diff --git a/qai_hub_models/models/esrgan/export.py b/qai_hub_models/models/esrgan/export.py index d82b3c60..01c64d63 100644 --- a/qai_hub_models/models/esrgan/export.py +++ b/qai_hub_models/models/esrgan/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,7 +109,7 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( @@ -118,29 +118,37 @@ def export_model( + " --force_channel_last_input image" + " --force_channel_last_output output_0", ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. 
Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -149,30 +157,31 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore # Convert outputs from channel last to channel first inference_result = transpose_channel_last_to_first( "output_0", inference_result, target_runtime diff --git a/qai_hub_models/models/esrgan/info.yaml b/qai_hub_models/models/esrgan/info.yaml index f0a23cd5..155c1676 100644 --- a/qai_hub_models/models/esrgan/info.yaml +++ b/qai_hub_models/models/esrgan/info.yaml @@ -11,6 +11,7 @@ tags: [] research_paper: https://arxiv.org/abs/1809.00219 research_paper_title: 'ESRGAN: Enhanced Super-Resolution Generative Adversarial Networks' license: https://github.com/xinntao/ESRGAN/blob/master/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/xinntao/ESRGAN/ technical_details: Model checkpoint: ESRGAN_x4 @@ -30,4 +31,5 @@ related_models: has_static_banner: yes has_animated_banner: yes license_type: apache-2.0 +deploy_license_type: AI Model Hub License dataset: [] diff --git a/qai_hub_models/models/esrgan/perf.yaml b/qai_hub_models/models/esrgan/perf.yaml index 50e0e2bd..1ecf1170 100644 --- 
a/qai_hub_models/models/esrgan/perf.yaml +++ b/qai_hub_models/models/esrgan/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: ESRGAN performance_metrics: - torchscript_onnx_tflite: - inference_time: 76337.0 - throughput: 13.099807432830737 + inference_time: 74047.0 + throughput: 13.504936054127784 estimated_peak_memory_range: - min: 3301376 - max: 6221192 + min: 12288 + max: 4695144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,14 +43,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1024 - job_id: jnp1nw7kg + job_id: jnp10rl5q job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:41:56.326001Z' torchscript_onnx_qnn: - inference_time: 66070.0 - throughput: 15.135462388375965 + inference_time: 65507.0 + throughput: 15.265544140320882 estimated_peak_memory_range: - min: 102400 - max: 101973424 + min: 57344 + max: 55933800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -55,13 +66,43 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1027 - job_id: jvgddq8kg + job_id: jz5woo6p1 + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 53553.0 + throughput: 18.673090209698803 + estimated_peak_memory_range: + min: 3276800 + max: 574983152 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 1024 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 1024 + job_id: jvgdwjl5j job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:29:43.305116Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:46:30.960659Z' + torchscript_onnx_qnn: + inference_time: 50563.0 + throughput: 19.777307517354586 + estimated_peak_memory_range: + min: 86016 + max: 240922112 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 1027 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 1027 + job_id: jmg9vvl57 + job_status: Passed diff --git a/qai_hub_models/models/esrgan/test.py b/qai_hub_models/models/esrgan/test.py index 25a9e20a..f7719ebc 100644 --- a/qai_hub_models/models/esrgan/test.py +++ b/qai_hub_models/models/esrgan/test.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- import numpy as np +import pytest from qai_hub_models.models._shared.super_resolution.app import SuperResolutionApp from qai_hub_models.models.esrgan.demo import IMAGE_ADDRESS @@ -30,6 +31,7 @@ def test_task(): ) +@pytest.mark.trace @skip_clone_repo_check def test_trace(): image = load_image(IMAGE_ADDRESS) diff --git a/qai_hub_models/models/facebook_denoiser/README.md b/qai_hub_models/models/facebook_denoiser/README.md index 8b79544c..3c05adc1 100644 --- a/qai_hub_models/models/facebook_denoiser/README.md +++ b/qai_hub_models/models/facebook_denoiser/README.md @@ -10,7 +10,7 @@ This is based on the implementation of Facebook-Denoiser found export suitable to run on Qualcomm® devices. 
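The export scripts in this patch compile vision models with `--force_channel_last_input image` and convert sample inputs via `transpose_channel_first_to_last("image", sample_inputs, target_runtime)` before running on-device inference. The helper's implementation is not part of these hunks, but the conversion it names is the standard NCHW to NHWC layout swap; a standalone numpy sketch (illustrative names, not the real helper):

```python
import numpy as np


# Illustrative only: shows the NCHW -> NHWC swap implied by
# "channel first to channel last"; this is not the qai_hub_models helper.
def to_channel_last(tensor_nchw: np.ndarray) -> np.ndarray:
    return np.transpose(tensor_nchw, (0, 2, 3, 1))


image = np.zeros((1, 3, 224, 224), dtype=np.float32)  # N, C, H, W
assert to_channel_last(image).shape == (1, 224, 224, 3)  # N, H, W, C
```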
More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/facebook_denoiser). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -30,7 +30,7 @@ python -m qai_hub_models.models.facebook_denoiser.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -46,7 +46,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Facebook-Denoiser can be found [here](https://github.com/facebookresearch/denoiser/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Real Time Speech Enhancement in the Waveform Domain](https://arxiv.org/abs/2006.12847) diff --git a/qai_hub_models/models/facebook_denoiser/app.py b/qai_hub_models/models/facebook_denoiser/app.py index 5af5473b..537fc024 100644 --- a/qai_hub_models/models/facebook_denoiser/app.py +++ b/qai_hub_models/models/facebook_denoiser/app.py @@ -4,7 +4,6 @@ # --------------------------------------------------------------------- from __future__ import annotations -import os from pathlib import Path from typing import Callable, List, Sequence @@ -40,7 +39,6 @@ def predict(self, *args, **kwargs): def denoise_audio( self, input_audio: Sequence[Path | str | torch.Tensor | np.ndarray], - out_dir: Path | str | None = None, ) -> List[Path | torch.Tensor]: """ Denoise and isolate the speech in the provided audio clip(s). @@ -80,19 +78,9 @@ def denoise_audio( for noisy in noisy_audios: out = self.denoiser(noisy) out = out / max(out.abs().max().item(), 1) # Normalize - if all_inputs_are_paths and out_dir: + if all_inputs_are_paths: # We don't run files in batches, take the first batch output out = out[:, 0] estimates.append(out) - if out_dir and all_inputs_are_paths: - output_files = [] - for path, estimate in zip(input_audio, estimates): - filename = os.path.join( - out_dir, os.path.basename(path).rsplit(".", 1)[0] - ) - filename = Path(f"{filename}_enhanced.wav") - torchaudio.save(filename, estimate, self.sample_rate) - output_files.append(filename) - return output_files return estimates diff --git a/qai_hub_models/models/facebook_denoiser/conftest.py b/qai_hub_models/models/facebook_denoiser/conftest.py new file mode 100644 index 00000000..f2883f91 --- /dev/null +++ b/qai_hub_models/models/facebook_denoiser/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
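The auto-generated conftest.py files added throughout this patch (including the facebook_denoiser one beginning here) install an autouse fixture that patches `Model.from_pretrained` to return one cached instance, so tests in a package do not reload weights repeatedly. A self-contained sketch of the same idea, with a toy model class instead of the real one and the context-manager form of `patch` in place of the generated files' bare `mock.start()`:

```python
# Toy illustration of the caching fixture pattern; "SlowModel" is a stand-in.
from unittest.mock import patch

import pytest


class SlowModel:
    load_count = 0

    @classmethod
    def from_pretrained(cls) -> "SlowModel":
        cls.load_count += 1  # pretend this is an expensive weight download
        return cls()


@pytest.fixture(autouse=True)
def mock_from_pretrained():
    cached = SlowModel.from_pretrained()
    with patch.object(SlowModel, "from_pretrained", return_value=cached):
        yield


def test_reuses_cached_instance():
    a = SlowModel.from_pretrained()
    b = SlowModel.from_pretrained()
    assert a is b and SlowModel.load_count == 1
```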
+ +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.facebook_denoiser import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.facebook_denoiser.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/facebook_denoiser/demo.py b/qai_hub_models/models/facebook_denoiser/demo.py index 6d6a86fe..cb8eb9d1 100644 --- a/qai_hub_models/models/facebook_denoiser/demo.py +++ b/qai_hub_models/models/facebook_denoiser/demo.py @@ -4,16 +4,25 @@ # --------------------------------------------------------------------- import os import tempfile +from pathlib import Path from typing import List +import torchaudio + from qai_hub_models.models.facebook_denoiser.app import FacebookDenoiserApp from qai_hub_models.models.facebook_denoiser.model import ( ASSET_VERSION, + DEFAULT_SEQUENCE_LENGTH, MODEL_ID, SAMPLE_RATE, FacebookDenoiser, ) -from qai_hub_models.utils.args import get_model_cli_parser, model_from_cli_args +from qai_hub_models.utils.args import ( + demo_model_from_cli_args, + get_model_cli_parser, + get_on_device_demo_parser, + validate_on_device_demo_args, +) from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_path EXAMPLE_RECORDING = CachedWebModelAsset.from_asset_store( @@ -26,6 +35,7 @@ def main(is_test: bool = False): Run facebook denoiser on a sample audio (`.wav`) file. """ parser = get_model_cli_parser(FacebookDenoiser) + parser = get_on_device_demo_parser(parser, add_output_dir=True) parser.add_argument( "--audio", nargs="+", @@ -38,26 +48,41 @@ def main(is_test: bool = False): default=SAMPLE_RATE, help="Audio sample rate the model was trained on", ) - parser.add_argument( - "--output-dir", - type=str, - default=os.getcwd(), - help="output directory (where output WAV should be written)", - ) args = parser.parse_args([] if is_test else None) + model = demo_model_from_cli_args(FacebookDenoiser, MODEL_ID, args) + validate_on_device_demo_args(args, MODEL_ID) - # Load Model - source_model = model_from_cli_args(FacebookDenoiser, args) - app = FacebookDenoiserApp(source_model, args.sample_rate) + app = FacebookDenoiserApp(model, args.sample_rate) # Download data - audio: List[str] = args.audio + audio_files: List[str] = args.audio + audio_tensors = [] with tempfile.TemporaryDirectory() as tmpdir: - for idx, file in enumerate(audio): - audio[idx] = load_path(file, tmpdir) + for idx, file in enumerate(audio_files): + audio_file = load_path(file, tmpdir) + audio, sample_rate = torchaudio.load(audio_file) + # By default, cut audio to the default sequence length + # since by default, model is compiled with this input size + audio_tensor = audio[0, :DEFAULT_SEQUENCE_LENGTH].unsqueeze(0).unsqueeze(0) + assert sample_rate == SAMPLE_RATE + audio_tensors.append(audio_tensor) # Dump output from app - output = app.denoise_audio(audio, args.output_dir) + output = app.denoise_audio(audio_tensors) + + if args.output_dir: + output_files = [] + for file, estimate in zip(audio_files, output): + local_path = load_path(file, tmpdir) + filename = os.path.join( + args.output_dir, os.path.basename(local_path).rsplit(".", 1)[0] + ) + filename = Path(f"{filename}_enhanced.wav") + # make input 2D: + estimate = 
estimate.squeeze().unsqueeze(0) + torchaudio.save(filename, estimate, SAMPLE_RATE) + output_files.append(filename) + return output_files if not is_test: print("Wrote files:") diff --git a/qai_hub_models/models/facebook_denoiser/export.py b/qai_hub_models/models/facebook_denoiser/export.py index 4f462b4f..f536b473 100644 --- a/qai_hub_models/models/facebook_denoiser/export.py +++ b/qai_hub_models/models/facebook_denoiser/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -26,6 +26,7 @@ from qai_hub_models.utils.input_spec import make_torch_inputs from qai_hub_models.utils.printing import ( print_inference_metrics, + print_on_target_demo_cmd, print_profile_metrics_from_job, ) from qai_hub_models.utils.qai_hub_helpers import ( @@ -107,65 +108,77 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( target_runtime, compile_options ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) sample_inputs = model.sample_inputs(input_spec) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=sample_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. 
Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/facebook_denoiser/info.yaml b/qai_hub_models/models/facebook_denoiser/info.yaml index 0b4e8453..3e5cf4ed 100644 --- a/qai_hub_models/models/facebook_denoiser/info.yaml +++ b/qai_hub_models/models/facebook_denoiser/info.yaml @@ -11,6 +11,7 @@ tags: [] research_paper: https://arxiv.org/abs/2006.12847 research_paper_title: Real Time Speech Enhancement in the Waveform Domain license: https://github.com/facebookresearch/denoiser/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/facebookresearch/denoiser technical_details: Input resolution: 1x1x917 @@ -27,4 +28,5 @@ related_models: [] has_static_banner: yes has_animated_banner: yes license_type: cc-by-nc-4.0 +deploy_license_type: AI Model Hub License dataset: [] diff --git a/qai_hub_models/models/facebook_denoiser/model.py b/qai_hub_models/models/facebook_denoiser/model.py index 72a23b59..68e112f4 100644 --- a/qai_hub_models/models/facebook_denoiser/model.py +++ b/qai_hub_models/models/facebook_denoiser/model.py @@ -4,16 +4,19 @@ # --------------------------------------------------------------------- from __future__ import annotations +from typing import Optional + import torch -from denoiser import pretrained -from denoiser.pretrained import DNS_48_URL +from qai_hub_models.utils.asset_loaders import SourceAsRoot from qai_hub_models.utils.base_model import BaseModel from qai_hub_models.utils.input_spec import InputSpec +SOURCE_REPOSITORY = "https://github.com/facebookresearch/denoiser" +SOURCE_REPO_COMMIT = "8afd7c166699bb3c8b2d95b6dd706f71e1075df0" SAMPLE_RATE = 16000 HIDDEN_LAYER_COUNT = 48 -DEFAULT_SEQUENCE_LENGTH = 917 +DEFAULT_SEQUENCE_LENGTH = 100000 # This corresponds to about 6 seconds of audio MODEL_ID = "facebook_denoiser" ASSET_VERSION = 1 @@ -39,8 +42,8 @@ def forward(self, audio: torch.Tensor) -> torch.Tensor: """ return self.net(audio) + @staticmethod def get_input_spec( - self, batch_size: int = 1, sequence_length: int = DEFAULT_SEQUENCE_LENGTH, ) -> InputSpec: @@ -52,9 +55,14 @@ def get_input_spec( @classmethod def from_pretrained( - cls, state_dict_url: str = DNS_48_URL, hidden_layer_count=HIDDEN_LAYER_COUNT + cls, state_dict_url: Optional[str] = None, hidden_layer_count=HIDDEN_LAYER_COUNT ) -> FacebookDenoiser: - net = pretrained._demucs( - state_dict_url is not None, state_dict_url, hidden=hidden_layer_count - ) - return cls(net) + with SourceAsRoot( + SOURCE_REPOSITORY, SOURCE_REPO_COMMIT, MODEL_ID, ASSET_VERSION + ): + 
from denoiser.pretrained import DNS_48_URL, _demucs + + if state_dict_url is None: + state_dict_url = DNS_48_URL + net = _demucs(True, state_dict_url, hidden=hidden_layer_count) + return cls(net) diff --git a/qai_hub_models/models/facebook_denoiser/perf.yaml b/qai_hub_models/models/facebook_denoiser/perf.yaml index 4f7e4ae7..425ac777 100644 --- a/qai_hub_models/models/facebook_denoiser/perf.yaml +++ b/qai_hub_models/models/facebook_denoiser/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: Facebook-Denoiser performance_metrics: - torchscript_onnx_tflite: - inference_time: 6985.0 - throughput: 143.16392269148176 + inference_time: 711384.0 + throughput: 1.4057105585731475 estimated_peak_memory_range: - min: 28246016 - max: 51679504 + min: 236318720 + max: 349174920 primary_compute_unit: CPU precision: fp32 layer_info: @@ -40,8 +43,16 @@ models: layers_on_gpu: 0 layers_on_cpu: 209 total_layers: 209 - job_id: jn5qlrw7p + job_id: j1p3kwm52 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:10:56.043154Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -57,11 +68,41 @@ models: total_layers: 0 job_id: '' job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 670316.0 + throughput: 1.4918337023135357 + estimated_peak_memory_range: + min: 481374208 + max: 504692832 + primary_compute_unit: CPU + precision: fp32 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 209 + total_layers: 209 + job_id: jwgoy4158 + job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:10:37.856306Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:10:56.043167Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped diff --git a/qai_hub_models/models/facebook_denoiser/requirements.txt b/qai_hub_models/models/facebook_denoiser/requirements.txt index 0307e34c..855a9703 100644 --- a/qai_hub_models/models/facebook_denoiser/requirements.txt +++ b/qai_hub_models/models/facebook_denoiser/requirements.txt @@ -1,3 +1,3 @@ -denoiser -torchaudio +hydra-core==1.3.0 +torchaudio==0.13.1 PySoundFile; sys_platform == 'win32' diff --git a/qai_hub_models/models/facebook_denoiser/test.py b/qai_hub_models/models/facebook_denoiser/test.py index 56a46b6c..c77ab11a 100644 --- a/qai_hub_models/models/facebook_denoiser/test.py +++ b/qai_hub_models/models/facebook_denoiser/test.py @@ -2,8 +2,8 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
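The `model.py` change above swaps the pip-installed `denoiser` package for a pinned checkout of the upstream repository, deferring the import until `SourceAsRoot` has put the clone on the import path. A sketch of the same pattern applied to a hypothetical upstream repo (repo URL, commit, and module names are placeholders, not real identifiers):

```python
from qai_hub_models.utils.asset_loaders import SourceAsRoot

UPSTREAM_REPO = "https://github.com/example/upstream-net"  # placeholder
UPSTREAM_COMMIT = "0123456789abcdef"                       # placeholder pin


def load_upstream_model():
    # The upstream package only becomes importable inside the context manager,
    # so the import must live in the function body, not at module scope.
    with SourceAsRoot(UPSTREAM_REPO, UPSTREAM_COMMIT, "example_model", 1):
        from upstream.pretrained import build_net  # placeholder module

        return build_net(pretrained=True)
```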
# SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- +import numpy as np import pytest -import torch import torchaudio from qai_hub_models.models.facebook_denoiser.app import FacebookDenoiserApp @@ -16,6 +16,7 @@ FacebookDenoiser, ) from qai_hub_models.utils.asset_loaders import CachedWebModelAsset +from qai_hub_models.utils.testing import skip_clone_repo_check ENHANCED_EXAMPLE_RECORDING = CachedWebModelAsset.from_asset_store( MODEL_ID, ASSET_VERSION, "icsi_meeting_recording_enhanced.wav" @@ -30,18 +31,21 @@ def _handle_runtime_error(e: RuntimeError): ) +@skip_clone_repo_check def test_task(): app = FacebookDenoiserApp(FacebookDenoiser.from_pretrained()) try: - out = app.predict([EXAMPLE_RECORDING.fetch()])[0][:, 0] + out = app.predict([EXAMPLE_RECORDING.fetch()])[0] except RuntimeError as e: _handle_runtime_error(e) return expected, _ = torchaudio.load(ENHANCED_EXAMPLE_RECORDING.fetch()) - torch.testing.assert_allclose(out, expected) + np.testing.assert_allclose(out, expected, atol=1e-07) @pytest.mark.skip(reason="Fails with a mysterious error in DefaultCPUAllocator.") +@pytest.mark.trace +@skip_clone_repo_check def test_trace(): try: input_data, sample_rate = torchaudio.load(EXAMPLE_RECORDING.fetch()) @@ -58,8 +62,9 @@ def test_trace(): return expected, _ = torchaudio.load(ENHANCED_EXAMPLE_RECORDING.fetch()) - torch.testing.assert_allclose(out, expected) + np.testing.assert_allclose(out, expected, atol=1e-07) +@skip_clone_repo_check def test_demo(): demo_main(is_test=True) diff --git a/qai_hub_models/models/fastsam_s/README.md b/qai_hub_models/models/fastsam_s/README.md index 1fc0e262..9d774e45 100644 --- a/qai_hub_models/models/fastsam_s/README.md +++ b/qai_hub_models/models/fastsam_s/README.md @@ -10,7 +10,7 @@ This is based on the implementation of FastSam-S found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/fastsam_s). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -30,7 +30,7 @@ python -m qai_hub_models.models.fastsam_s.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -46,7 +46,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of FastSam-S can be found [here](https://github.com/CASIA-IVA-Lab/FastSAM/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). 
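On the switch from `torch.testing.assert_allclose` to `np.testing.assert_allclose` in the test above: numpy's defaults are `rtol=1e-07, atol=0`, so reference samples at or near zero can only match exactly unless an absolute tolerance is given, which is why `atol=1e-07` is added. A small illustration:

```python
import numpy as np
import torch

reference = torch.zeros(4)
candidate = reference + 5e-8  # tiny absolute error on near-silent samples

# With the default atol=0 this would raise, because 5e-8 > rtol * |0|.
# np.testing.assert_allclose(candidate, reference)

# With an explicit absolute tolerance it passes.
np.testing.assert_allclose(candidate, reference, atol=1e-07)
```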
+- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Fast Segment Anything](https://arxiv.org/abs/2306.12156) diff --git a/qai_hub_models/models/fastsam_s/conftest.py b/qai_hub_models/models/fastsam_s/conftest.py new file mode 100644 index 00000000..367fe8b9 --- /dev/null +++ b/qai_hub_models/models/fastsam_s/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.fastsam_s import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.fastsam_s.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/fastsam_s/demo.py b/qai_hub_models/models/fastsam_s/demo.py index d5396330..df4e5163 100644 --- a/qai_hub_models/models/fastsam_s/demo.py +++ b/qai_hub_models/models/fastsam_s/demo.py @@ -16,7 +16,7 @@ def main(is_test: bool = False): - fastsam_demo(FastSAM_S, image_path=INPUT_IMAGE, is_test=is_test) + fastsam_demo(FastSAM_S, MODEL_ID, image_path=INPUT_IMAGE, is_test=is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/fastsam_s/export.py b/qai_hub_models/models/fastsam_s/export.py index 61896353..4902b3a5 100644 --- a/qai_hub_models/models/fastsam_s/export.py +++ b/qai_hub_models/models/fastsam_s/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -111,7 +111,7 @@ def export_model( # Trace the model source_model = torch.jit.trace( - model, make_torch_inputs(input_spec), check_trace=False + model.to("cpu"), make_torch_inputs(input_spec), check_trace=False ) # 2. Compile the model to an on-device asset @@ -121,29 +121,37 @@ def export_model( + " --force_channel_last_input image" + " --force_channel_last_output output_1,output_2,output_3,output_5", ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. 
Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -152,37 +160,39 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore # Convert outputs from channel last to channel first inference_result = transpose_channel_last_to_first( "output_1,output_2,output_3,output_5", inference_result, target_runtime ) print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/fastsam_s/info.yaml b/qai_hub_models/models/fastsam_s/info.yaml index f2a59ae2..4d701b49 100644 --- a/qai_hub_models/models/fastsam_s/info.yaml +++ b/qai_hub_models/models/fastsam_s/info.yaml @@ -14,6 +14,7 @@ tags: [] research_paper: https://arxiv.org/abs/2306.12156 research_paper_title: Fast Segment Anything license: https://github.com/CASIA-IVA-Lab/FastSAM/blob/main/LICENSE +deploy_license: https://github.com/CASIA-IVA-Lab/FastSAM/blob/main/LICENSE source_repo: https://github.com/CASIA-IVA-Lab/FastSAM technical_details: Model checkpoint: fastsam-s.pt @@ -33,4 +34,5 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: agpl-3.0 +deploy_license_type: agpl-3.0 dataset: [] diff --git a/qai_hub_models/models/fastsam_s/perf.yaml b/qai_hub_models/models/fastsam_s/perf.yaml index ca26141a..af668b3d 100644 --- a/qai_hub_models/models/fastsam_s/perf.yaml +++ b/qai_hub_models/models/fastsam_s/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - 
Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: FastSam-S performance_metrics: - torchscript_onnx_tflite: - inference_time: 13071.0 - throughput: 76.50524060898171 + inference_time: 13114.0 + throughput: 76.25438462711605 estimated_peak_memory_range: - min: 7827456 - max: 10814968 + min: 7823360 + max: 25444440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,8 +43,16 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 288 - job_id: jn5qlr97p + job_id: jegn21vgo job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:39:15.450027Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -57,11 +68,41 @@ models: total_layers: 0 job_id: '' job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 9234.0 + throughput: 108.29542993285683 + estimated_peak_memory_range: + min: 6332416 + max: 79756208 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 288 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 288 + job_id: joprkxv50 + job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:18:24.085348Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:39:15.450036Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped diff --git a/qai_hub_models/models/fastsam_s/requirements.txt b/qai_hub_models/models/fastsam_s/requirements.txt index 8d55bfa4..94980b0d 100644 --- a/qai_hub_models/models/fastsam_s/requirements.txt +++ b/qai_hub_models/models/fastsam_s/requirements.txt @@ -1,2 +1,3 @@ +seaborn==0.11.0 +thop==0.1.1.post2209072238 ultralytics==8.0.193 -torchvision diff --git a/qai_hub_models/models/fastsam_x/README.md b/qai_hub_models/models/fastsam_x/README.md index da7d4af4..4983e01b 100644 --- a/qai_hub_models/models/fastsam_x/README.md +++ b/qai_hub_models/models/fastsam_x/README.md @@ -10,7 +10,7 @@ This is based on the implementation of FastSam-X found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/fastsam_x). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -30,7 +30,7 @@ python -m qai_hub_models.models.fastsam_x.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -46,7 +46,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of FastSam-X can be found [here](https://github.com/CASIA-IVA-Lab/FastSAM/blob/main/LICENSE). 
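The `export.py` rewrites throughout this patch follow one repeated shape: keep the raw return value of each `submit_*` call, `cast` it to the concrete job type so later attribute access type-checks, and mark the optional jobs as `Optional`. A condensed sketch of the compile-and-profile portion (options and runtime handling trimmed; assumes a configured `qai-hub` client and an already-traced model):

```python
from typing import Optional, cast

import qai_hub as hub


def compile_then_profile(traced_model, input_spec, device_name: str,
                         model_name: str, skip_profiling: bool = False):
    submitted_compile_job = hub.submit_compile_job(
        model=traced_model,
        input_specs=input_spec,
        device=hub.Device(device_name),
        name=model_name,
    )
    # submit_compile_job returns a generic job handle; the cast lets type
    # checkers see CompileJob-only members such as get_target_model().
    compile_job = cast(hub.client.CompileJob, submitted_compile_job)

    profile_job: Optional[hub.client.ProfileJob] = None
    if not skip_profiling:
        submitted_profile_job = hub.submit_profile_job(
            model=compile_job.get_target_model(),
            device=hub.Device(device_name),
            name=model_name,
        )
        profile_job = cast(hub.client.ProfileJob, submitted_profile_job)
    return compile_job, profile_job
```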
-- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Fast Segment Anything](https://arxiv.org/abs/2306.12156) diff --git a/qai_hub_models/models/fastsam_x/conftest.py b/qai_hub_models/models/fastsam_x/conftest.py new file mode 100644 index 00000000..e0c49878 --- /dev/null +++ b/qai_hub_models/models/fastsam_x/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.fastsam_x import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.fastsam_x.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/fastsam_x/demo.py b/qai_hub_models/models/fastsam_x/demo.py index 0bce5d27..5acecbe5 100644 --- a/qai_hub_models/models/fastsam_x/demo.py +++ b/qai_hub_models/models/fastsam_x/demo.py @@ -16,7 +16,7 @@ def main(is_test: bool = False): - fastsam_demo(FastSAM_X, image_path=INPUT_IMAGE, is_test=is_test) + fastsam_demo(FastSAM_X, MODEL_ID, image_path=INPUT_IMAGE, is_test=is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/fastsam_x/export.py b/qai_hub_models/models/fastsam_x/export.py index c4cdf8cd..617e16c5 100644 --- a/qai_hub_models/models/fastsam_x/export.py +++ b/qai_hub_models/models/fastsam_x/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -111,7 +111,7 @@ def export_model( # Trace the model source_model = torch.jit.trace( - model, make_torch_inputs(input_spec), check_trace=False + model.to("cpu"), make_torch_inputs(input_spec), check_trace=False ) # 2. Compile the model to an on-device asset @@ -121,29 +121,37 @@ def export_model( + " --force_channel_last_input image" + " --force_channel_last_output output_1,output_2,output_3,output_5", ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. 
Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -152,37 +160,39 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore # Convert outputs from channel last to channel first inference_result = transpose_channel_last_to_first( "output_1,output_2,output_3,output_5", inference_result, target_runtime ) print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/fastsam_x/info.yaml b/qai_hub_models/models/fastsam_x/info.yaml index a39f4a47..aa479646 100644 --- a/qai_hub_models/models/fastsam_x/info.yaml +++ b/qai_hub_models/models/fastsam_x/info.yaml @@ -14,6 +14,7 @@ tags: [] research_paper: https://arxiv.org/abs/2306.12156 research_paper_title: Fast Segment Anything license: https://github.com/CASIA-IVA-Lab/FastSAM/blob/main/LICENSE +deploy_license: https://github.com/CASIA-IVA-Lab/FastSAM/blob/main/LICENSE source_repo: https://github.com/CASIA-IVA-Lab/FastSAM technical_details: Model checkpoint: fastsam-x.pt @@ -33,4 +34,5 @@ form_factors: has_static_banner: 
yes has_animated_banner: yes license_type: agpl-3.0 +deploy_license_type: agpl-3.0 dataset: [] diff --git a/qai_hub_models/models/fastsam_x/perf.yaml b/qai_hub_models/models/fastsam_x/perf.yaml index 3fa52d1d..f4f2b30b 100644 --- a/qai_hub_models/models/fastsam_x/perf.yaml +++ b/qai_hub_models/models/fastsam_x/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: FastSam-X performance_metrics: - torchscript_onnx_tflite: - inference_time: 64468.0 - throughput: 15.511571632437798 + inference_time: 64155.0 + throughput: 15.587249629802821 estimated_peak_memory_range: - min: 9224192 - max: 14449200 + min: 9207808 + max: 14058240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,8 +43,16 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 420 - job_id: jz5wl3xzp + job_id: jw566k75o job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:56:58.796143Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -57,11 +68,41 @@ models: total_layers: 0 job_id: '' job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 47867.0 + throughput: 20.891219420477572 + estimated_peak_memory_range: + min: 7962624 + max: 152777152 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 420 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 420 + job_id: j1p3kyz52 + job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:38:35.191434Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:56:58.796153Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped diff --git a/qai_hub_models/models/fastsam_x/requirements.txt b/qai_hub_models/models/fastsam_x/requirements.txt index 8d55bfa4..94980b0d 100644 --- a/qai_hub_models/models/fastsam_x/requirements.txt +++ b/qai_hub_models/models/fastsam_x/requirements.txt @@ -1,2 +1,3 @@ +seaborn==0.11.0 +thop==0.1.1.post2209072238 ultralytics==8.0.193 -torchvision diff --git a/qai_hub_models/models/fcn_resnet50/README.md b/qai_hub_models/models/fcn_resnet50/README.md index c78904fe..275c82c9 100644 --- a/qai_hub_models/models/fcn_resnet50/README.md +++ b/qai_hub_models/models/fcn_resnet50/README.md @@ -10,7 +10,7 @@ This is based on the implementation of FCN_ResNet50 found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/fcn_resnet50). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. 
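A note on reading the regenerated `perf.yaml` blocks in this patch: the figures are consistent with `inference_time` being reported in microseconds, with `throughput` simply its reciprocal in inferences per second. Checking the FastSam-X Galaxy S24 entry above:

```python
# inference_time is in microseconds; throughput is inferences per second.
inference_time_us = 47867.0
throughput = 1_000_000 / inference_time_us
print(throughput)  # 20.891219420477572, matching the perf.yaml entry above
```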
@@ -25,7 +25,7 @@ python -m qai_hub_models.models.fcn_resnet50.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of FCN_ResNet50 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Fully Convolutional Networks for Semantic Segmentation](https://arxiv.org/abs/1411.4038) diff --git a/qai_hub_models/models/fcn_resnet50/conftest.py b/qai_hub_models/models/fcn_resnet50/conftest.py new file mode 100644 index 00000000..21f265c6 --- /dev/null +++ b/qai_hub_models/models/fcn_resnet50/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.fcn_resnet50 import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.fcn_resnet50.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/fcn_resnet50/demo.py b/qai_hub_models/models/fcn_resnet50/demo.py index 33478943..6c12063c 100644 --- a/qai_hub_models/models/fcn_resnet50/demo.py +++ b/qai_hub_models/models/fcn_resnet50/demo.py @@ -38,8 +38,8 @@ def main(is_test: bool = False): ) args = parser.parse_args([] if is_test else None) - validate_on_device_demo_args(args, FCN_ResNet50.get_model_id()) - model = demo_model_from_cli_args(FCN_ResNet50, args) + validate_on_device_demo_args(args, MODEL_ID) + model = demo_model_from_cli_args(FCN_ResNet50, MODEL_ID, args) # This FCN ResNet 50 demo comes from # https://pytorch.org/hub/pytorch_vision_fcn_resnet101/ diff --git a/qai_hub_models/models/fcn_resnet50/export.py b/qai_hub_models/models/fcn_resnet50/export.py index 63a60381..3a5dd85a 100644 --- a/qai_hub_models/models/fcn_resnet50/export.py +++ b/qai_hub_models/models/fcn_resnet50/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -110,7 +110,7 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. 
Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( @@ -119,29 +119,37 @@ def export_model( + " --force_channel_last_input image" + " --force_channel_last_output output_0", ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -150,37 +158,39 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. 
Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore # Convert outputs from channel last to channel first inference_result = transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/fcn_resnet50/info.yaml b/qai_hub_models/models/fcn_resnet50/info.yaml index 074affa6..87c73764 100644 --- a/qai_hub_models/models/fcn_resnet50/info.yaml +++ b/qai_hub_models/models/fcn_resnet50/info.yaml @@ -11,6 +11,7 @@ tags: [] research_paper: https://arxiv.org/abs/1411.4038 research_paper_title: Fully Convolutional Networks for Semantic Segmentation license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/segmentation/fcn.py technical_details: @@ -33,4 +34,5 @@ form_factors: has_static_banner: yes has_animated_banner: no license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: [] diff --git a/qai_hub_models/models/fcn_resnet50/perf.yaml b/qai_hub_models/models/fcn_resnet50/perf.yaml index 7c674dea..477b5790 100644 --- a/qai_hub_models/models/fcn_resnet50/perf.yaml +++ b/qai_hub_models/models/fcn_resnet50/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: FCN_ResNet50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 8563.0 - throughput: 116.78150181011328 + inference_time: 8550.0 + throughput: 116.95906432748538 estimated_peak_memory_range: min: 4263936 - max: 11057224 + max: 6443424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,14 +43,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: joprl21vp + job_id: jn5q8dm57 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:32:21.594233Z' torchscript_onnx_qnn: - inference_time: 7864.0 - throughput: 127.1617497456765 + inference_time: 7881.0 + throughput: 126.8874508311128 estimated_peak_memory_range: min: 20480 - max: 13081680 + max: 13250472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -55,13 +66,43 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jep2r93xg + job_id: jw566075o + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 6407.0 + throughput: 156.07928827844546 + estimated_peak_memory_range: + min: 4251648 + max: 76376944 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 84 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 84 + job_id: j1glnqlpv job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:21:53.248417Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:34:20.486125Z' + torchscript_onnx_qnn: + inference_time: 5846.0 + throughput: 171.05713308244952 + estimated_peak_memory_range: + min: 638976 + max: 55934880 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 126 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 126 + job_id: j1p3krz52 + job_status: Passed diff --git a/qai_hub_models/models/fcn_resnet50/test.py b/qai_hub_models/models/fcn_resnet50/test.py index 7c30198c..4ff0b2ab 100644 --- a/qai_hub_models/models/fcn_resnet50/test.py +++ b/qai_hub_models/models/fcn_resnet50/test.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- import numpy as np +import pytest from qai_hub_models.models.fcn_resnet50.app import FCN_ResNet50App from qai_hub_models.models.fcn_resnet50.demo import INPUT_IMAGE_ADDRESS @@ -39,10 +40,12 @@ def test_task(): _test_impl(FCN_ResNet50App(FCN_ResNet50.from_pretrained())) +@pytest.mark.trace @skip_clone_repo_check def test_trace(): _test_impl(FCN_ResNet50App(FCN_ResNet50.from_pretrained().convert_to_torchscript())) +@skip_clone_repo_check def test_demo(): demo_main(is_test=True) diff --git a/qai_hub_models/models/ffnet_122ns_lowres/README.md b/qai_hub_models/models/ffnet_122ns_lowres/README.md index b1f3e848..56834d32 100644 --- a/qai_hub_models/models/ffnet_122ns_lowres/README.md +++ b/qai_hub_models/models/ffnet_122ns_lowres/README.md @@ -10,7 +10,7 @@ This is based on the implementation of FFNet-122NS-LowRes found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/ffnet_122ns_lowres). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -30,7 +30,7 @@ python -m qai_hub_models.models.ffnet_122ns_lowres.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -46,7 +46,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of FFNet-122NS-LowRes can be found [here](https://github.com/Qualcomm-AI-research/FFNet/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). 
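On the `--force_channel_last_input image` / `--force_channel_last_output output_0` compile options and the paired `transpose_channel_first_to_last` / `transpose_channel_last_to_first` calls in the export scripts above: the compiled TFLite asset takes NHWC tensors while the PyTorch sample inputs are NCHW. The real helpers operate on named dataset entries; this only sketches the axis shuffle they imply:

```python
import numpy as np

# PyTorch-style sample input: (batch, channels, height, width).
nchw = np.random.rand(1, 3, 512, 512).astype(np.float32)

# What transpose_channel_first_to_last amounts to for a 4-D image tensor.
nhwc = nchw.transpose(0, 2, 3, 1)
assert nhwc.shape == (1, 512, 512, 3)

# And the inverse, applied to model outputs before comparing with torch results.
restored = nhwc.transpose(0, 3, 1, 2)
assert restored.shape == nchw.shape
```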
+- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Simple and Efficient Architectures for Semantic Segmentation](https://arxiv.org/abs/2206.08236) diff --git a/qai_hub_models/models/ffnet_122ns_lowres/conftest.py b/qai_hub_models/models/ffnet_122ns_lowres/conftest.py new file mode 100644 index 00000000..d6573b6a --- /dev/null +++ b/qai_hub_models/models/ffnet_122ns_lowres/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.ffnet_122ns_lowres import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.ffnet_122ns_lowres.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/ffnet_122ns_lowres/export.py b/qai_hub_models/models/ffnet_122ns_lowres/export.py index 3125277a..05689b50 100644 --- a/qai_hub_models/models/ffnet_122ns_lowres/export.py +++ b/qai_hub_models/models/ffnet_122ns_lowres/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,7 +109,7 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( @@ -118,29 +118,37 @@ def export_model( + " --force_channel_last_input image" + " --force_channel_last_output output_0", ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. 
Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -149,30 +157,31 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore # Convert outputs from channel last to channel first inference_result = transpose_channel_last_to_first( "output_0", inference_result, target_runtime diff --git a/qai_hub_models/models/ffnet_122ns_lowres/info.yaml b/qai_hub_models/models/ffnet_122ns_lowres/info.yaml index 10198c19..a40f6ccd 100644 --- a/qai_hub_models/models/ffnet_122ns_lowres/info.yaml +++ b/qai_hub_models/models/ffnet_122ns_lowres/info.yaml @@ -12,6 +12,7 @@ tags: [] research_paper: https://arxiv.org/abs/2206.08236 research_paper_title: Simple and Efficient Architectures for Semantic Segmentation license: https://github.com/Qualcomm-AI-research/FFNet/blob/master/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/Qualcomm-AI-research/FFNet technical_details: Model checkpoint: ffnet122NS_CCC_cityscapes_state_dict_quarts_pre_down @@ -30,7 +31,8 @@ form_factors: - Phone - Tablet has_static_banner: yes -has_animated_banner: yes +has_animated_banner: no license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: - cityscapes diff --git a/qai_hub_models/models/ffnet_122ns_lowres/perf.yaml b/qai_hub_models/models/ffnet_122ns_lowres/perf.yaml index f41f23a1..37ffb9dc 100644 --- a/qai_hub_models/models/ffnet_122ns_lowres/perf.yaml +++ b/qai_hub_models/models/ffnet_122ns_lowres/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - 
name: FFNet-122NS-LowRes performance_metrics: - torchscript_onnx_tflite: - inference_time: 10460.0 - throughput: 95.60229445506693 + inference_time: 10407.0 + throughput: 96.08917075045642 estimated_peak_memory_range: - min: 643072 - max: 2912400 + min: 12288 + max: 2345904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,14 +43,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 216 - job_id: jqpyojnr5 + job_id: jmg9vel57 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:45:44.022843Z' torchscript_onnx_qnn: - inference_time: 10778.0 - throughput: 92.78159213212099 + inference_time: 10785.0 + throughput: 92.7213722763097 estimated_peak_memory_range: - min: 6332416 - max: 39442976 + min: 6205440 + max: 39312144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -55,13 +66,43 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 349 - job_id: j2p0m2k2g + job_id: jvgdwle5j + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 7373.0 + throughput: 135.63000135630003 + estimated_peak_memory_range: + min: 643072 + max: 58158976 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 216 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 216 + job_id: jnp10x25q job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:22:01.714758Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:47:44.631260Z' + torchscript_onnx_qnn: + inference_time: 7627.0 + throughput: 131.1131506490101 + estimated_peak_memory_range: + min: 6311936 + max: 85982464 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 349 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 349 + job_id: jz57z3lp3 + job_status: Passed diff --git a/qai_hub_models/models/ffnet_122ns_lowres/requirements.txt b/qai_hub_models/models/ffnet_122ns_lowres/requirements.txt index 73ad8aa8..2470ac6c 100644 --- a/qai_hub_models/models/ffnet_122ns_lowres/requirements.txt +++ b/qai_hub_models/models/ffnet_122ns_lowres/requirements.txt @@ -1 +1 @@ -scikit-image>=0.21.0 +scikit-image==0.21.0 diff --git a/qai_hub_models/models/ffnet_40s/README.md b/qai_hub_models/models/ffnet_40s/README.md index ce9169df..4a670a0a 100644 --- a/qai_hub_models/models/ffnet_40s/README.md +++ b/qai_hub_models/models/ffnet_40s/README.md @@ -10,7 +10,7 @@ This is based on the implementation of FFNet-40S found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/ffnet_40s). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -30,7 +30,7 @@ python -m qai_hub_models.models.ffnet_40s.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. 
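Throughout these READMEs the hard-coded deployment-license URL is replaced with a literal `{deploy_license_url}` placeholder while each `info.yaml` gains a `deploy_license` field, which suggests the license bullet is filled in from the model metadata at README generation time. A hedged sketch of that substitution (the template string and field name mirror the patch; the rendering step itself is an assumption):

```python
# Assumed rendering step: fill the README placeholder from the model's info.yaml.
deploy_license_line = (
    "- The license for the compiled assets for on-device deployment can be "
    "found [here]({deploy_license_url})"
)
info_yaml = {
    "deploy_license": "https://qaihub-public-assets.s3.us-west-2.amazonaws.com/"
    "qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf"
}
print(deploy_license_line.format(deploy_license_url=info_yaml["deploy_license"]))
```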
## Export for on-device deployment @@ -46,7 +46,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of FFNet-40S can be found [here](https://github.com/Qualcomm-AI-research/FFNet/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Simple and Efficient Architectures for Semantic Segmentation](https://arxiv.org/abs/2206.08236) diff --git a/qai_hub_models/models/ffnet_40s/conftest.py b/qai_hub_models/models/ffnet_40s/conftest.py new file mode 100644 index 00000000..61ded012 --- /dev/null +++ b/qai_hub_models/models/ffnet_40s/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.ffnet_40s import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.ffnet_40s.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/ffnet_40s/export.py b/qai_hub_models/models/ffnet_40s/export.py index 0800da7f..dac6aea6 100644 --- a/qai_hub_models/models/ffnet_40s/export.py +++ b/qai_hub_models/models/ffnet_40s/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,7 +109,7 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( @@ -118,29 +118,37 @@ def export_model( + " --force_channel_last_input image" + " --force_channel_last_output output_0", ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. 
Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -149,30 +157,31 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore # Convert outputs from channel last to channel first inference_result = transpose_channel_last_to_first( "output_0", inference_result, target_runtime diff --git a/qai_hub_models/models/ffnet_40s/info.yaml b/qai_hub_models/models/ffnet_40s/info.yaml index 94830c01..967b8b32 100644 --- a/qai_hub_models/models/ffnet_40s/info.yaml +++ b/qai_hub_models/models/ffnet_40s/info.yaml @@ -13,6 +13,7 @@ tags: research_paper: https://arxiv.org/abs/2206.08236 research_paper_title: Simple and Efficient Architectures for Semantic Segmentation license: https://github.com/Qualcomm-AI-research/FFNet/blob/master/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/Qualcomm-AI-research/FFNet technical_details: Model checkpoint: ffnet40S_dBBB_cityscapes_state_dict_quarts @@ -31,7 +32,8 @@ form_factors: - Phone - Tablet has_static_banner: yes -has_animated_banner: yes +has_animated_banner: no license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: - cityscapes diff --git 
a/qai_hub_models/models/ffnet_40s/perf.yaml b/qai_hub_models/models/ffnet_40s/perf.yaml index cd79f677..038d58da 100644 --- a/qai_hub_models/models/ffnet_40s/perf.yaml +++ b/qai_hub_models/models/ffnet_40s/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: FFNet-40S performance_metrics: - torchscript_onnx_tflite: - inference_time: 22739.0 - throughput: 43.97730770922204 + inference_time: 22513.0 + throughput: 44.41878026029405 estimated_peak_memory_range: - min: 2564096 - max: 5001048 + min: 2539520 + max: 5190832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,14 +43,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 92 - job_id: jegnzm9vg + job_id: jwgoyl458 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:12:39.279085Z' torchscript_onnx_qnn: - inference_time: 17313.0 - throughput: 57.760064691272454 + inference_time: 17466.0 + throughput: 57.25409366769724 estimated_peak_memory_range: - min: 25202688 - max: 51306904 + min: 25210880 + max: 48310168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -55,13 +66,43 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 141 - job_id: jep2r97xg + job_id: j7gjxr7pd + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 16613.0 + throughput: 60.19382411364594 + estimated_peak_memory_range: + min: 61440 + max: 100488656 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 92 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 92 + job_id: j1pv3l75x job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:14:26.648274Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:15:49.686166Z' + torchscript_onnx_qnn: + inference_time: 12681.0 + throughput: 78.85813421654444 + estimated_peak_memory_range: + min: 25182208 + max: 82551136 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 141 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 141 + job_id: jlpe977gr + job_status: Passed diff --git a/qai_hub_models/models/ffnet_40s/requirements.txt b/qai_hub_models/models/ffnet_40s/requirements.txt index 73ad8aa8..2470ac6c 100644 --- a/qai_hub_models/models/ffnet_40s/requirements.txt +++ b/qai_hub_models/models/ffnet_40s/requirements.txt @@ -1 +1 @@ -scikit-image>=0.21.0 +scikit-image==0.21.0 diff --git a/qai_hub_models/models/ffnet_40s_quantized/README.md b/qai_hub_models/models/ffnet_40s_quantized/README.md index 8508542a..18fb3a95 100644 --- a/qai_hub_models/models/ffnet_40s_quantized/README.md +++ b/qai_hub_models/models/ffnet_40s_quantized/README.md @@ -10,7 +10,7 @@ This is based on the implementation of FFNet-40S-Quantized found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/ffnet_40s_quantized). 
-[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.ffnet_40s_quantized.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of FFNet-40S-Quantized can be found [here](https://github.com/Qualcomm-AI-research/FFNet/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Simple and Efficient Architectures for Semantic Segmentation](https://arxiv.org/abs/2206.08236) diff --git a/qai_hub_models/models/ffnet_40s_quantized/conftest.py b/qai_hub_models/models/ffnet_40s_quantized/conftest.py new file mode 100644 index 00000000..f17ac459 --- /dev/null +++ b/qai_hub_models/models/ffnet_40s_quantized/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.ffnet_40s_quantized import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.ffnet_40s_quantized.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/ffnet_40s_quantized/export.py b/qai_hub_models/models/ffnet_40s_quantized/export.py index a12d147c..08ed7624 100644 --- a/qai_hub_models/models/ffnet_40s_quantized/export.py +++ b/qai_hub_models/models/ffnet_40s_quantized/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub @@ -123,8 +123,8 @@ def export_model( + " --force_channel_last_input image" + " --force_channel_last_output output_0", ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), @@ -132,21 +132,29 @@ def export_model( calibration_data=quant_calibration_data, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. 
Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -158,30 +166,31 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore # Convert outputs from channel last to channel first inference_result = transpose_channel_last_to_first( "output_0", inference_result, target_runtime diff --git a/qai_hub_models/models/ffnet_40s_quantized/info.yaml b/qai_hub_models/models/ffnet_40s_quantized/info.yaml index bf5bfbe3..163abd5d 100644 --- a/qai_hub_models/models/ffnet_40s_quantized/info.yaml +++ b/qai_hub_models/models/ffnet_40s_quantized/info.yaml @@ -14,6 +14,7 @@ tags: research_paper: https://arxiv.org/abs/2206.08236 research_paper_title: Simple and Efficient Architectures for Semantic Segmentation license: https://github.com/Qualcomm-AI-research/FFNet/blob/master/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/Qualcomm-AI-research/FFNet technical_details: Model checkpoint: ffnet40S_dBBB_cityscapes_state_dict_quarts @@ -32,7 +33,8 @@ form_factors: - Phone - Tablet has_static_banner: yes -has_animated_banner: yes +has_animated_banner: no license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: - cityscapes 
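The export.py hunks above repeat one pattern for every FFNet variant: each `submit_*` call is captured in a `submitted_*` variable and cast to its concrete `hub.client.*Job` type, optional jobs are declared `Optional[...]`, and the summary step asserts the job is not None before calling `wait()` and downloading results. The sketch below isolates that pattern on its own, assuming the `qai_hub` client API exactly as it is used in these hunks; the wrapper function `compile_and_profile` and its parameters are illustrative and are not part of the repository.

```python
# Minimal sketch of the typed job-handling pattern applied in the export.py
# diffs above. Requires a configured `qai_hub` client; the wrapper itself is
# illustrative, not qai_hub_models code.
from typing import Any, Dict, Optional, cast

import qai_hub as hub


def compile_and_profile(
    source_model: Any,
    input_specs: Dict[str, Any],
    device: str,
    name: str,
    compile_options: str = "",
    profile_options: str = "",
    skip_profiling: bool = False,
) -> None:
    # 1. Compile: cast the submitted job so later code sees a CompileJob.
    submitted_compile_job = hub.submit_compile_job(
        model=source_model,
        input_specs=input_specs,
        device=hub.Device(device),
        name=name,
        options=compile_options,
    )
    compile_job = cast(hub.client.CompileJob, submitted_compile_job)

    # 2. Profile (optional): keep the variable Optional and only populate it
    #    when profiling is requested.
    profile_job: Optional[hub.client.ProfileJob] = None
    if not skip_profiling:
        submitted_profile_job = hub.submit_profile_job(
            model=compile_job.get_target_model(),
            device=hub.Device(device),
            name=name,
            options=profile_options,
        )
        profile_job = cast(hub.client.ProfileJob, submitted_profile_job)

    # 3. Summarize: guard against None before wait()/download_profile(),
    #    mirroring the added "assert profile_job is not None" lines.
    if profile_job is not None:
        assert profile_job.wait().success
        profile_data: Dict[str, Any] = profile_job.download_profile()
        print(f"Profile keys for {name}: {sorted(profile_data)}")
```

The same cast-then-guard shape is what the diffs apply to the inference job as well, which is why each `assert job.wait().success` becomes `assert job is not None and job.wait().success` once the job variables are typed as Optional.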
diff --git a/qai_hub_models/models/ffnet_40s_quantized/perf.yaml b/qai_hub_models/models/ffnet_40s_quantized/perf.yaml index d0408e2b..6d795ec7 100644 --- a/qai_hub_models/models/ffnet_40s_quantized/perf.yaml +++ b/qai_hub_models/models/ffnet_40s_quantized/perf.yaml @@ -17,31 +17,42 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: FFNet-40S-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 6451.0 - throughput: 155.0147263990079 + inference_time: 6439.0 + throughput: 155.3036185743128 estimated_peak_memory_range: - min: 851968 - max: 2582296 + min: 888832 + max: 2660784 primary_compute_unit: NPU - precision: fp16 + precision: int8 layer_info: layers_on_npu: 97 layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 97 - job_id: j0pxl6x9p + job_id: jqp4q92go job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:35:32.125659Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -57,11 +68,41 @@ models: total_layers: 0 job_id: '' job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 4671.0 + throughput: 214.08691928923142 + estimated_peak_memory_range: + min: 16384 + max: 65022448 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 97 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 97 + job_id: j0pxvd8g7 + job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:15:22.015621Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:35:32.125673Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped diff --git a/qai_hub_models/models/ffnet_40s_quantized/test.py b/qai_hub_models/models/ffnet_40s_quantized/test.py index 12e10323..9d2ebf14 100644 --- a/qai_hub_models/models/ffnet_40s_quantized/test.py +++ b/qai_hub_models/models/ffnet_40s_quantized/test.py @@ -17,5 +17,6 @@ def test_off_target_numerical(): ) +@skip_clone_repo_check def test_demo(): demo_main(is_test=True) diff --git a/qai_hub_models/models/ffnet_54s/README.md b/qai_hub_models/models/ffnet_54s/README.md index ecb28e6c..487434ac 100644 --- a/qai_hub_models/models/ffnet_54s/README.md +++ b/qai_hub_models/models/ffnet_54s/README.md @@ -10,7 +10,7 @@ This is based on the implementation of FFNet-54S found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/ffnet_54s). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -30,7 +30,7 @@ python -m qai_hub_models.models.ffnet_54s.demo More details on the CLI tool can be found with the `--help` option. 
See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -46,7 +46,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of FFNet-54S can be found [here](https://github.com/Qualcomm-AI-research/FFNet/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Simple and Efficient Architectures for Semantic Segmentation](https://arxiv.org/abs/2206.08236) diff --git a/qai_hub_models/models/ffnet_54s/conftest.py b/qai_hub_models/models/ffnet_54s/conftest.py new file mode 100644 index 00000000..e87eb0b0 --- /dev/null +++ b/qai_hub_models/models/ffnet_54s/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.ffnet_54s import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.ffnet_54s.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/ffnet_54s/export.py b/qai_hub_models/models/ffnet_54s/export.py index 14761dc7..5a27ee33 100644 --- a/qai_hub_models/models/ffnet_54s/export.py +++ b/qai_hub_models/models/ffnet_54s/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,7 +109,7 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( @@ -118,29 +118,37 @@ def export_model( + " --force_channel_last_input image" + " --force_channel_last_output output_0", ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. 
Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -149,30 +157,31 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore # Convert outputs from channel last to channel first inference_result = transpose_channel_last_to_first( "output_0", inference_result, target_runtime diff --git a/qai_hub_models/models/ffnet_54s/info.yaml b/qai_hub_models/models/ffnet_54s/info.yaml index be93ffc8..846f0dd0 100644 --- a/qai_hub_models/models/ffnet_54s/info.yaml +++ b/qai_hub_models/models/ffnet_54s/info.yaml @@ -12,6 +12,7 @@ tags: [] research_paper: https://arxiv.org/abs/2206.08236 research_paper_title: Simple and Efficient Architectures for Semantic Segmentation license: https://github.com/Qualcomm-AI-research/FFNet/blob/master/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/Qualcomm-AI-research/FFNet technical_details: Model checkpoint: ffnet54S_dBBB_cityscapes_state_dict_quarts @@ -30,7 +31,8 @@ form_factors: - Phone - Tablet has_static_banner: yes -has_animated_banner: yes +has_animated_banner: no license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: - cityscapes diff --git 
a/qai_hub_models/models/ffnet_54s/perf.yaml b/qai_hub_models/models/ffnet_54s/perf.yaml index 13243d9e..9f1617aa 100644 --- a/qai_hub_models/models/ffnet_54s/perf.yaml +++ b/qai_hub_models/models/ffnet_54s/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: FFNet-54S performance_metrics: - torchscript_onnx_tflite: - inference_time: 25261.0 - throughput: 39.58671469854717 + inference_time: 24853.0 + throughput: 40.23659115599727 estimated_peak_memory_range: - min: 2551808 - max: 4912232 + min: 2572288 + max: 4947328 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,14 +43,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 113 - job_id: jygzlj8z5 + job_id: j0pxv38g7 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:47:57.765081Z' torchscript_onnx_qnn: - inference_time: 20585.0 - throughput: 48.57906242409521 + inference_time: 19975.0 + throughput: 50.06257822277847 estimated_peak_memory_range: - min: 25206784 - max: 41071808 + min: 25214976 + max: 52299192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -55,13 +66,43 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 176 - job_id: jz5wl38zp + job_id: joprkok50 + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 18421.0 + throughput: 54.28586938819825 + estimated_peak_memory_range: + min: 462848 + max: 113159440 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 113 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 113 + job_id: jo5mro7gk job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:31:19.360420Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:49:56.432155Z' + torchscript_onnx_qnn: + inference_time: 14570.0 + throughput: 68.63417982155113 + estimated_peak_memory_range: + min: 154132480 + max: 217703424 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 176 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 176 + job_id: jep2846p6 + job_status: Passed diff --git a/qai_hub_models/models/ffnet_54s/requirements.txt b/qai_hub_models/models/ffnet_54s/requirements.txt index 73ad8aa8..2470ac6c 100644 --- a/qai_hub_models/models/ffnet_54s/requirements.txt +++ b/qai_hub_models/models/ffnet_54s/requirements.txt @@ -1 +1 @@ -scikit-image>=0.21.0 +scikit-image==0.21.0 diff --git a/qai_hub_models/models/ffnet_54s_quantized/README.md b/qai_hub_models/models/ffnet_54s_quantized/README.md index 3b9d860f..346077ff 100644 --- a/qai_hub_models/models/ffnet_54s_quantized/README.md +++ b/qai_hub_models/models/ffnet_54s_quantized/README.md @@ -10,7 +10,7 @@ This is based on the implementation of FFNet-54S-Quantized found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/ffnet_54s_quantized). 
-[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.ffnet_54s_quantized.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of FFNet-54S-Quantized can be found [here](https://github.com/Qualcomm-AI-research/FFNet/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Simple and Efficient Architectures for Semantic Segmentation](https://arxiv.org/abs/2206.08236) diff --git a/qai_hub_models/models/ffnet_54s_quantized/conftest.py b/qai_hub_models/models/ffnet_54s_quantized/conftest.py new file mode 100644 index 00000000..f60efc9c --- /dev/null +++ b/qai_hub_models/models/ffnet_54s_quantized/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.ffnet_54s_quantized import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.ffnet_54s_quantized.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/ffnet_54s_quantized/export.py b/qai_hub_models/models/ffnet_54s_quantized/export.py index 382d06b3..83dc6e73 100644 --- a/qai_hub_models/models/ffnet_54s_quantized/export.py +++ b/qai_hub_models/models/ffnet_54s_quantized/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub @@ -123,8 +123,8 @@ def export_model( + " --force_channel_last_input image" + " --force_channel_last_output output_0", ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), @@ -132,21 +132,29 @@ def export_model( calibration_data=quant_calibration_data, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. 
Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -158,30 +166,31 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore # Convert outputs from channel last to channel first inference_result = transpose_channel_last_to_first( "output_0", inference_result, target_runtime diff --git a/qai_hub_models/models/ffnet_54s_quantized/info.yaml b/qai_hub_models/models/ffnet_54s_quantized/info.yaml index a97c6bbf..a7f45fd7 100644 --- a/qai_hub_models/models/ffnet_54s_quantized/info.yaml +++ b/qai_hub_models/models/ffnet_54s_quantized/info.yaml @@ -14,6 +14,7 @@ tags: research_paper: https://arxiv.org/abs/2206.08236 research_paper_title: Simple and Efficient Architectures for Semantic Segmentation license: https://github.com/Qualcomm-AI-research/FFNet/blob/master/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/Qualcomm-AI-research/FFNet technical_details: Model checkpoint: ffnet54S_dBBB_cityscapes_state_dict_quarts @@ -32,7 +33,8 @@ form_factors: - Phone - Tablet has_static_banner: yes -has_animated_banner: yes +has_animated_banner: no license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: - cityscapes 
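Throughout these export scripts the compile job is given `--force_channel_last_input image --force_channel_last_output output_0`, sample inputs are passed through `transpose_channel_first_to_last` before inference, and on-device outputs go back through `transpose_channel_last_to_first` before being compared against the torch reference. The snippet below is only an illustration of that layout change (NCHW to NHWC and back) using numpy with made-up shapes; it is not the repository's helper implementation.

```python
# Illustration of the channel-layout conversion assumed by the
# --force_channel_last_* compile options and the transpose_* helpers in the
# export scripts: torch-style channel-first arrays become channel-last for the
# compiled asset, and outputs are converted back. Shapes here are illustrative.
import numpy as np

# Channel-first "image" input: (batch, channels, height, width)
image_nchw = np.random.rand(1, 3, 512, 1024).astype(np.float32)

# Channel-last layout expected by the compiled on-device asset:
# (batch, height, width, channels)
image_nhwc = image_nchw.transpose(0, 2, 3, 1)
assert image_nhwc.shape == (1, 512, 1024, 3)

# Channel-last "output_0" from the device, converted back to channel-first
# before comparison with the torch output.
output_nhwc = np.random.rand(1, 64, 128, 19).astype(np.float32)
output_nchw = output_nhwc.transpose(0, 3, 1, 2)
assert output_nchw.shape == (1, 19, 64, 128)
```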
diff --git a/qai_hub_models/models/ffnet_54s_quantized/perf.yaml b/qai_hub_models/models/ffnet_54s_quantized/perf.yaml index 4e723a91..1f34553f 100644 --- a/qai_hub_models/models/ffnet_54s_quantized/perf.yaml +++ b/qai_hub_models/models/ffnet_54s_quantized/perf.yaml @@ -17,31 +17,42 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: FFNet-54S-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 7130.0 - throughput: 140.25245441795232 + inference_time: 7127.0 + throughput: 140.31149151115477 estimated_peak_memory_range: - min: 643072 - max: 23970880 + min: 712704 + max: 2530520 primary_compute_unit: NPU - precision: fp16 + precision: int8 layer_info: layers_on_npu: 118 layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jep2r9wmg + job_id: j7gjxeepd job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:16:07.677264Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -57,11 +68,41 @@ models: total_layers: 0 job_id: '' job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 5136.0 + throughput: 194.70404984423675 + estimated_peak_memory_range: + min: 16384 + max: 71676704 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 118 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 118 + job_id: jnp10e75q + job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:19:49.268425Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:16:07.677274Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped diff --git a/qai_hub_models/models/ffnet_54s_quantized/test.py b/qai_hub_models/models/ffnet_54s_quantized/test.py index 0b5b9132..3202e209 100644 --- a/qai_hub_models/models/ffnet_54s_quantized/test.py +++ b/qai_hub_models/models/ffnet_54s_quantized/test.py @@ -17,5 +17,6 @@ def test_off_target_numerical(): ) +@skip_clone_repo_check def test_demo(): demo_main(is_test=True) diff --git a/qai_hub_models/models/ffnet_78s/README.md b/qai_hub_models/models/ffnet_78s/README.md index f8012df2..2b2e42f2 100644 --- a/qai_hub_models/models/ffnet_78s/README.md +++ b/qai_hub_models/models/ffnet_78s/README.md @@ -10,7 +10,7 @@ This is based on the implementation of FFNet-78S found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/ffnet_78s). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -30,7 +30,7 @@ python -m qai_hub_models.models.ffnet_78s.demo More details on the CLI tool can be found with the `--help` option. 
See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -46,7 +46,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of FFNet-78S can be found [here](https://github.com/Qualcomm-AI-research/FFNet/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Simple and Efficient Architectures for Semantic Segmentation](https://arxiv.org/abs/2206.08236) diff --git a/qai_hub_models/models/ffnet_78s/conftest.py b/qai_hub_models/models/ffnet_78s/conftest.py new file mode 100644 index 00000000..e4a0bbd6 --- /dev/null +++ b/qai_hub_models/models/ffnet_78s/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.ffnet_78s import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.ffnet_78s.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/ffnet_78s/export.py b/qai_hub_models/models/ffnet_78s/export.py index 9242f6bb..09e78a83 100644 --- a/qai_hub_models/models/ffnet_78s/export.py +++ b/qai_hub_models/models/ffnet_78s/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,7 +109,7 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( @@ -118,29 +118,37 @@ def export_model( + " --force_channel_last_input image" + " --force_channel_last_output output_0", ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. 
Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -149,30 +157,31 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore # Convert outputs from channel last to channel first inference_result = transpose_channel_last_to_first( "output_0", inference_result, target_runtime diff --git a/qai_hub_models/models/ffnet_78s/info.yaml b/qai_hub_models/models/ffnet_78s/info.yaml index 56fc5f75..1f1d8cf9 100644 --- a/qai_hub_models/models/ffnet_78s/info.yaml +++ b/qai_hub_models/models/ffnet_78s/info.yaml @@ -12,6 +12,7 @@ tags: [] research_paper: https://arxiv.org/abs/2206.08236 research_paper_title: Simple and Efficient Architectures for Semantic Segmentation license: https://github.com/Qualcomm-AI-research/FFNet/blob/master/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/Qualcomm-AI-research/FFNet technical_details: Model checkpoint: ffnet78S_dBBB_cityscapes_state_dict_quarts @@ -30,7 +31,8 @@ form_factors: - Phone - Tablet has_static_banner: yes -has_animated_banner: yes +has_animated_banner: no license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: - cityscapes diff --git 
a/qai_hub_models/models/ffnet_78s/perf.yaml b/qai_hub_models/models/ffnet_78s/perf.yaml index 2db45bd8..3190c99d 100644 --- a/qai_hub_models/models/ffnet_78s/perf.yaml +++ b/qai_hub_models/models/ffnet_78s/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: FFNet-78S performance_metrics: - torchscript_onnx_tflite: - inference_time: 29611.0 - throughput: 33.77123366316572 + inference_time: 28993.0 + throughput: 34.49108405477184 estimated_peak_memory_range: - min: 2596864 - max: 5429112 + min: 2699264 + max: 4868664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,14 +43,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jep2r9emg + job_id: j0pxvq9g7 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:58:52.803970Z' torchscript_onnx_qnn: - inference_time: 24120.0 - throughput: 41.459369817578775 + inference_time: 23765.0 + throughput: 42.07868714496108 estimated_peak_memory_range: - min: 2215936 - max: 32957000 + min: 25214976 + max: 45434792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -55,13 +66,43 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 - job_id: jqpyojm45 + job_id: joprkre50 + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 21479.0 + throughput: 46.557102285953725 + estimated_peak_memory_range: + min: 2478080 + max: 130875008 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 149 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 149 + job_id: jegn24mgo job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:36:14.251855Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-15T00:00:58.594801Z' + torchscript_onnx_qnn: + inference_time: 17826.0 + throughput: 56.09783462358353 + estimated_peak_memory_range: + min: 25219072 + max: 99798224 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 236 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 236 + job_id: jep281mp6 + job_status: Passed diff --git a/qai_hub_models/models/ffnet_78s/requirements.txt b/qai_hub_models/models/ffnet_78s/requirements.txt index 73ad8aa8..2470ac6c 100644 --- a/qai_hub_models/models/ffnet_78s/requirements.txt +++ b/qai_hub_models/models/ffnet_78s/requirements.txt @@ -1 +1 @@ -scikit-image>=0.21.0 +scikit-image==0.21.0 diff --git a/qai_hub_models/models/ffnet_78s_lowres/README.md b/qai_hub_models/models/ffnet_78s_lowres/README.md index eba7805b..1575435d 100644 --- a/qai_hub_models/models/ffnet_78s_lowres/README.md +++ b/qai_hub_models/models/ffnet_78s_lowres/README.md @@ -10,7 +10,7 @@ This is based on the implementation of FFNet-78S-LowRes found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/ffnet_78s_lowres). 
-[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -30,7 +30,7 @@ python -m qai_hub_models.models.ffnet_78s_lowres.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -46,7 +46,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of FFNet-78S-LowRes can be found [here](https://github.com/Qualcomm-AI-research/FFNet/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Simple and Efficient Architectures for Semantic Segmentation](https://arxiv.org/abs/2206.08236) diff --git a/qai_hub_models/models/ffnet_78s_lowres/conftest.py b/qai_hub_models/models/ffnet_78s_lowres/conftest.py new file mode 100644 index 00000000..07c8d92a --- /dev/null +++ b/qai_hub_models/models/ffnet_78s_lowres/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.ffnet_78s_lowres import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.ffnet_78s_lowres.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/ffnet_78s_lowres/export.py b/qai_hub_models/models/ffnet_78s_lowres/export.py index 00e1a3b1..08f9197f 100644 --- a/qai_hub_models/models/ffnet_78s_lowres/export.py +++ b/qai_hub_models/models/ffnet_78s_lowres/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,7 +109,7 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. 
Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( @@ -118,29 +118,37 @@ def export_model( + " --force_channel_last_input image" + " --force_channel_last_output output_0", ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -149,30 +157,31 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. 
Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore # Convert outputs from channel last to channel first inference_result = transpose_channel_last_to_first( "output_0", inference_result, target_runtime diff --git a/qai_hub_models/models/ffnet_78s_lowres/info.yaml b/qai_hub_models/models/ffnet_78s_lowres/info.yaml index 9a4a4992..c50bd7cc 100644 --- a/qai_hub_models/models/ffnet_78s_lowres/info.yaml +++ b/qai_hub_models/models/ffnet_78s_lowres/info.yaml @@ -13,6 +13,7 @@ tags: research_paper: https://arxiv.org/abs/2206.08236 research_paper_title: Simple and Efficient Architectures for Semantic Segmentation license: https://github.com/Qualcomm-AI-research/FFNet/blob/master/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/Qualcomm-AI-research/FFNet technical_details: Model checkpoint: ffnet78S_BCC_cityscapes_state_dict_quarts_pre_down @@ -31,7 +32,8 @@ form_factors: - Phone - Tablet has_static_banner: yes -has_animated_banner: yes +has_animated_banner: no license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: - cityscapes diff --git a/qai_hub_models/models/ffnet_78s_lowres/perf.yaml b/qai_hub_models/models/ffnet_78s_lowres/perf.yaml index 4fceeb4f..e55df80c 100644 --- a/qai_hub_models/models/ffnet_78s_lowres/perf.yaml +++ b/qai_hub_models/models/ffnet_78s_lowres/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: FFNet-78S-LowRes performance_metrics: - torchscript_onnx_tflite: - inference_time: 10833.0 - throughput: 92.31053263177328 + inference_time: 10810.0 + throughput: 92.50693802035153 estimated_peak_memory_range: - min: 671744 - max: 3588808 + min: 0 + max: 1890472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,14 +43,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: j0pxl6d9p + job_id: jegn2dmgo job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:53:13.887710Z' torchscript_onnx_qnn: - inference_time: 11410.0 - throughput: 87.64241893076249 + inference_time: 11408.0 + throughput: 87.6577840112202 estimated_peak_memory_range: - min: 565248 - max: 42397168 + min: 16384 + max: 52414400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -55,13 +66,43 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 237 - job_id: jegnzm7mg + job_id: jep28qmp6 + job_status: Passed + - torchscript_onnx_tflite: 
+ inference_time: 7768.0 + throughput: 128.73326467559218 + estimated_peak_memory_range: + min: 540672 + max: 52237632 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 149 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 149 + job_id: joprkme50 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:29:56.988054Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:57:44.327749Z' + torchscript_onnx_qnn: + inference_time: 8084.0 + throughput: 123.70113805047006 + estimated_peak_memory_range: + min: 6328320 + max: 72586224 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 237 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 237 + job_id: jqpyek4gy + job_status: Passed diff --git a/qai_hub_models/models/ffnet_78s_lowres/requirements.txt b/qai_hub_models/models/ffnet_78s_lowres/requirements.txt index 73ad8aa8..2470ac6c 100644 --- a/qai_hub_models/models/ffnet_78s_lowres/requirements.txt +++ b/qai_hub_models/models/ffnet_78s_lowres/requirements.txt @@ -1 +1 @@ -scikit-image>=0.21.0 +scikit-image==0.21.0 diff --git a/qai_hub_models/models/ffnet_78s_quantized/README.md b/qai_hub_models/models/ffnet_78s_quantized/README.md index 4a58d65a..e4cfbc65 100644 --- a/qai_hub_models/models/ffnet_78s_quantized/README.md +++ b/qai_hub_models/models/ffnet_78s_quantized/README.md @@ -10,7 +10,7 @@ This is based on the implementation of FFNet-78S-Quantized found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/ffnet_78s_quantized). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.ffnet_78s_quantized.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of FFNet-78S-Quantized can be found [here](https://github.com/Qualcomm-AI-research/FFNet/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Simple and Efficient Architectures for Semantic Segmentation](https://arxiv.org/abs/2206.08236) diff --git a/qai_hub_models/models/ffnet_78s_quantized/conftest.py b/qai_hub_models/models/ffnet_78s_quantized/conftest.py new file mode 100644 index 00000000..72a01635 --- /dev/null +++ b/qai_hub_models/models/ffnet_78s_quantized/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.ffnet_78s_quantized import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.ffnet_78s_quantized.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/ffnet_78s_quantized/export.py b/qai_hub_models/models/ffnet_78s_quantized/export.py index 0e2f3a1e..49c83ae3 100644 --- a/qai_hub_models/models/ffnet_78s_quantized/export.py +++ b/qai_hub_models/models/ffnet_78s_quantized/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub @@ -123,8 +123,8 @@ def export_model( + " --force_channel_last_input image" + " --force_channel_last_output output_0", ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), @@ -132,21 +132,29 @@ def export_model( calibration_data=quant_calibration_data, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -158,30 +166,31 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. 
Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore # Convert outputs from channel last to channel first inference_result = transpose_channel_last_to_first( "output_0", inference_result, target_runtime diff --git a/qai_hub_models/models/ffnet_78s_quantized/info.yaml b/qai_hub_models/models/ffnet_78s_quantized/info.yaml index cdb2f813..86f97e34 100644 --- a/qai_hub_models/models/ffnet_78s_quantized/info.yaml +++ b/qai_hub_models/models/ffnet_78s_quantized/info.yaml @@ -14,6 +14,7 @@ tags: research_paper: https://arxiv.org/abs/2206.08236 research_paper_title: Simple and Efficient Architectures for Semantic Segmentation license: https://github.com/Qualcomm-AI-research/FFNet/blob/master/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/Qualcomm-AI-research/FFNet technical_details: Model checkpoint: ffnet78S_dBBB_cityscapes_state_dict_quarts @@ -32,7 +33,8 @@ form_factors: - Phone - Tablet has_static_banner: yes -has_animated_banner: yes +has_animated_banner: no license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: - cityscapes diff --git a/qai_hub_models/models/ffnet_78s_quantized/perf.yaml b/qai_hub_models/models/ffnet_78s_quantized/perf.yaml index 17c2c4c9..3201ba82 100644 --- a/qai_hub_models/models/ffnet_78s_quantized/perf.yaml +++ b/qai_hub_models/models/ffnet_78s_quantized/perf.yaml @@ -17,31 +17,42 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: FFNet-78S-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 8362.0 - throughput: 119.58861516383641 + inference_time: 8368.0 + throughput: 119.50286806883365 estimated_peak_memory_range: - min: 655360 - max: 2403480 + min: 663552 + max: 2264096 primary_compute_unit: NPU - precision: fp16 + precision: int8 layer_info: layers_on_npu: 154 layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 154 - job_id: j1gly2oe5 + job_id: jegn2jmgo job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:31:42.853131Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -57,11 +68,41 @@ models: total_layers: 0 job_id: '' job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 6095.0 + throughput: 164.06890894175552 + estimated_peak_memory_range: + min: 16384 + max: 84212448 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 154 + layers_on_gpu: 0 + layers_on_cpu: 0 + 
total_layers: 154 + job_id: jep282mp6 + job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:13:29.270963Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:31:42.853166Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped diff --git a/qai_hub_models/models/ffnet_78s_quantized/test.py b/qai_hub_models/models/ffnet_78s_quantized/test.py index 1c3c8a51..9cb6fa96 100644 --- a/qai_hub_models/models/ffnet_78s_quantized/test.py +++ b/qai_hub_models/models/ffnet_78s_quantized/test.py @@ -17,5 +17,6 @@ def test_off_target_numerical(): ) +@skip_clone_repo_check def test_demo(): demo_main(is_test=True) diff --git a/qai_hub_models/models/googlenet/README.md b/qai_hub_models/models/googlenet/README.md index ddd6d58f..32249c1d 100644 --- a/qai_hub_models/models/googlenet/README.md +++ b/qai_hub_models/models/googlenet/README.md @@ -10,7 +10,7 @@ This is based on the implementation of GoogLeNet found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/googlenet). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.googlenet.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of GoogLeNet can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Going Deeper with Convolutions](https://arxiv.org/abs/1409.4842) diff --git a/qai_hub_models/models/googlenet/conftest.py b/qai_hub_models/models/googlenet/conftest.py new file mode 100644 index 00000000..30481135 --- /dev/null +++ b/qai_hub_models/models/googlenet/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.googlenet import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. 
Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.googlenet.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/googlenet/demo.py b/qai_hub_models/models/googlenet/demo.py index 7be6a572..954f7edd 100644 --- a/qai_hub_models/models/googlenet/demo.py +++ b/qai_hub_models/models/googlenet/demo.py @@ -3,11 +3,11 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo -from qai_hub_models.models.googlenet.model import GoogLeNet +from qai_hub_models.models.googlenet.model import MODEL_ID, GoogLeNet def main(is_test: bool = False): - imagenet_demo(GoogLeNet, is_test) + imagenet_demo(GoogLeNet, MODEL_ID, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/googlenet/export.py b/qai_hub_models/models/googlenet/export.py index 38a7d779..81262c38 100644 --- a/qai_hub_models/models/googlenet/export.py +++ b/qai_hub_models/models/googlenet/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,35 +109,43 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image_tensor" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." 
) @@ -146,33 +154,35 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/googlenet/info.yaml b/qai_hub_models/models/googlenet/info.yaml index 047a032f..e3143397 100644 --- a/qai_hub_models/models/googlenet/info.yaml +++ b/qai_hub_models/models/googlenet/info.yaml @@ -12,6 +12,7 @@ tags: [] research_paper: https://arxiv.org/abs/1409.4842 research_paper_title: Going Deeper with Convolutions license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/googlenet.py technical_details: Model checkpoint: Imagenet @@ -34,6 +35,7 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k diff --git a/qai_hub_models/models/googlenet/model.py b/qai_hub_models/models/googlenet/model.py index 4f1dcd5d..d4319351 100644 --- a/qai_hub_models/models/googlenet/model.py +++ b/qai_hub_models/models/googlenet/model.py @@ -14,6 +14,6 @@ class GoogLeNet(ImagenetClassifier): @classmethod - def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> ImagenetClassifier: - net = tv_models.googlenet(weights=weights) - return cls(net) + def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> GoogLeNet: + net = tv_models.googlenet(weights=weights, transform_input=False) + return cls(net, transform_input=True) diff --git a/qai_hub_models/models/googlenet/perf.yaml b/qai_hub_models/models/googlenet/perf.yaml index 5a446a3a..655972d7 100644 --- a/qai_hub_models/models/googlenet/perf.yaml +++ b/qai_hub_models/models/googlenet/perf.yaml @@ -17,51 +17,92 @@ 
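The perf.yaml entries in this patch pair an `inference_time` with a `throughput`; the numbers are consistent with inference time reported in microseconds and throughput as inferences per second, i.e. throughput = 1e6 / inference_time. A quick check against the FFNet-78S-LowRes figures above:

```python
# Reproduce the reported throughput from the reported inference time (microseconds).
for inference_time_us in (10810.0, 7768.0):   # FFNet-78S-LowRes TFLite, S23 and S24
    print(1e6 / inference_time_us)            # 92.50693802035153, 128.73326467559218
```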
aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: GoogLeNet performance_metrics: - torchscript_onnx_tflite: - inference_time: 1471.0 - throughput: 679.8096532970768 + inference_time: 1041.0 + throughput: 960.6147934678194 estimated_peak_memory_range: - min: 16384 - max: 1850752 + min: 12288 + max: 1836376 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 94 + layers_on_npu: 84 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 94 - job_id: jw568z3vg + total_layers: 84 + job_id: joprq3950 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:24:33.473846Z' torchscript_onnx_qnn: - inference_time: 1808.0 - throughput: 553.0973451327434 + inference_time: 1083.0 + throughput: 923.3610341643582 + estimated_peak_memory_range: + min: 32768 + max: 26497136 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 144 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 144 + job_id: jqpyw37gy + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 647.0 + throughput: 1545.595054095827 estimated_peak_memory_range: - min: 24576 - max: 31167584 + min: 16384 + max: 45415536 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 156 + layers_on_npu: 84 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 156 - job_id: j1p3z14x5 + total_layers: 84 + job_id: jep26y4g6 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:07:34.463888Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:28:59.659531Z' + torchscript_onnx_qnn: + inference_time: 682.0 + throughput: 1466.275659824047 + estimated_peak_memory_range: + min: 0 + max: 49977664 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 144 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 144 + job_id: j2p0q065w + job_status: Passed diff --git a/qai_hub_models/models/googlenet/test.py b/qai_hub_models/models/googlenet/test.py index 6f0b2a66..137eb7f1 100644 --- a/qai_hub_models/models/googlenet/test.py +++ b/qai_hub_models/models/googlenet/test.py @@ -2,6 +2,8 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
# SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- +import pytest + from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( run_imagenet_classifier_test, run_imagenet_classifier_trace_test, @@ -14,6 +16,7 @@ def test_task(): run_imagenet_classifier_test(GoogLeNet.from_pretrained(), MODEL_ID) +@pytest.mark.trace def test_trace(): run_imagenet_classifier_trace_test(GoogLeNet.from_pretrained()) diff --git a/qai_hub_models/models/googlenet_quantized/README.md b/qai_hub_models/models/googlenet_quantized/README.md index 2b6438a7..8e923612 100644 --- a/qai_hub_models/models/googlenet_quantized/README.md +++ b/qai_hub_models/models/googlenet_quantized/README.md @@ -10,7 +10,7 @@ This is based on the implementation of GoogLeNetQuantized found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/googlenet_quantized). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.googlenet_quantized.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of GoogLeNetQuantized can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Going Deeper with Convolutions](https://arxiv.org/abs/1409.4842) diff --git a/qai_hub_models/models/googlenet_quantized/conftest.py b/qai_hub_models/models/googlenet_quantized/conftest.py new file mode 100644 index 00000000..4f758021 --- /dev/null +++ b/qai_hub_models/models/googlenet_quantized/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.googlenet_quantized import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. 
+ """ + mock = patch( + "qai_hub_models.models.googlenet_quantized.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/googlenet_quantized/demo.py b/qai_hub_models/models/googlenet_quantized/demo.py index 5f363507..797f6fd2 100644 --- a/qai_hub_models/models/googlenet_quantized/demo.py +++ b/qai_hub_models/models/googlenet_quantized/demo.py @@ -3,11 +3,14 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo -from qai_hub_models.models.googlenet_quantized.model import GoogLeNetQuantizable +from qai_hub_models.models.googlenet_quantized.model import ( + MODEL_ID, + GoogLeNetQuantizable, +) def main(is_test: bool = False): - imagenet_demo(GoogLeNetQuantizable, is_test) + imagenet_demo(GoogLeNetQuantizable, MODEL_ID, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/googlenet_quantized/export.py b/qai_hub_models/models/googlenet_quantized/export.py index 34cc8ec6..7ed01e3e 100644 --- a/qai_hub_models/models/googlenet_quantized/export.py +++ b/qai_hub_models/models/googlenet_quantized/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub @@ -120,8 +120,8 @@ def export_model( model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image_tensor" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), @@ -129,21 +129,29 @@ def export_model( calibration_data=quant_calibration_data, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -155,42 +163,44 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. 
Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics( inference_job, inference_result, torch_out, metrics="psnr,top1,top5" ) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/googlenet_quantized/info.yaml b/qai_hub_models/models/googlenet_quantized/info.yaml index c7f1e45c..0d5b5538 100644 --- a/qai_hub_models/models/googlenet_quantized/info.yaml +++ b/qai_hub_models/models/googlenet_quantized/info.yaml @@ -13,12 +13,13 @@ tags: research_paper: https://arxiv.org/abs/1409.4842 research_paper_title: Going Deeper with Convolutions license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/googlenet.py technical_details: Model checkpoint: Imagenet Input resolution: 224x224 Number of parameters: 6.62M - Model size: 16.0 MB + Model size: 6.55 MB applicable_scenarios: - Medical Imaging - Anomaly Detection @@ -35,6 +36,7 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k diff --git a/qai_hub_models/models/googlenet_quantized/model.py b/qai_hub_models/models/googlenet_quantized/model.py index 59323b6d..e100c42a 100644 --- a/qai_hub_models/models/googlenet_quantized/model.py +++ b/qai_hub_models/models/googlenet_quantized/model.py @@ -14,14 +14,17 @@ import torch from aimet_torch.cross_layer_equalization import equalize_model +from aimet_torch.model_preparer import prepare_model from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim from qai_hub_models.models.googlenet.model import GoogLeNet -from qai_hub_models.utils.aimet.config_loader import get_per_channel_aimet_config +from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config from qai_hub_models.utils.asset_loaders import CachedWebModelAsset +from qai_hub_models.utils.base_model import SourceModelFormat, TargetRuntime +from qai_hub_models.utils.quantization_aimet import 
tie_aimet_observer_groups MODEL_ID = __name__.split(".")[-2] -MODEL_ASSET_VERSION = 1 +MODEL_ASSET_VERSION = 2 DEFAULT_ENCODINGS = "googlenet_quantized_encodings.json" @@ -37,14 +40,20 @@ def __init__( ) -> None: GoogLeNet.__init__(self, sim_model.model) AIMETQuantizableMixin.__init__( - self, sim_model, needs_onnx_direct_aimet_export=True + self, + sim_model, ) + def preferred_hub_source_model_format( + self, target_runtime: TargetRuntime + ) -> SourceModelFormat: + return SourceModelFormat.ONNX + @classmethod def from_pretrained( cls, aimet_encodings: str | None = "DEFAULT", - ) -> "GoogLeNet": + ) -> "GoogLeNetQuantizable": """ Parameters: aimet_encodings: @@ -53,17 +62,19 @@ def from_pretrained( else: Interprets as a filepath and loads the encodings stored there. """ model = GoogLeNet.from_pretrained() - input_shape = model.get_input_spec()["image_tensor"][0] + input_shape = cls.get_input_spec()["image_tensor"][0] + model = prepare_model(model) equalize_model(model, input_shape) sim = QuantizationSimModel( - model.net, + model, quant_scheme="tf_enhanced", default_param_bw=8, default_output_bw=8, - config_file=get_per_channel_aimet_config(), + config_file=get_default_aimet_config(), dummy_input=torch.rand(input_shape), ) + cls._tie_pre_concat_quantizers(sim) if aimet_encodings: if aimet_encodings == "DEFAULT": @@ -74,3 +85,36 @@ def from_pretrained( sim.model.eval() return cls(sim) + + @classmethod + def _tie_pre_concat_quantizers(cls, sim: QuantizationSimModel): + """ + This ties together the output quantizers prior to concatenations. This + prevents unnecessary re-quantization during the concatenation. + """ + blocks = [ + sim.model.net.inception3a, + sim.model.net.inception3b, + sim.model.net.inception4a, + sim.model.net.inception4b, + sim.model.net.inception4c, + sim.model.net.inception4d, + sim.model.net.inception4e, + sim.model.net.inception5a, + sim.model.net.inception5b, + ] + + idx = 3 + groups = [] + for block in blocks: + groups.append( + [ + getattr(block.branch1, f"module_relu_{idx}"), + getattr(getattr(block.branch2, "1"), f"module_relu_{idx+2}"), + getattr(getattr(block.branch3, "1"), f"module_relu_{idx+4}"), + getattr(getattr(block.branch4, "1"), f"module_relu_{idx+5}"), + ] + ) + idx += 6 + + tie_aimet_observer_groups(groups) diff --git a/qai_hub_models/models/googlenet_quantized/perf.yaml b/qai_hub_models/models/googlenet_quantized/perf.yaml index 78455c1d..e9d21985 100644 --- a/qai_hub_models/models/googlenet_quantized/perf.yaml +++ b/qai_hub_models/models/googlenet_quantized/perf.yaml @@ -17,51 +17,92 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: GoogLeNetQuantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1026.0 - throughput: 974.6588693957115 + inference_time: 331.0 + throughput: 3021.1480362537764 estimated_peak_memory_range: - min: 20480 - max: 1771688 + min: 12288 + max: 1926544 primary_compute_unit: NPU - precision: fp16 + precision: int8 layer_info: - layers_on_npu: 183 + layers_on_npu: 87 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 183 - job_id: j2p0m2d2g + total_layers: 87 + job_id: jnp109l5q job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + 
timestamp: '2024-03-14T23:54:15.624495Z' torchscript_onnx_qnn: - inference_time: 'null' - throughput: 'null' + inference_time: 365.0 + throughput: 2739.72602739726 + estimated_peak_memory_range: + min: 638976 + max: 5546832 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 89 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 89 + job_id: jqp4qzlgo + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 248.0 + throughput: 4032.2580645161293 estimated_peak_memory_range: - min: 0 - max: 0 - primary_compute_unit: 'null' - precision: 'null' + min: 16384 + max: 32361600 + primary_compute_unit: NPU + precision: int8 layer_info: - layers_on_npu: 0 + layers_on_npu: 87 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 0 - job_id: '' - job_status: Skipped + total_layers: 87 + job_id: jz57zqrp3 + job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:34:34.707459Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-15T00:00:04.109028Z' + torchscript_onnx_qnn: + inference_time: 258.0 + throughput: 3875.968992248062 + estimated_peak_memory_range: + min: 618496 + max: 47357168 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 89 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 89 + job_id: j0pxvw9g7 + job_status: Passed diff --git a/qai_hub_models/models/googlenet_quantized/test.py b/qai_hub_models/models/googlenet_quantized/test.py index 65afe84d..c116898d 100644 --- a/qai_hub_models/models/googlenet_quantized/test.py +++ b/qai_hub_models/models/googlenet_quantized/test.py @@ -4,7 +4,6 @@ # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( run_imagenet_classifier_test, - run_imagenet_classifier_trace_test, ) from qai_hub_models.models.googlenet_quantized.demo import main as demo_main from qai_hub_models.models.googlenet_quantized.model import ( @@ -25,16 +24,6 @@ def test_task(): ) -def test_trace(): - run_imagenet_classifier_trace_test( - GoogLeNetQuantizable.from_pretrained(), - diff_tol=0.01, - rtol=0.02, - atol=0.2, - is_quantized=True, - ) - - def test_demo(): # Verify demo does not crash demo_main(is_test=True) diff --git a/qai_hub_models/models/hrnet_pose/README.md b/qai_hub_models/models/hrnet_pose/README.md index 6d3e6461..5628cf13 100644 --- a/qai_hub_models/models/hrnet_pose/README.md +++ b/qai_hub_models/models/hrnet_pose/README.md @@ -10,7 +10,7 @@ This is based on the implementation of HRNetPose found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/hrnet_pose). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -30,7 +30,7 @@ python -m qai_hub_models.models.hrnet_pose.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. 
## Export for on-device deployment @@ -46,7 +46,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of HRNetPose can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Deep High-Resolution Representation Learning for Human Pose Estimation](https://arxiv.org/abs/1902.09212) diff --git a/qai_hub_models/models/hrnet_pose/conftest.py b/qai_hub_models/models/hrnet_pose/conftest.py new file mode 100644 index 00000000..1e461dec --- /dev/null +++ b/qai_hub_models/models/hrnet_pose/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.hrnet_pose import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.hrnet_pose.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/hrnet_pose/demo.py b/qai_hub_models/models/hrnet_pose/demo.py index c5844da9..52f35b3e 100644 --- a/qai_hub_models/models/hrnet_pose/demo.py +++ b/qai_hub_models/models/hrnet_pose/demo.py @@ -34,10 +34,10 @@ def main(is_test: bool = False): help="image file path or URL", ) args = parser.parse_args([] if is_test else None) - validate_on_device_demo_args(args, HRNetPose.get_model_id()) + validate_on_device_demo_args(args, MODEL_ID) # Load image & model - model = demo_model_from_cli_args(HRNetPose, args) + model = demo_model_from_cli_args(HRNetPose, MODEL_ID, args) image = load_image(args.image) print("Model Loaded") diff --git a/qai_hub_models/models/hrnet_pose/export.py b/qai_hub_models/models/hrnet_pose/export.py index 2c4d9846..338f35fd 100644 --- a/qai_hub_models/models/hrnet_pose/export.py +++ b/qai_hub_models/models/hrnet_pose/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -110,7 +110,7 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. 
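Each regenerated export.py (including the HRNetPose one just above, see the `torch.jit.trace(model.to("cpu"), ...)` change) now moves the model to CPU before tracing, presumably so the module sits on the same device as the CPU example inputs produced for tracing. A minimal sketch of that step with a stand-in module (the real scripts trace the qai_hub_models wrapper with inputs from `make_torch_inputs`):

```python
import torch

# Stand-in module; the real scripts trace the qai_hub_models wrapper class.
model = torch.nn.Conv2d(3, 8, kernel_size=3)
example_inputs = (torch.rand(1, 3, 224, 224),)  # CPU tensors, like make_torch_inputs

# Moving the module to CPU first avoids a device mismatch with the CPU example
# inputs if the model had been placed on an accelerator earlier.
traced = torch.jit.trace(model.to("cpu"), example_inputs)
```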
Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( @@ -119,29 +119,37 @@ def export_model( + " --force_channel_last_input image_tensor" + " --force_channel_last_output output_0", ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -150,37 +158,39 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. 
Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore # Convert outputs from channel last to channel first inference_result = transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/hrnet_pose/info.yaml b/qai_hub_models/models/hrnet_pose/info.yaml index e70da7c1..bf4f1b15 100644 --- a/qai_hub_models/models/hrnet_pose/info.yaml +++ b/qai_hub_models/models/hrnet_pose/info.yaml @@ -11,6 +11,7 @@ research_paper: https://arxiv.org/abs/1902.09212 research_paper_title: Deep High-Resolution Representation Learning for Human Pose Estimation license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/hrnet_posenet technical_details: @@ -30,4 +31,5 @@ related_models: [litehrnet, openpose] has_static_banner: yes has_animated_banner: no license_type: other +deploy_license_type: AI Model Hub License dataset: [] diff --git a/qai_hub_models/models/hrnet_pose/model.py b/qai_hub_models/models/hrnet_pose/model.py index 065ecc06..496f5e68 100644 --- a/qai_hub_models/models/hrnet_pose/model.py +++ b/qai_hub_models/models/hrnet_pose/model.py @@ -5,6 +5,7 @@ from __future__ import annotations import sys +from importlib import reload import torch import torch.nn as nn @@ -40,9 +41,20 @@ def from_pretrained(cls) -> HRNetPose: ).fetch() weights = torch.load(weights_file, map_location="cpu") with SourceAsRoot( - SOURCE_REPOSITORY, COMMIT_HASH, MODEL_ID, MODEL_ASSET_VERSION + SOURCE_REPOSITORY, + COMMIT_HASH, + MODEL_ID, + MODEL_ASSET_VERSION, + keep_sys_modules=True, ): sys.path.append("./lib") + + # This repository has a top-level "models", which is common. 
We + # explicitly reload it in case it has been loaded and cached by another + # package (or our models when executing from qai_hub_models/) + import models + + reload(models) from lib.config import cfg from models.pose_hrnet import PoseHighResolutionNet diff --git a/qai_hub_models/models/hrnet_pose/perf.yaml b/qai_hub_models/models/hrnet_pose/perf.yaml index bcbe5478..e8ac7da2 100644 --- a/qai_hub_models/models/hrnet_pose/perf.yaml +++ b/qai_hub_models/models/hrnet_pose/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: HRNetPose performance_metrics: - torchscript_onnx_tflite: - inference_time: 2574.0 - throughput: 388.5003885003885 + inference_time: 2519.0 + throughput: 396.9829297340214 estimated_peak_memory_range: - min: 16384 - max: 2027656 + min: 24576 + max: 3015464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,14 +43,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 515 - job_id: jwgoln14g + job_id: jep28oxp6 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:57:53.421052Z' torchscript_onnx_qnn: - inference_time: 2611.0 - throughput: 382.99502106472613 + inference_time: 2608.0 + throughput: 383.4355828220859 estimated_peak_memory_range: - min: 12288 - max: 48352008 + min: 49152 + max: 58039344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -55,13 +66,43 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: j1pvlr175 + job_id: j1p8ojzg9 + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 1878.0 + throughput: 532.4813631522896 + estimated_peak_memory_range: + min: 16384 + max: 103402912 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 515 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 515 + job_id: j2p0yo2gw job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:31:51.091359Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:59:59.942614Z' + torchscript_onnx_qnn: + inference_time: 1922.0 + throughput: 520.2913631633714 + estimated_peak_memory_range: + min: 606208 + max: 178228720 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 747 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 747 + job_id: j1glnwepv + job_status: Passed diff --git a/qai_hub_models/models/hrnet_pose/requirements.txt b/qai_hub_models/models/hrnet_pose/requirements.txt index 69edf6ae..11ca0687 100644 --- a/qai_hub_models/models/hrnet_pose/requirements.txt +++ b/qai_hub_models/models/hrnet_pose/requirements.txt @@ -1,4 +1,4 @@ yacs==0.1.8 -mmpose<=1.2.0 +mmpose==1.2.0 mmcv==2.1.0 -mmdet<=3.2.0 +mmdet==3.2.0 diff --git a/qai_hub_models/models/hrnet_pose_quantized/README.md b/qai_hub_models/models/hrnet_pose_quantized/README.md index 7391c232..4697d29f 100644 --- a/qai_hub_models/models/hrnet_pose_quantized/README.md +++ b/qai_hub_models/models/hrnet_pose_quantized/README.md @@ -10,7 +10,7 @@ This is based on the implementation of HRNetPoseQuantized found 
export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/hrnet_pose_quantized). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -30,7 +30,7 @@ python -m qai_hub_models.models.hrnet_pose_quantized.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -46,7 +46,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of HRNetPoseQuantized can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Deep High-Resolution Representation Learning for Human Pose Estimation](https://arxiv.org/abs/1902.09212) diff --git a/qai_hub_models/models/hrnet_pose_quantized/conftest.py b/qai_hub_models/models/hrnet_pose_quantized/conftest.py new file mode 100644 index 00000000..04066f70 --- /dev/null +++ b/qai_hub_models/models/hrnet_pose_quantized/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.hrnet_pose_quantized import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. 
+ """ + mock = patch( + "qai_hub_models.models.hrnet_pose_quantized.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/hrnet_pose_quantized/demo.py b/qai_hub_models/models/hrnet_pose_quantized/demo.py index e17f276e..a5eca7ae 100644 --- a/qai_hub_models/models/hrnet_pose_quantized/demo.py +++ b/qai_hub_models/models/hrnet_pose_quantized/demo.py @@ -35,10 +35,10 @@ def main(is_test: bool = False): ) args = parser.parse_args([] if is_test else None) - validate_on_device_demo_args(args, HRNetPoseQuantizable.get_model_id()) + validate_on_device_demo_args(args, MODEL_ID) # Load image & model - model = demo_model_from_cli_args(HRNetPoseQuantizable, args) + model = demo_model_from_cli_args(HRNetPoseQuantizable, MODEL_ID, args) image = load_image(args.image) print("Model Loaded") diff --git a/qai_hub_models/models/hrnet_pose_quantized/export.py b/qai_hub_models/models/hrnet_pose_quantized/export.py index f65c3644..57904574 100644 --- a/qai_hub_models/models/hrnet_pose_quantized/export.py +++ b/qai_hub_models/models/hrnet_pose_quantized/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub @@ -124,8 +124,8 @@ def export_model( + " --force_channel_last_input image_tensor" + " --force_channel_last_output output_0", ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), @@ -133,21 +133,29 @@ def export_model( calibration_data=quant_calibration_data, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -159,37 +167,39 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. 
Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore # Convert outputs from channel last to channel first inference_result = transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/hrnet_pose_quantized/info.yaml b/qai_hub_models/models/hrnet_pose_quantized/info.yaml index 0870433d..539d9ab1 100644 --- a/qai_hub_models/models/hrnet_pose_quantized/info.yaml +++ b/qai_hub_models/models/hrnet_pose_quantized/info.yaml @@ -12,6 +12,7 @@ research_paper: https://arxiv.org/abs/1902.09212 research_paper_title: Deep High-Resolution Representation Learning for Human Pose Estimation license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/hrnet_posenet technical_details: @@ -31,4 +32,5 @@ related_models: [litehrnet, hrnet_pose] has_static_banner: yes has_animated_banner: no license_type: other +deploy_license_type: AI Model Hub License dataset: [] diff --git a/qai_hub_models/models/hrnet_pose_quantized/perf.yaml b/qai_hub_models/models/hrnet_pose_quantized/perf.yaml index 8e720351..db64d4b2 100644 --- a/qai_hub_models/models/hrnet_pose_quantized/perf.yaml +++ b/qai_hub_models/models/hrnet_pose_quantized/perf.yaml @@ -17,31 +17,42 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: HRNetPoseQuantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 2508.0 - throughput: 398.72408293460927 + inference_time: 2539.0 + throughput: 393.8558487593541 estimated_peak_memory_range: - min: 16384 - max: 3642928 + min: 24576 + max: 4215600 primary_compute_unit: NPU - precision: fp16 + precision: int8 layer_info: layers_on_npu: 515 layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 515 - job_id: jz57eljqp + job_id: jo5mr2wgk job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + 
form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:46:47.467700Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -57,11 +68,41 @@ models: total_layers: 0 job_id: '' job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 1859.0 + throughput: 537.9236148466917 + estimated_peak_memory_range: + min: 16384 + max: 102354800 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 515 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 515 + job_id: jegn2yrgo + job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:23:32.990808Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:46:47.467710Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped diff --git a/qai_hub_models/models/hrnet_pose_quantized/requirements.txt b/qai_hub_models/models/hrnet_pose_quantized/requirements.txt index 69edf6ae..11ca0687 100644 --- a/qai_hub_models/models/hrnet_pose_quantized/requirements.txt +++ b/qai_hub_models/models/hrnet_pose_quantized/requirements.txt @@ -1,4 +1,4 @@ yacs==0.1.8 -mmpose<=1.2.0 +mmpose==1.2.0 mmcv==2.1.0 -mmdet<=3.2.0 +mmdet==3.2.0 diff --git a/qai_hub_models/models/huggingface_wavlm_base_plus/README.md b/qai_hub_models/models/huggingface_wavlm_base_plus/README.md index 94a1d034..5aae1665 100644 --- a/qai_hub_models/models/huggingface_wavlm_base_plus/README.md +++ b/qai_hub_models/models/huggingface_wavlm_base_plus/README.md @@ -10,7 +10,7 @@ This is based on the implementation of HuggingFace-WavLM-Base-Plus found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/huggingface_wavlm_base_plus). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -30,7 +30,7 @@ python -m qai_hub_models.models.huggingface_wavlm_base_plus.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -46,7 +46,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of HuggingFace-WavLM-Base-Plus can be found [here](https://github.com/microsoft/unilm/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). 
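The auto-generated conftest.py files added throughout this patch (hrnet_pose_quantized above, and several more below) all follow the same pattern: call Model.from_pretrained() once, then patch it so every test reuses the cached instance. A self-contained sketch of the idea, using a dummy model class rather than a real qai_hub_models model:

    from unittest.mock import patch

    import pytest


    class DummyModel:
        """Stand-in for a model whose from_pretrained() is expensive."""

        loads = 0

        @classmethod
        def from_pretrained(cls) -> "DummyModel":
            cls.loads += 1          # downloads / weight loading would happen here
            return cls()


    @pytest.fixture(autouse=True)
    def mock_from_pretrained():
        cached = DummyModel.from_pretrained()
        with patch.object(DummyModel, "from_pretrained", return_value=cached):
            yield


    def test_reuses_cached_instance():
        assert DummyModel.from_pretrained() is DummyModel.from_pretrained()
        assert DummyModel.loads == 1

The generated files call mock.start() directly instead of using a context manager; the with/yield form above is only to keep the sketch self-cleaning.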
+- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [WavLM: Large-Scale Self-Supervised Pre-Training for Full Stack Speech Processing](https://arxiv.org/abs/2110.13900) diff --git a/qai_hub_models/models/huggingface_wavlm_base_plus/conftest.py b/qai_hub_models/models/huggingface_wavlm_base_plus/conftest.py new file mode 100644 index 00000000..1beef838 --- /dev/null +++ b/qai_hub_models/models/huggingface_wavlm_base_plus/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.huggingface_wavlm_base_plus import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.huggingface_wavlm_base_plus.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/huggingface_wavlm_base_plus/export.py b/qai_hub_models/models/huggingface_wavlm_base_plus/export.py index 66e9f8b7..5237cafc 100644 --- a/qai_hub_models/models/huggingface_wavlm_base_plus/export.py +++ b/qai_hub_models/models/huggingface_wavlm_base_plus/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -107,63 +107,72 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( target_runtime, compile_options ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options + " --compute_unit gpu", + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. 
Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) sample_inputs = model.sample_inputs(input_spec) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=sample_inputs, device=hub.Device(device), name=model_name, - options=profile_options + " --compute_unit gpu", + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/huggingface_wavlm_base_plus/info.yaml b/qai_hub_models/models/huggingface_wavlm_base_plus/info.yaml index 37a46dd9..ff447211 100644 --- a/qai_hub_models/models/huggingface_wavlm_base_plus/info.yaml +++ b/qai_hub_models/models/huggingface_wavlm_base_plus/info.yaml @@ -13,6 +13,7 @@ research_paper: https://arxiv.org/abs/2110.13900 research_paper_title: 'WavLM: Large-Scale Self-Supervised Pre-Training for Full Stack Speech Processing' license: https://github.com/microsoft/unilm/blob/master/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://huggingface.co/patrickvonplaten/wavlm-libri-clean-100h-base-plus/tree/main technical_details: @@ -28,8 +29,11 @@ form_factors: - Tablet - IoT related_models: - - whisper_asr + - whisper_tiny_en + - whisper_base_en + - whisper_small_en has_static_banner: yes has_animated_banner: yes license_type: mit +deploy_license_type: AI Model Hub License dataset: [] diff --git a/qai_hub_models/models/huggingface_wavlm_base_plus/model.py b/qai_hub_models/models/huggingface_wavlm_base_plus/model.py index 6bdbbe6f..cca89a2f 100644 --- a/qai_hub_models/models/huggingface_wavlm_base_plus/model.py +++ b/qai_hub_models/models/huggingface_wavlm_base_plus/model.py @@ -11,7 +11,7 @@ from transformers import WavLMModel from transformers.models.wavlm.modeling_wavlm import WavLMGroupNormConvLayer -from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.base_model import BaseModel, TargetRuntime from qai_hub_models.utils.input_spec import InputSpec 
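The export.py rewrites above (and the analogous ones for the other models in this patch) all move to the same shape: submit the compile job, cast() the returned generic Job to the concrete job type so the type checker knows which methods exist, then optionally profile and run inference on the compiled target. Stripped of the model-specific options, the flow looks roughly like this, a sketch that assumes a configured qai_hub client and reuses only the calls visible in the hunks above:

    from typing import Optional, cast

    import qai_hub as hub


    def export(source_model, input_spec, sample_inputs, device_name: str, model_name: str,
               skip_profiling: bool = False, skip_inferencing: bool = False):
        submitted = hub.submit_compile_job(
            model=source_model,
            input_specs=input_spec,
            device=hub.Device(device_name),
            name=model_name,
        )
        # submit_* returns a generic Job; cast to the concrete type for mypy.
        compile_job = cast(hub.client.CompileJob, submitted)

        profile_job: Optional[hub.client.ProfileJob] = None
        if not skip_profiling:
            profile_job = cast(
                hub.client.ProfileJob,
                hub.submit_profile_job(
                    model=compile_job.get_target_model(),
                    device=hub.Device(device_name),
                    name=model_name,
                ),
            )

        inference_job: Optional[hub.client.InferenceJob] = None
        if not skip_inferencing:
            inference_job = cast(
                hub.client.InferenceJob,
                hub.submit_inference_job(
                    model=compile_job.get_target_model(),
                    inputs=sample_inputs,
                    device=hub.Device(device_name),
                    name=model_name,
                ),
            )

        # The None checks mirror the asserts added above: they satisfy the type
        # checker now that the jobs are Optional.
        if profile_job is not None:
            assert profile_job.wait().success

        return compile_job, profile_job, inference_job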
OPENPOSE_SOURCE_REPOSITORY = ( @@ -67,8 +67,8 @@ def forward(self, input: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: """ return self.model(input) + @staticmethod def get_input_spec( - self, batch_size: int = 1, sample_length: int = 80000, ) -> InputSpec: @@ -169,6 +169,22 @@ def forward(self, x): x = torch.concat(torch.unbind(x, axis=2), axis=-1) return x[:, :, :-1] + def get_hub_compile_options( + self, target_runtime: TargetRuntime, other_compile_options: str = "" + ) -> str: + compile_options = super().get_hub_compile_options( + target_runtime, other_compile_options + ) + return compile_options + " --compute_unit gpu" + + def get_hub_profile_options( + self, target_runtime: TargetRuntime, other_profile_options: str = "" + ) -> str: + profile_options = super().get_hub_profile_options( + target_runtime, other_profile_options + ) + return profile_options + " --compute_unit gpu" + def convert_to_wavlm_npu(model: WavLMModel): """ diff --git a/qai_hub_models/models/huggingface_wavlm_base_plus/perf.yaml b/qai_hub_models/models/huggingface_wavlm_base_plus/perf.yaml index 3391d06e..e930ee5d 100644 --- a/qai_hub_models/models/huggingface_wavlm_base_plus/perf.yaml +++ b/qai_hub_models/models/huggingface_wavlm_base_plus/perf.yaml @@ -17,31 +17,42 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: HuggingFace-WavLM-Base-Plus performance_metrics: - torchscript_onnx_tflite: - inference_time: 463847.0 - throughput: 2.1558832977253277 + inference_time: 237767939.0 + throughput: 0.0042057815036197965 estimated_peak_memory_range: - min: 10719232 - max: 13863736 - primary_compute_unit: CPU - precision: fp32 + min: 11886592 + max: 15703120 + primary_compute_unit: NPU + precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 88 - layers_on_cpu: 748 - total_layers: 836 - job_id: jo5m06wyg + layers_on_npu: 848 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 848 + job_id: jlpe928gr job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-15T00:13:01.265817Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -57,11 +68,41 @@ models: total_layers: 0 job_id: '' job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 174470189.0 + throughput: 0.005731638199807303 + estimated_peak_memory_range: + min: 11321344 + max: 711668304 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 848 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 848 + job_id: jygzew4g8 + job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:32:02.862530Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-15T00:13:01.265830Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped diff --git 
a/qai_hub_models/models/huggingface_wavlm_base_plus/requirements.txt b/qai_hub_models/models/huggingface_wavlm_base_plus/requirements.txt index 0e2962fb..657bbc9a 100644 --- a/qai_hub_models/models/huggingface_wavlm_base_plus/requirements.txt +++ b/qai_hub_models/models/huggingface_wavlm_base_plus/requirements.txt @@ -1,4 +1,4 @@ -transformers>=4.31.0 -soundfile>=0.12.1 -librosa>=0.10.1 -datasets>=2.14.5 +transformers==4.27.4 +soundfile==0.12.1 +librosa==0.10.1 +datasets==2.14.5 diff --git a/qai_hub_models/models/huggingface_wavlm_base_plus/test.py b/qai_hub_models/models/huggingface_wavlm_base_plus/test.py index f1267acd..a69d95c4 100644 --- a/qai_hub_models/models/huggingface_wavlm_base_plus/test.py +++ b/qai_hub_models/models/huggingface_wavlm_base_plus/test.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- import numpy as np +import pytest import torch from datasets import load_dataset @@ -65,6 +66,7 @@ def test_task(): _test_impl(HuggingFaceWavLMBasePlusApp(HuggingFaceWavLMBasePlus.from_pretrained())) +@pytest.mark.trace @skip_clone_repo_check def test_trace(): _test_impl( diff --git a/qai_hub_models/models/inception_v3/README.md b/qai_hub_models/models/inception_v3/README.md index 63a131d6..2a8ddc93 100644 --- a/qai_hub_models/models/inception_v3/README.md +++ b/qai_hub_models/models/inception_v3/README.md @@ -10,7 +10,7 @@ This is based on the implementation of Inception-v3 found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/inception_v3). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.inception_v3.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Inception-v3 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Rethinking the Inception Architecture for Computer Vision](http://arxiv.org/abs/1512.00567) diff --git a/qai_hub_models/models/inception_v3/conftest.py b/qai_hub_models/models/inception_v3/conftest.py new file mode 100644 index 00000000..8f63d17d --- /dev/null +++ b/qai_hub_models/models/inception_v3/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
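The @pytest.mark.trace markers added to the WavLM test above (and to the classifier tests later in this patch) let the slow torch.jit.trace tests be selected or skipped as a group, for example with pytest -m "not trace". How the marker is registered is not part of this diff; if it is not already declared in the repo's pytest configuration, a conftest hook along the following (hypothetical) lines would register it:

    # Hypothetical registration; the repository may declare the marker in
    # pytest.ini / pyproject.toml instead.
    def pytest_configure(config):
        config.addinivalue_line(
            "markers", "trace: tests that torch.jit.trace the model (slow)"
        )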
+ +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.inception_v3 import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.inception_v3.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/inception_v3/demo.py b/qai_hub_models/models/inception_v3/demo.py index 487dadc1..51beedba 100644 --- a/qai_hub_models/models/inception_v3/demo.py +++ b/qai_hub_models/models/inception_v3/demo.py @@ -3,11 +3,11 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo -from qai_hub_models.models.inception_v3.model import InceptionNetV3 +from qai_hub_models.models.inception_v3.model import MODEL_ID, InceptionNetV3 def main(is_test: bool = False): - imagenet_demo(InceptionNetV3, is_test) + imagenet_demo(InceptionNetV3, MODEL_ID, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/inception_v3/export.py b/qai_hub_models/models/inception_v3/export.py index f722ee3a..461d52ea 100644 --- a/qai_hub_models/models/inception_v3/export.py +++ b/qai_hub_models/models/inception_v3/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,35 +109,43 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image_tensor" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." 
) @@ -146,33 +154,35 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/inception_v3/info.yaml b/qai_hub_models/models/inception_v3/info.yaml index 58042435..69856a43 100644 --- a/qai_hub_models/models/inception_v3/info.yaml +++ b/qai_hub_models/models/inception_v3/info.yaml @@ -13,6 +13,7 @@ tags: research_paper: http://arxiv.org/abs/1512.00567 research_paper_title: Rethinking the Inception Architecture for Computer Vision license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/inception.py technical_details: Model checkpoint: Imagenet @@ -35,6 +36,7 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k diff --git a/qai_hub_models/models/inception_v3/model.py b/qai_hub_models/models/inception_v3/model.py index 66e94e93..5787e9fb 100644 --- a/qai_hub_models/models/inception_v3/model.py +++ b/qai_hub_models/models/inception_v3/model.py @@ -13,5 +13,7 @@ class InceptionNetV3(ImagenetClassifier): - model_builder = tv_models.inception_v3 - DEFAULT_WEIGHTS = DEFAULT_WEIGHTS + @classmethod + def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> InceptionNetV3: + net = tv_models.inception_v3(weights=weights, transform_input=False) + return cls(net, transform_input=True) diff --git a/qai_hub_models/models/inception_v3/perf.yaml b/qai_hub_models/models/inception_v3/perf.yaml index cbb39be7..e4c34f61 100644 --- a/qai_hub_models/models/inception_v3/perf.yaml +++ b/qai_hub_models/models/inception_v3/perf.yaml @@ -17,51 +17,92 @@ 
aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: Inception-v3 performance_metrics: - torchscript_onnx_tflite: - inference_time: 1944.0 - throughput: 514.40329218107 + inference_time: 1435.0 + throughput: 696.8641114982578 estimated_peak_memory_range: - min: 24576 - max: 2564456 + min: 20480 + max: 1921832 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 141 + layers_on_npu: 131 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 141 - job_id: j1p8em8zp + total_layers: 131 + job_id: jqpyeorgy job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:30:27.461416Z' torchscript_onnx_qnn: - inference_time: 2266.0 - throughput: 441.306266548985 + inference_time: 1475.0 + throughput: 677.9661016949152 + estimated_peak_memory_range: + min: 20480 + max: 148512392 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 220 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 220 + job_id: j1p8oezg9 + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 1069.0 + throughput: 935.4536950420954 estimated_peak_memory_range: - min: 360448 - max: 133509928 + min: 12288 + max: 50854560 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 232 + layers_on_npu: 131 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 232 - job_id: jogk2qdyg + total_layers: 131 + job_id: j2p0ym2gw job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:21:24.010787Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:33:41.582505Z' + torchscript_onnx_qnn: + inference_time: 1082.0 + throughput: 924.2144177449168 + estimated_peak_memory_range: + min: 618496 + max: 68383952 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 220 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 220 + job_id: jogkz2ygd + job_status: Passed diff --git a/qai_hub_models/models/inception_v3/test.py b/qai_hub_models/models/inception_v3/test.py index 95c0dc88..8074a890 100644 --- a/qai_hub_models/models/inception_v3/test.py +++ b/qai_hub_models/models/inception_v3/test.py @@ -2,6 +2,8 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
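Several of the export scripts in this patch compile with --force_channel_last_input image_tensor and then shuttle data through transpose_channel_first_to_last / transpose_channel_last_to_first. The underlying operation is just an NCHW to NHWC axis permutation; a minimal numpy illustration (the helper names above are the repo's, the snippet below is not):

    import numpy as np

    nchw = np.zeros((1, 3, 224, 224), dtype=np.float32)   # batch, channels, height, width

    nhwc = np.transpose(nchw, (0, 2, 3, 1))                # channel-first -> channel-last
    assert nhwc.shape == (1, 224, 224, 3)

    back = np.transpose(nhwc, (0, 3, 1, 2))                # channel-last -> channel-first
    assert back.shape == (1, 3, 224, 224)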
# SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- +import pytest + from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( run_imagenet_classifier_test, run_imagenet_classifier_trace_test, @@ -14,6 +16,7 @@ def test_task(): run_imagenet_classifier_test(InceptionNetV3.from_pretrained(), MODEL_ID) +@pytest.mark.trace def test_trace(): run_imagenet_classifier_trace_test(InceptionNetV3.from_pretrained()) diff --git a/qai_hub_models/models/inception_v3_quantized/README.md b/qai_hub_models/models/inception_v3_quantized/README.md index 2dbca972..93531fc0 100644 --- a/qai_hub_models/models/inception_v3_quantized/README.md +++ b/qai_hub_models/models/inception_v3_quantized/README.md @@ -1,16 +1,16 @@ [![Qualcomm® AI Hub Models](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/quic-logo.jpg)](../../README.md) -# [Inception-v3Quantized: Quantized Imagenet classifier and general purpose backbone](https://aihub.qualcomm.com/models/inception_v3_quantized) +# [Inception-v3-Quantized: Quantized Imagenet classifier and general purpose backbone](https://aihub.qualcomm.com/models/inception_v3_quantized) InceptionNetV3 is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. This model is post-training quantized to int8 using samples from [Google's open images dataset](https://storage.googleapis.com/openimages/web/index.html). -This is based on the implementation of Inception-v3Quantized found +This is based on the implementation of Inception-v3-Quantized found [here](https://github.com/pytorch/vision/blob/main/torchvision/models/inception.py). This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/inception_v3_quantized). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.inception_v3_quantized.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -39,9 +39,9 @@ Additional options are documented with the `--help` option. Note that the above script requires access to Deployment instructions for Qualcomm® AI Hub. ## License -- The license for the original implementation of Inception-v3Quantized can be found +- The license for the original implementation of Inception-v3-Quantized can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). 
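The README license sections in this patch now point at [here]({deploy_license_url}) while each info.yaml gains a matching deploy_license URL and deploy_license_type. The tooling that fills that placeholder is not part of this diff; purely as an illustration of the intent, substitution from info.yaml could look like the following hypothetical helper:

    # Hypothetical illustration only: the repo's actual README tooling is not shown here.
    from pathlib import Path

    import yaml


    def fill_deploy_license(model_dir: Path) -> str:
        info = yaml.safe_load((model_dir / "info.yaml").read_text())
        readme = (model_dir / "README.md").read_text()
        return readme.replace("{deploy_license_url}", info["deploy_license"])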
+- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Rethinking the Inception Architecture for Computer Vision](http://arxiv.org/abs/1512.00567) diff --git a/qai_hub_models/models/inception_v3_quantized/conftest.py b/qai_hub_models/models/inception_v3_quantized/conftest.py new file mode 100644 index 00000000..5ab488cb --- /dev/null +++ b/qai_hub_models/models/inception_v3_quantized/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.inception_v3_quantized import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.inception_v3_quantized.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/inception_v3_quantized/demo.py b/qai_hub_models/models/inception_v3_quantized/demo.py index e17bb3be..5443e0ef 100644 --- a/qai_hub_models/models/inception_v3_quantized/demo.py +++ b/qai_hub_models/models/inception_v3_quantized/demo.py @@ -3,11 +3,14 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo -from qai_hub_models.models.inception_v3_quantized.model import InceptionNetV3Quantizable +from qai_hub_models.models.inception_v3_quantized.model import ( + MODEL_ID, + InceptionNetV3Quantizable, +) def main(is_test: bool = False): - imagenet_demo(InceptionNetV3Quantizable, is_test) + imagenet_demo(InceptionNetV3Quantizable, MODEL_ID, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/inception_v3_quantized/export.py b/qai_hub_models/models/inception_v3_quantized/export.py index 816369ff..a5b83180 100644 --- a/qai_hub_models/models/inception_v3_quantized/export.py +++ b/qai_hub_models/models/inception_v3_quantized/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub @@ -89,7 +89,7 @@ def export_model( if not can_access_qualcomm_ai_hub(): return export_without_hub_access( "inception_v3_quantized", - "Inception-v3Quantized", + "Inception-v3-Quantized", device, skip_profiling, skip_inferencing, @@ -120,8 +120,8 @@ def export_model( model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image_tensor" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), @@ -129,21 +129,29 @@ def export_model( calibration_data=quant_calibration_data, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. 
Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -155,35 +163,37 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics( inference_job, inference_result, torch_out, metrics="psnr,top1,top5" ) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/inception_v3_quantized/info.yaml b/qai_hub_models/models/inception_v3_quantized/info.yaml index e7da6c74..06f3bf87 100644 --- a/qai_hub_models/models/inception_v3_quantized/info.yaml +++ b/qai_hub_models/models/inception_v3_quantized/info.yaml @@ -1,4 +1,4 @@ -name: Inception-v3Quantized +name: Inception-v3-Quantized # id must match with the model dir name in qai_hub_models id: inception_v3_quantized status: public @@ -15,12 +15,13 @@ tags: research_paper: http://arxiv.org/abs/1512.00567 research_paper_title: Rethinking the Inception Architecture for Computer Vision license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: 
https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/inception.py technical_details: Model checkpoint: Imagenet Input resolution: 224x224 - Number of parameters: 23.8M - Model size: 65.6 MB + Number of parameters: 23.9M + Model size: 23.3 MB applicable_scenarios: - Medical Imaging - Anomaly Detection @@ -38,6 +39,7 @@ form_factors: has_static_banner: yes has_animated_banner: no license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k diff --git a/qai_hub_models/models/inception_v3_quantized/model.py b/qai_hub_models/models/inception_v3_quantized/model.py index cb320154..39dd2658 100644 --- a/qai_hub_models/models/inception_v3_quantized/model.py +++ b/qai_hub_models/models/inception_v3_quantized/model.py @@ -8,26 +8,29 @@ # This verifies aimet is installed, and this must be included first. from qai_hub_models.utils.quantization_aimet import ( AIMETQuantizableMixin, - HubCompileOptionsInt8Mixin, ) # isort: on import torch from aimet_torch.cross_layer_equalization import equalize_model +from aimet_torch.model_preparer import prepare_model from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim from qai_hub_models.models.inception_v3.model import InceptionNetV3 -from qai_hub_models.utils.aimet.config_loader import get_per_channel_aimet_config +from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config from qai_hub_models.utils.asset_loaders import CachedWebModelAsset +from qai_hub_models.utils.base_model import SourceModelFormat, TargetRuntime +from qai_hub_models.utils.quantization_aimet import tie_aimet_observer_groups MODEL_ID = __name__.split(".")[-2] -MODEL_ASSET_VERSION = 3 +MODEL_ASSET_VERSION = 4 DEFAULT_ENCODINGS = "inception_v3_quantized_encodings.json" class InceptionNetV3Quantizable( - HubCompileOptionsInt8Mixin, AIMETQuantizableMixin, InceptionNetV3 + AIMETQuantizableMixin, + InceptionNetV3, ): """InceptionNetV3 with post train quantization support. @@ -40,14 +43,20 @@ def __init__( ) -> None: InceptionNetV3.__init__(self, sim_model.model) AIMETQuantizableMixin.__init__( - self, sim_model, needs_onnx_direct_aimet_export=True + self, + sim_model, ) + def preferred_hub_source_model_format( + self, target_runtime: TargetRuntime + ) -> SourceModelFormat: + return SourceModelFormat.ONNX + @classmethod def from_pretrained( cls, aimet_encodings: str | None = "DEFAULT", - ) -> "InceptionNetV3": + ) -> "InceptionNetV3Quantizable": """ Parameters: aimet_encodings: @@ -56,17 +65,19 @@ def from_pretrained( else: Interprets as a filepath and loads the encodings stored there. """ model = InceptionNetV3.from_pretrained() - input_shape = model.get_input_spec()["image_tensor"][0] + input_shape = cls.get_input_spec()["image_tensor"][0] + model = prepare_model(model) equalize_model(model, input_shape) sim = QuantizationSimModel( - model.net, + model, quant_scheme="tf_enhanced", default_param_bw=8, default_output_bw=8, - config_file=get_per_channel_aimet_config(), + config_file=get_default_aimet_config(), dummy_input=torch.rand(input_shape), ) + cls._tie_pre_concat_quantizers(sim) if aimet_encodings: if aimet_encodings == "DEFAULT": @@ -77,3 +88,119 @@ def from_pretrained( sim.model.eval() return cls(sim) + + @classmethod + def _tie_pre_concat_quantizers(cls, sim: QuantizationSimModel): + """ + This ties together the output quantizers prior to concatenations. 
This + prevents unnecessary re-quantization during the concatenation, and even + avoids fatal TFLite converter errors. + """ + + n = sim.model.net + groups = [ + [ + n.maxpool2, + n.Mixed_5b.module_avg_pool2d, + ], + [ + n.Mixed_5b.branch1x1.module_relu_5, + n.Mixed_5b.branch5x5_2.module_relu_7, + n.Mixed_5b.branch3x3dbl_3.module_relu_10, + n.Mixed_5b.branch_pool.module_relu_11, + n.Mixed_5b.module_cat, + n.Mixed_5c.module_avg_pool2d_1, + ], + [ + n.Mixed_5c.branch1x1.module_relu_12, + n.Mixed_5c.branch5x5_2.module_relu_14, + n.Mixed_5c.branch3x3dbl_3.module_relu_17, + n.Mixed_5c.branch_pool.module_relu_18, + n.Mixed_5c.module_cat_1, + n.Mixed_5d.module_avg_pool2d_2, + ], + [ + n.Mixed_5d.branch1x1.module_relu_19, + n.Mixed_5d.branch5x5_2.module_relu_21, + n.Mixed_5d.branch3x3dbl_3.module_relu_24, + n.Mixed_5d.branch_pool.module_relu_25, + n.Mixed_5d.module_cat_2, + # This group has a branch with only a max pool, + # this requires the two concat groups to merge + n.Mixed_6a.branch3x3.module_relu_26, + n.Mixed_6a.branch3x3dbl_3.module_relu_29, + n.Mixed_6a.module_max_pool2d, + n.Mixed_6a.module_cat_3, + n.Mixed_6b.module_avg_pool2d_3, + ], + [ + n.Mixed_6b.branch1x1.module_relu_30, + n.Mixed_6b.branch7x7_3.module_relu_33, + n.Mixed_6b.branch7x7dbl_5.module_relu_38, + n.Mixed_6b.branch_pool.module_relu_39, + n.Mixed_6b.module_cat_4, + n.Mixed_6c.module_avg_pool2d_4, + ], + [ + n.Mixed_6c.branch1x1.module_relu_40, + n.Mixed_6c.branch7x7_3.module_relu_43, + n.Mixed_6c.branch7x7dbl_5.module_relu_48, + n.Mixed_6c.branch_pool.module_relu_49, + n.Mixed_6c.module_cat_5, + n.Mixed_6d.module_avg_pool2d_5, + ], + [ + n.Mixed_6d.branch1x1.module_relu_50, + n.Mixed_6d.branch7x7_3.module_relu_53, + n.Mixed_6d.branch7x7dbl_5.module_relu_58, + n.Mixed_6d.branch_pool.module_relu_59, + n.Mixed_6d.module_cat_6, + n.Mixed_6e.module_avg_pool2d_6, + ], + [ + n.Mixed_6e.branch1x1.module_relu_60, + n.Mixed_6e.branch7x7_3.module_relu_63, + n.Mixed_6e.branch7x7dbl_5.module_relu_68, + n.Mixed_6e.branch_pool.module_relu_69, + n.Mixed_6e.module_cat_7, + # This group has a branch with only a max pool, + # this requires the two concat groups to merge + n.Mixed_7a.branch3x3_2.module_relu_71, + n.Mixed_7a.branch7x7x3_4.module_relu_75, + n.Mixed_7a.module_max_pool2d_1, + n.Mixed_7a.module_cat_8, + n.Mixed_7b.module_avg_pool2d_7, + ], + [ + n.Mixed_7b.branch1x1.module_relu_76, + n.Mixed_7b.branch3x3_2a.module_relu_78, + n.Mixed_7b.branch3x3_2b.module_relu_79, + n.Mixed_7b.branch3x3dbl_3a.module_relu_82, + n.Mixed_7b.branch3x3dbl_3b.module_relu_83, + n.Mixed_7b.branch_pool.module_relu_84, + n.Mixed_7b.module_cat_9, + n.Mixed_7b.module_cat_10, + n.Mixed_7b.module_cat_11, + n.Mixed_7c.module_avg_pool2d_8, + ], + [ + n.Mixed_7c.branch1x1.module_relu_85, + n.Mixed_7c.branch3x3_2a.module_relu_87, + n.Mixed_7c.branch3x3_2b.module_relu_88, + n.Mixed_7c.branch3x3dbl_3a.module_relu_91, + n.Mixed_7c.branch3x3dbl_3b.module_relu_92, + n.Mixed_7c.branch_pool.module_relu_93, + n.Mixed_7c.module_cat_12, + n.Mixed_7c.module_cat_13, + n.Mixed_7c.module_cat_14, + ], + ] + tie_aimet_observer_groups(groups) + + def get_hub_compile_options( + self, target_runtime: TargetRuntime, other_compile_options: str = "" + ) -> str: + compile_options = super().get_hub_compile_options( + target_runtime, other_compile_options + ) + return compile_options + " --quantize_full_type int8 --quantize_io" diff --git a/qai_hub_models/models/inception_v3_quantized/perf.yaml b/qai_hub_models/models/inception_v3_quantized/perf.yaml index ec2936b4..b06c4a03 100644 --- 
a/qai_hub_models/models/inception_v3_quantized/perf.yaml +++ b/qai_hub_models/models/inception_v3_quantized/perf.yaml @@ -17,31 +17,42 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: -- name: Inception-v3Quantized +- name: Inception-v3-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 755.0 - throughput: 1324.5033112582782 + inference_time: 615.0 + throughput: 1626.0162601626016 estimated_peak_memory_range: - min: 12288 - max: 1825256 + min: 36864 + max: 2508048 primary_compute_unit: NPU - precision: fp16 + precision: int8 layer_info: - layers_on_npu: 150 + layers_on_npu: 144 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 150 - job_id: jygzlmjo5 + total_layers: 144 + job_id: jz57zj9p3 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:41:04.203939Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -57,11 +68,41 @@ models: total_layers: 0 job_id: '' job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 487.0 + throughput: 2053.388090349076 + estimated_peak_memory_range: + min: 0 + max: 63551712 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 144 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 144 + job_id: j0pxv7lg7 + job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-23T04:42:57.781769Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:41:04.203947Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped diff --git a/qai_hub_models/models/inception_v3_quantized/test.py b/qai_hub_models/models/inception_v3_quantized/test.py index ce2cba52..486a8cee 100644 --- a/qai_hub_models/models/inception_v3_quantized/test.py +++ b/qai_hub_models/models/inception_v3_quantized/test.py @@ -4,7 +4,6 @@ # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( run_imagenet_classifier_test, - run_imagenet_classifier_trace_test, ) from qai_hub_models.models.inception_v3_quantized.demo import main as demo_main from qai_hub_models.models.inception_v3_quantized.model import ( @@ -25,16 +24,6 @@ def test_task(): ) -def test_trace(): - run_imagenet_classifier_trace_test( - InceptionNetV3Quantizable.from_pretrained(), - diff_tol=0.01, - rtol=0.02, - atol=0.2, - is_quantized=True, - ) - - def test_demo(): # Verify demo does not crash demo_main(is_test=True) diff --git a/qai_hub_models/models/lama_dilated/README.md b/qai_hub_models/models/lama_dilated/README.md index 082fb509..6f36b28f 100644 --- a/qai_hub_models/models/lama_dilated/README.md +++ b/qai_hub_models/models/lama_dilated/README.md @@ -10,7 +10,7 @@ This is based on the implementation of LaMa-Dilated found export suitable to run on Qualcomm® devices. 
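Consolidating the new inception_v3_quantized from_pretrained() above: the float model is prepared and cross-layer-equalized, wrapped in a QuantizationSimModel with the default AIMET config, the pre-concat quantizer groups are tied, and the published encodings are loaded. A condensed sketch of that flow, using only the calls that appear in the hunk (the config path and encodings file are placeholders):

    import torch
    from aimet_torch.cross_layer_equalization import equalize_model
    from aimet_torch.model_preparer import prepare_model
    from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim


    def build_quant_sim(float_model: torch.nn.Module, input_shape, aimet_config: str,
                        encodings_path: str) -> QuantizationSimModel:
        model = prepare_model(float_model)            # rewrite functionals into modules
        equalize_model(model, input_shape)            # cross-layer equalization
        sim = QuantizationSimModel(
            model,
            quant_scheme="tf_enhanced",
            default_param_bw=8,                       # int8 weights
            default_output_bw=8,                      # int8 activations
            config_file=aimet_config,
            dummy_input=torch.rand(input_shape),
        )
        # In the model above, _tie_pre_concat_quantizers(sim) runs at this point so
        # that the inputs of each concat share one set of encodings.
        load_encodings_to_sim(sim, encodings_path)
        sim.model.eval()
        return sim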
More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/lama_dilated). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -30,7 +30,7 @@ python -m qai_hub_models.models.lama_dilated.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -46,7 +46,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of LaMa-Dilated can be found [here](https://github.com/advimman/lama/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Resolution-robust Large Mask Inpainting with Fourier Convolutions](https://arxiv.org/abs/2109.07161) diff --git a/qai_hub_models/models/lama_dilated/conftest.py b/qai_hub_models/models/lama_dilated/conftest.py new file mode 100644 index 00000000..f61093bc --- /dev/null +++ b/qai_hub_models/models/lama_dilated/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.lama_dilated import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. 
+ """ + mock = patch( + "qai_hub_models.models.lama_dilated.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/lama_dilated/demo.py b/qai_hub_models/models/lama_dilated/demo.py index adabbf3f..f9778ff1 100644 --- a/qai_hub_models/models/lama_dilated/demo.py +++ b/qai_hub_models/models/lama_dilated/demo.py @@ -19,7 +19,7 @@ def main(is_test: bool = False): - repaint_demo(LamaDilated, IMAGE_ADDRESS, MASK_ADDRESS, is_test) + repaint_demo(LamaDilated, MODEL_ID, IMAGE_ADDRESS, MASK_ADDRESS, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/lama_dilated/export.py b/qai_hub_models/models/lama_dilated/export.py index 387f5e9c..02ae013e 100644 --- a/qai_hub_models/models/lama_dilated/export.py +++ b/qai_hub_models/models/lama_dilated/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -110,7 +110,7 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( @@ -119,29 +119,37 @@ def export_model( + " --force_channel_last_input image,mask" + " --force_channel_last_output output_0", ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -150,37 +158,39 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image,mask", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. 
Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore # Convert outputs from channel last to channel first inference_result = transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/lama_dilated/info.yaml b/qai_hub_models/models/lama_dilated/info.yaml index 54e07b78..d4ffe566 100644 --- a/qai_hub_models/models/lama_dilated/info.yaml +++ b/qai_hub_models/models/lama_dilated/info.yaml @@ -12,6 +12,7 @@ tags: research_paper: https://arxiv.org/abs/2109.07161 research_paper_title: Resolution-robust Large Mask Inpainting with Fourier Convolutions license: https://github.com/advimman/lama/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/advimman/lama technical_details: Model checkpoint: Dilated CelebAHQ @@ -28,4 +29,5 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: apache-2.0 +deploy_license_type: AI Model Hub License dataset: [] diff --git a/qai_hub_models/models/lama_dilated/model.py b/qai_hub_models/models/lama_dilated/model.py index fff9bf32..a0cbc86d 100644 --- a/qai_hub_models/models/lama_dilated/model.py +++ b/qai_hub_models/models/lama_dilated/model.py @@ -4,6 +4,9 @@ # --------------------------------------------------------------------- from __future__ import annotations +import logging +from importlib import reload + import torch from omegaconf import OmegaConf @@ -12,6 +15,7 @@ SourceAsRoot, load_json, load_torch, + set_log_level, ) from qai_hub_models.utils.base_model import BaseModel from qai_hub_models.utils.input_spec import InputSpec @@ -71,8 +75,8 @@ def forward(self, image: torch.Tensor, mask: torch.Tensor) -> torch.Tensor: inpainted = mask * predicted_image + (1 - mask) * image return inpainted + @staticmethod def get_input_spec( - self, batch_size: int = 1, num_channels: int = 3, height: int = 512, @@ -110,6 +114,13 @@ def _load_lama_dilated_source_model_from_weights(weights_name: str) -> torch.nn. with SourceAsRoot( LAMA_SOURCE_REPOSITORY, LAMA_SOURCE_REPO_COMMIT, MODEL_ID, MODEL_ASSET_VERSION ): + # This repository has a top-level "models", which is common. 
We + # explicitly reload it in case it has been loaded and cached by another + # package (or our models when executing from qai_hub_models/) + import models + + reload(models) + # Import module from saicinpainting.training.trainers.default import ( DefaultInpaintingTrainingModule, @@ -122,7 +133,8 @@ def _load_lama_dilated_source_model_from_weights(weights_name: str) -> torch.nn. kwargs.pop("kind") kwargs["use_ddp"] = True state = load_torch(weights_url) - lama_dilated_model = DefaultInpaintingTrainingModule(config, **kwargs) + with set_log_level(logging.WARN): + lama_dilated_model = DefaultInpaintingTrainingModule(config, **kwargs) lama_dilated_model.load_state_dict(state["state_dict"], strict=False) lama_dilated_model.on_load_checkpoint(state) lama_dilated_model.freeze() diff --git a/qai_hub_models/models/lama_dilated/perf.yaml b/qai_hub_models/models/lama_dilated/perf.yaml index f951db3e..af8a440d 100644 --- a/qai_hub_models/models/lama_dilated/perf.yaml +++ b/qai_hub_models/models/lama_dilated/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: LaMa-Dilated performance_metrics: - torchscript_onnx_tflite: - inference_time: 88596.0 - throughput: 11.287191295318072 + inference_time: 88628.0 + throughput: 11.283115945299453 estimated_peak_memory_range: - min: 3289088 - max: 139215624 + min: 3252224 + max: 140731056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,14 +43,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 346 - job_id: jqpyojvr5 + job_id: j2p0yv0gw job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:53:06.506039Z' torchscript_onnx_qnn: - inference_time: 84076.0 - throughput: 11.894000666064038 + inference_time: 84164.0 + throughput: 11.881564564421843 estimated_peak_memory_range: - min: 4313088 - max: 34733320 + min: 4321280 + max: 33964280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -55,13 +66,43 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 333 - job_id: j2p0m2e2g + job_id: j1gln12pv + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 62025.0 + throughput: 16.12253123740427 + estimated_peak_memory_range: + min: 225280 + max: 245293744 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 346 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 346 + job_id: jogkz9vgd job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:27:42.653097Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:58:58.279247Z' + torchscript_onnx_qnn: + inference_time: 58950.0 + throughput: 16.963528413910094 + estimated_peak_memory_range: + min: 78331904 + max: 243926976 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 333 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 333 + job_id: jw566dn5o + job_status: Passed diff --git a/qai_hub_models/models/lama_dilated/requirements.txt b/qai_hub_models/models/lama_dilated/requirements.txt index a21b654f..52c75e34 
100644 --- a/qai_hub_models/models/lama_dilated/requirements.txt +++ b/qai_hub_models/models/lama_dilated/requirements.txt @@ -1,10 +1,9 @@ -matplotlib -pandas +matplotlib==3.7.4 albumentations==0.5.2 pytorch-lightning==1.6.0 -webdataset +webdataset==0.2.86 easydict==1.10 kornia==0.5.0 hydra-core==1.3.0 -omegaconf==2.3.0 -scikit-learn==1.3.0 +scikit-learn==1.1.3 +tensorboard==2.13.0 diff --git a/qai_hub_models/models/lama_dilated/test.py b/qai_hub_models/models/lama_dilated/test.py index 47b13144..2dbd27d3 100644 --- a/qai_hub_models/models/lama_dilated/test.py +++ b/qai_hub_models/models/lama_dilated/test.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- import numpy as np +import pytest from qai_hub_models.models._shared.repaint.app import RepaintMaskApp from qai_hub_models.models.lama_dilated.demo import IMAGE_ADDRESS, MASK_ADDRESS @@ -38,6 +39,7 @@ def test_task(): ) +@pytest.mark.trace @skip_clone_repo_check def test_trace(): net = LamaDilated.from_pretrained() diff --git a/qai_hub_models/models/litehrnet/README.md b/qai_hub_models/models/litehrnet/README.md index 0c628f39..7e786799 100644 --- a/qai_hub_models/models/litehrnet/README.md +++ b/qai_hub_models/models/litehrnet/README.md @@ -10,7 +10,7 @@ This is based on the implementation of LiteHRNet found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/litehrnet). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -30,7 +30,7 @@ python -m qai_hub_models.models.litehrnet.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -46,7 +46,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of LiteHRNet can be found [here](https://github.com/HRNet/Lite-HRNet/blob/hrnet/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Lite-HRNet: A Lightweight High-Resolution Network](https://arxiv.org/abs/2104.06403) diff --git a/qai_hub_models/models/litehrnet/conftest.py b/qai_hub_models/models/litehrnet/conftest.py new file mode 100644 index 00000000..2ee063ae --- /dev/null +++ b/qai_hub_models/models/litehrnet/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.litehrnet import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.litehrnet.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/litehrnet/demo.py b/qai_hub_models/models/litehrnet/demo.py index 8ad0ad05..d9e9e20d 100644 --- a/qai_hub_models/models/litehrnet/demo.py +++ b/qai_hub_models/models/litehrnet/demo.py @@ -39,8 +39,8 @@ def main(is_test: bool = False): ) args = parser.parse_args([] if is_test else None) litehrnet_model = model_from_cli_args(LiteHRNet, args) - hub_model = demo_model_from_cli_args(LiteHRNet, args) - validate_on_device_demo_args(args, LiteHRNet.get_model_id()) + hub_model = demo_model_from_cli_args(LiteHRNet, MODEL_ID, args) + validate_on_device_demo_args(args, MODEL_ID) # Load image & model image = load_image(args.image) diff --git a/qai_hub_models/models/litehrnet/export.py b/qai_hub_models/models/litehrnet/export.py index b163c9d0..13f51b82 100644 --- a/qai_hub_models/models/litehrnet/export.py +++ b/qai_hub_models/models/litehrnet/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -108,66 +108,76 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( target_runtime, compile_options ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." 
) sample_inputs = model.sample_inputs(input_spec) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=sample_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/litehrnet/info.yaml b/qai_hub_models/models/litehrnet/info.yaml index 9e62d950..31da35bd 100644 --- a/qai_hub_models/models/litehrnet/info.yaml +++ b/qai_hub_models/models/litehrnet/info.yaml @@ -11,6 +11,7 @@ tags: [] research_paper: https://arxiv.org/abs/2104.06403 research_paper_title: 'Lite-HRNet: A Lightweight High-Resolution Network' license: https://github.com/HRNet/Lite-HRNet/blob/hrnet/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/HRNet/Lite-HRNet technical_details: Input resolution: 256x192 @@ -28,4 +29,5 @@ related_models: [openpose, hrnet_pose] has_static_banner: yes has_animated_banner: no license_type: apache-2.0 +deploy_license_type: AI Model Hub License dataset: [] diff --git a/qai_hub_models/models/litehrnet/model.py b/qai_hub_models/models/litehrnet/model.py index e2c31db2..aa2baf06 100644 --- a/qai_hub_models/models/litehrnet/model.py +++ b/qai_hub_models/models/litehrnet/model.py @@ -80,8 +80,8 @@ def forward( return keypoints, scores, heatmaps + @staticmethod def get_input_spec( - self, num_channels: int = 3, height: int = 256, width: int = 192, diff --git a/qai_hub_models/models/litehrnet/perf.yaml b/qai_hub_models/models/litehrnet/perf.yaml index 2b7cc7c9..d1a63bd8 100644 --- a/qai_hub_models/models/litehrnet/perf.yaml +++ b/qai_hub_models/models/litehrnet/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: LiteHRNet 
performance_metrics: - torchscript_onnx_tflite: - inference_time: 15966.0 - throughput: 62.63309532757109 + inference_time: 15866.0 + throughput: 63.02785831337451 estimated_peak_memory_range: - min: 6561792 - max: 13503904 + min: 6811648 + max: 10391632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,8 +43,16 @@ models: layers_on_gpu: 0 layers_on_cpu: 10 total_layers: 1236 - job_id: jqp4ydwqp + job_id: jn5q83o57 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:41:50.802497Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -57,11 +68,41 @@ models: total_layers: 0 job_id: '' job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 10704.0 + throughput: 93.42301943198804 + estimated_peak_memory_range: + min: 20480 + max: 71674208 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 1226 + layers_on_gpu: 0 + layers_on_cpu: 10 + total_layers: 1236 + job_id: j1glnkmpv + job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:25:31.033915Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:41:50.802505Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped diff --git a/qai_hub_models/models/litehrnet/requirements.txt b/qai_hub_models/models/litehrnet/requirements.txt index 048feb99..64d03ea1 100644 --- a/qai_hub_models/models/litehrnet/requirements.txt +++ b/qai_hub_models/models/litehrnet/requirements.txt @@ -1,3 +1,3 @@ -mmpose<=1.2.0 +mmpose==1.2.0 mmcv==2.1.0 -mmdet<=3.2.0 +mmdet==3.2.0 diff --git a/qai_hub_models/models/litehrnet/test.py b/qai_hub_models/models/litehrnet/test.py index 3fe634ef..6cc5b848 100644 --- a/qai_hub_models/models/litehrnet/test.py +++ b/qai_hub_models/models/litehrnet/test.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- import numpy as np +import pytest from qai_hub_models.models.litehrnet.app import LiteHRNetApp from qai_hub_models.models.litehrnet.demo import IMAGE_ADDRESS @@ -52,6 +53,7 @@ def test_task(): _test_impl(LiteHRNetApp(litehrnet, litehrnet.inferencer)) +@pytest.mark.trace def test_trace(): litehrnet = LiteHRNet.from_pretrained() _test_impl(LiteHRNetApp(litehrnet.convert_to_torchscript(), litehrnet.inferencer)) diff --git a/qai_hub_models/models/llama_v2_7b_chat_quantized/README.md b/qai_hub_models/models/llama_v2_7b_chat_quantized/README.md index a5546eed..c6955075 100644 --- a/qai_hub_models/models/llama_v2_7b_chat_quantized/README.md +++ b/qai_hub_models/models/llama_v2_7b_chat_quantized/README.md @@ -10,7 +10,7 @@ This is based on the implementation of Llama-v2-7B-Chat found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/llama_v2_7b_chat_quantized). 
-[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -18,7 +18,7 @@ a hosted Qualcomm® device. ## License - The license for the original implementation of Llama-v2-7B-Chat can be found [here](https://github.com/facebookresearch/llama/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [LLaMA: Open and Efficient Foundation Language Models](https://arxiv.org/abs/2302.13971) diff --git a/qai_hub_models/models/llama_v2_7b_chat_quantized/info.yaml b/qai_hub_models/models/llama_v2_7b_chat_quantized/info.yaml index 584f2f8b..8a9faf98 100644 --- a/qai_hub_models/models/llama_v2_7b_chat_quantized/info.yaml +++ b/qai_hub_models/models/llama_v2_7b_chat_quantized/info.yaml @@ -18,6 +18,7 @@ tags: research_paper: https://arxiv.org/abs/2302.13971 research_paper_title: "LLaMA: Open and Efficient Foundation Language Models" license: https://github.com/facebookresearch/llama/blob/main/LICENSE +deploy_license: https://github.com/facebookresearch/llama/blob/main/LICENSE source_repo: https://huggingface.co/meta-llama/Llama-2-7b-chat-hf technical_details: Number of parameters: 7B @@ -43,4 +44,5 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: llama2 +deploy_license_type: llama2 dataset: [] diff --git a/qai_hub_models/models/mediapipe_face/README.md b/qai_hub_models/models/mediapipe_face/README.md index 2e701b97..7ff2956c 100644 --- a/qai_hub_models/models/mediapipe_face/README.md +++ b/qai_hub_models/models/mediapipe_face/README.md @@ -10,17 +10,12 @@ This is based on the implementation of MediaPipe-Face-Detection found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/mediapipe_face). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. ## Example & Usage -Install the package via pip: -```bash -pip install "qai_hub_models[mediapipe_face]" -``` - Once installed, run the following simple CLI demo: @@ -30,7 +25,7 @@ python -m qai_hub_models.models.mediapipe_face.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -46,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of MediaPipe-Face-Detection can be found [here](https://github.com/zmurez/MediaPipePyTorch/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). 
+- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [BlazeFace: Sub-millisecond Neural Face Detection on Mobile GPUs](https://arxiv.org/abs/1907.05047) diff --git a/qai_hub_models/models/mediapipe_face/conftest.py b/qai_hub_models/models/mediapipe_face/conftest.py new file mode 100644 index 00000000..80a9de2b --- /dev/null +++ b/qai_hub_models/models/mediapipe_face/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.mediapipe_face import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.mediapipe_face.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/mediapipe_face/export.py b/qai_hub_models/models/mediapipe_face/export.py index c7f32d04..fe627869 100644 --- a/qai_hub_models/models/mediapipe_face/export.py +++ b/qai_hub_models/models/mediapipe_face/export.py @@ -10,14 +10,14 @@ import os import warnings from pathlib import Path -from typing import List, Mapping, Optional, Tuple +from typing import Any, Dict, List, Mapping, Optional, Tuple, cast import qai_hub as hub import torch from qai_hub_models.models.mediapipe_face import Model from qai_hub_models.utils.args import export_parser, get_model_kwargs -from qai_hub_models.utils.base_model import TargetRuntime +from qai_hub_models.utils.base_model import BaseModel, TargetRuntime from qai_hub_models.utils.compare import torch_inference from qai_hub_models.utils.input_spec import make_torch_inputs from qai_hub_models.utils.printing import ( @@ -90,9 +90,9 @@ def export_model( output_path = Path(output_dir or Path.cwd() / "build" / model_name) component_arg = components components = components or ALL_COMPONENTS - for component in components: - if component not in ALL_COMPONENTS: - raise ValueError(f"Invalid component {component}.") + for component_name in components: + if component_name not in ALL_COMPONENTS: + raise ValueError(f"Invalid component {component_name}.") if not can_access_qualcomm_ai_hub(): return export_without_hub_access( "mediapipe_face", @@ -111,68 +111,85 @@ def export_model( # 1. 
Initialize PyTorch model model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) - components_dict = {} + components_dict: Dict[str, BaseModel] = {} if "MediaPipeFaceDetector" in components: - components_dict["MediaPipeFaceDetector"] = model.face_detector + components_dict["MediaPipeFaceDetector"] = model.face_detector # type: ignore if "MediaPipeFaceLandmarkDetector" in components: - components_dict["MediaPipeFaceLandmarkDetector"] = model.face_landmark_detector + components_dict["MediaPipeFaceLandmarkDetector"] = model.face_landmark_detector # type: ignore - compile_jobs = {} + compile_jobs: Dict[str, hub.client.CompileJob] = {} for component_name, component in components_dict.items(): # Trace the model input_spec = component.get_input_spec() - source_model = torch.jit.trace(component, make_torch_inputs(input_spec)) + source_model = torch.jit.trace( + component.to("cpu"), make_torch_inputs(input_spec) + ) # 2. Compile the models to an on-device asset model_compile_options = component.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image" ) - print(f"Optimizing model {component_name} to run on-device.") - compile_jobs[component_name] = hub.submit_compile_job( + print(f"Optimizing model {component_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), - name=f"{component_name}", + name=f"{model_name}_{component_name}", options=model_compile_options, ) + compile_jobs[component_name] = cast( + hub.client.CompileJob, submitted_compile_job + ) # 3. Profile the model assets on real devices - profile_jobs = {} + profile_jobs: Dict[str, hub.client.ProfileJob] = {} if not skip_profiling: for component_name in components: + profile_options_all = components_dict[ + component_name + ].get_hub_profile_options(target_runtime, profile_options) print(f"Profiling model {component_name} on a hosted device.") - profile_jobs[component_name] = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_jobs[component_name].get_target_model(), device=hub.Device(device), - name=f"{component_name}", - options=profile_options, + name=f"{model_name}_{component_name}", + options=profile_options_all, + ) + profile_jobs[component_name] = cast( + hub.client.ProfileJob, submitted_profile_job ) # 4. Run inference on-device with sample inputs - inference_jobs = {} + inference_jobs: Dict[str, hub.client.InferenceJob] = {} if not skip_inferencing: for component_name in components: print( f"Running inference for {component_name} on a hosted device with example inputs." ) + profile_options_all = components_dict[ + component_name + ].get_hub_profile_options(target_runtime, profile_options) sample_inputs = components_dict[component_name].sample_inputs() # Convert inputs from channel first to channel last hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_jobs[component_name] = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_jobs[component_name].get_target_model(), inputs=hub_inputs, device=hub.Device(device), - name=f"{component_name}", - options=profile_options, + name=f"{model_name}_{component_name}", + options=profile_options_all, + ) + inference_jobs[component_name] = cast( + hub.client.InferenceJob, submitted_inference_job ) # 5. 
Download the model assets to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) for component_name, compile_job in compile_jobs.items(): - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download( str(output_path / f"{model_name}_{component_name}.tflite") ) @@ -181,8 +198,8 @@ def export_model( if not skip_summary and not skip_profiling: for component_name in components: profile_job = profile_jobs[component_name] - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: @@ -190,8 +207,8 @@ def export_model( inference_job = inference_jobs[component_name] sample_inputs = components_dict[component_name].sample_inputs() torch_out = torch_inference(components_dict[component_name], sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) return { diff --git a/qai_hub_models/models/mediapipe_face/info.yaml b/qai_hub_models/models/mediapipe_face/info.yaml index 6f80819c..ec898a06 100644 --- a/qai_hub_models/models/mediapipe_face/info.yaml +++ b/qai_hub_models/models/mediapipe_face/info.yaml @@ -14,6 +14,7 @@ research_paper: https://arxiv.org/abs/1907.05047 research_paper_title: 'BlazeFace: Sub-millisecond Neural Face Detection on Mobile GPUs' license: https://github.com/zmurez/MediaPipePyTorch/blob/master/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/zmurez/MediaPipePyTorch/ technical_details: Input resolution: 256x256 @@ -36,4 +37,5 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: apache-2.0 +deploy_license_type: AI Model Hub License dataset: [] diff --git a/qai_hub_models/models/mediapipe_face/model.py b/qai_hub_models/models/mediapipe_face/model.py index 89844824..29b79435 100644 --- a/qai_hub_models/models/mediapipe_face/model.py +++ b/qai_hub_models/models/mediapipe_face/model.py @@ -246,7 +246,8 @@ def from_pretrained( face_detector.load_anchors(detector_anchors) return cls(face_detector, face_detector.anchors) - def get_input_spec(self, batch_size: int = BATCH_SIZE) -> InputSpec: + @staticmethod + def get_input_spec(batch_size: int = BATCH_SIZE) -> InputSpec: """ Returns the input specification (name -> (shape, type) of the face detector. This can be used to submit profiling job on Qualcomm AI Hub. @@ -274,7 +275,8 @@ def from_pretrained(cls, landmark_detector_weights: str = "blazeface_landmark.pt face_regressor.load_weights(landmark_detector_weights) return cls(face_regressor) - def get_input_spec(self, batch_size: int = BATCH_SIZE) -> InputSpec: + @staticmethod + def get_input_spec(batch_size: int = BATCH_SIZE) -> InputSpec: """ Returns the input specification (name -> (shape, type) of the face landmark detector. This can be used to submit profiling job on Qualcomm AI Hub. 
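The hunks above repeatedly change `get_input_spec` from an instance method to a `@staticmethod` on the model wrappers touched in this patch (LaMa-Dilated, LiteHRNet, and the MediaPipe-Face components). The sketch below illustrates the intent under stated assumptions: `DetectorLike` and the 1x3x256x256 shape are illustrative stand-ins rather than any real class in this repo, and the spec format follows the name -> (shape, dtype) convention these export scripts pass as `input_specs` to `hub.submit_compile_job`.

```python
from typing import Dict, Tuple

# name -> (shape, dtype string), the format handed to the compile job as input_specs.
InputSpec = Dict[str, Tuple[Tuple[int, ...], str]]


class DetectorLike:
    """Illustrative stand-in for a model wrapper such as MediaPipeFaceDetector."""

    @staticmethod
    def get_input_spec(batch_size: int = 1) -> InputSpec:
        # Shape is illustrative; each real wrapper returns its own resolution.
        return {"image": ((batch_size, 3, 256, 256), "float32")}


# Because get_input_spec is now a @staticmethod, export and profiling code can read the
# spec straight off the class, with no model instance and no weight download.
spec = DetectorLike.get_input_spec()
assert spec["image"][0] == (1, 3, 256, 256)
```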
diff --git a/qai_hub_models/models/mediapipe_face/perf.yaml b/qai_hub_models/models/mediapipe_face/perf.yaml index 3df1ee28..bf104ca8 100644 --- a/qai_hub_models/models/mediapipe_face/perf.yaml +++ b/qai_hub_models/models/mediapipe_face/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: MediaPipeFaceDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 536.0 - throughput: 1865.6716417910447 + inference_time: 532.0 + throughput: 1879.6992481203008 estimated_peak_memory_range: min: 12288 - max: 1539856 + max: 1591696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,39 +43,77 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 111 - job_id: jqp4ydjqp + job_id: jn5q8nm57 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:12:50.277943Z' torchscript_onnx_qnn: - inference_time: 592.0 - throughput: 1689.1891891891892 + inference_time: 535.0 + throughput: 1869.1588785046729 + estimated_peak_memory_range: + min: 16384 + max: 4401872 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 111 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 111 + job_id: jwgoyxd58 + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 380.0 + throughput: 2631.5789473684213 estimated_peak_memory_range: - min: 802816 - max: 57565728 + min: 12288 + max: 27416464 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 147 + layers_on_npu: 111 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 147 - job_id: jo5m06vyg + total_layers: 111 + job_id: jw566x75o job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:08:54.792595Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:15:59.596663Z' + torchscript_onnx_qnn: + inference_time: 381.0 + throughput: 2624.6719160104985 + estimated_peak_memory_range: + min: 12288 + max: 26948416 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 111 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 111 + job_id: j7gjx98pd + job_status: Passed - name: MediaPipeFaceLandmarkDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 209.0 - throughput: 4784.688995215311 + inference_time: 211.0 + throughput: 4739.336492890995 estimated_peak_memory_range: min: 24576 - max: 1806472 + max: 1810232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -80,28 +121,66 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 100 - job_id: j0pxl6ejp + job_id: j1glndlpv job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:24:30.398348Z' torchscript_onnx_qnn: - inference_time: 286.0 - throughput: 3496.5034965034965 + inference_time: 210.0 + throughput: 4761.9047619047615 estimated_peak_memory_range: - min: 462848 - max: 8766648 + min: 28672 + max: 1684984 primary_compute_unit: NPU precision: 
fp16 layer_info: - layers_on_npu: 106 + layers_on_npu: 100 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 106 - job_id: jegnzmxvg + total_layers: 100 + job_id: j1pv38m5x + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 159.0 + throughput: 6289.308176100629 + estimated_peak_memory_range: + min: 12288 + max: 24695408 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 100 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 100 + job_id: j1p3kdz52 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:12:20.881454Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:28:56.546828Z' + torchscript_onnx_qnn: + inference_time: 156.0 + throughput: 6410.25641025641 + estimated_peak_memory_range: + min: 16384 + max: 24996560 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 100 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 100 + job_id: jlpe9q0gr + job_status: Passed diff --git a/qai_hub_models/models/mediapipe_face/requirements.txt b/qai_hub_models/models/mediapipe_face/requirements.txt deleted file mode 100644 index 9c11ddeb..00000000 --- a/qai_hub_models/models/mediapipe_face/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -opencv-python -requests diff --git a/qai_hub_models/models/mediapipe_hand/README.md b/qai_hub_models/models/mediapipe_hand/README.md index 8aa4b4e1..ff64fe98 100644 --- a/qai_hub_models/models/mediapipe_hand/README.md +++ b/qai_hub_models/models/mediapipe_hand/README.md @@ -10,17 +10,12 @@ This is based on the implementation of MediaPipe-Hand-Detection found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/mediapipe_hand). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. ## Example & Usage -Install the package via pip: -```bash -pip install "qai_hub_models[mediapipe_hand]" -``` - Once installed, run the following simple CLI demo: @@ -30,7 +25,7 @@ python -m qai_hub_models.models.mediapipe_hand.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -46,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of MediaPipe-Hand-Detection can be found [here](https://github.com/zmurez/MediaPipePyTorch/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). 
+- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [MediaPipe Hands: On-device Real-time Hand Tracking](https://arxiv.org/abs/2006.10214) diff --git a/qai_hub_models/models/mediapipe_hand/conftest.py b/qai_hub_models/models/mediapipe_hand/conftest.py new file mode 100644 index 00000000..8848bec2 --- /dev/null +++ b/qai_hub_models/models/mediapipe_hand/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.mediapipe_hand import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.mediapipe_hand.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/mediapipe_hand/export.py b/qai_hub_models/models/mediapipe_hand/export.py index 7d92ac14..22ff6f03 100644 --- a/qai_hub_models/models/mediapipe_hand/export.py +++ b/qai_hub_models/models/mediapipe_hand/export.py @@ -10,14 +10,14 @@ import os import warnings from pathlib import Path -from typing import List, Mapping, Optional, Tuple +from typing import Any, Dict, List, Mapping, Optional, Tuple, cast import qai_hub as hub import torch from qai_hub_models.models.mediapipe_hand import Model from qai_hub_models.utils.args import export_parser, get_model_kwargs -from qai_hub_models.utils.base_model import TargetRuntime +from qai_hub_models.utils.base_model import BaseModel, TargetRuntime from qai_hub_models.utils.compare import torch_inference from qai_hub_models.utils.input_spec import make_torch_inputs from qai_hub_models.utils.printing import ( @@ -90,9 +90,9 @@ def export_model( output_path = Path(output_dir or Path.cwd() / "build" / model_name) component_arg = components components = components or ALL_COMPONENTS - for component in components: - if component not in ALL_COMPONENTS: - raise ValueError(f"Invalid component {component}.") + for component_name in components: + if component_name not in ALL_COMPONENTS: + raise ValueError(f"Invalid component {component_name}.") if not can_access_qualcomm_ai_hub(): return export_without_hub_access( "mediapipe_hand", @@ -111,68 +111,85 @@ def export_model( # 1. 
Initialize PyTorch model model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) - components_dict = {} + components_dict: Dict[str, BaseModel] = {} if "MediaPipeHandDetector" in components: - components_dict["MediaPipeHandDetector"] = model.hand_detector + components_dict["MediaPipeHandDetector"] = model.hand_detector # type: ignore if "MediaPipeHandLandmarkDetector" in components: - components_dict["MediaPipeHandLandmarkDetector"] = model.hand_landmark_detector + components_dict["MediaPipeHandLandmarkDetector"] = model.hand_landmark_detector # type: ignore - compile_jobs = {} + compile_jobs: Dict[str, hub.client.CompileJob] = {} for component_name, component in components_dict.items(): # Trace the model input_spec = component.get_input_spec() - source_model = torch.jit.trace(component, make_torch_inputs(input_spec)) + source_model = torch.jit.trace( + component.to("cpu"), make_torch_inputs(input_spec) + ) # 2. Compile the models to an on-device asset model_compile_options = component.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image" ) - print(f"Optimizing model {component_name} to run on-device.") - compile_jobs[component_name] = hub.submit_compile_job( + print(f"Optimizing model {component_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), - name=f"{component_name}", + name=f"{model_name}_{component_name}", options=model_compile_options, ) + compile_jobs[component_name] = cast( + hub.client.CompileJob, submitted_compile_job + ) # 3. Profile the model assets on real devices - profile_jobs = {} + profile_jobs: Dict[str, hub.client.ProfileJob] = {} if not skip_profiling: for component_name in components: + profile_options_all = components_dict[ + component_name + ].get_hub_profile_options(target_runtime, profile_options) print(f"Profiling model {component_name} on a hosted device.") - profile_jobs[component_name] = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_jobs[component_name].get_target_model(), device=hub.Device(device), - name=f"{component_name}", - options=profile_options, + name=f"{model_name}_{component_name}", + options=profile_options_all, + ) + profile_jobs[component_name] = cast( + hub.client.ProfileJob, submitted_profile_job ) # 4. Run inference on-device with sample inputs - inference_jobs = {} + inference_jobs: Dict[str, hub.client.InferenceJob] = {} if not skip_inferencing: for component_name in components: print( f"Running inference for {component_name} on a hosted device with example inputs." ) + profile_options_all = components_dict[ + component_name + ].get_hub_profile_options(target_runtime, profile_options) sample_inputs = components_dict[component_name].sample_inputs() # Convert inputs from channel first to channel last hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_jobs[component_name] = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_jobs[component_name].get_target_model(), inputs=hub_inputs, device=hub.Device(device), - name=f"{component_name}", - options=profile_options, + name=f"{model_name}_{component_name}", + options=profile_options_all, + ) + inference_jobs[component_name] = cast( + hub.client.InferenceJob, submitted_inference_job ) # 5. 
Download the model assets to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) for component_name, compile_job in compile_jobs.items(): - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download( str(output_path / f"{model_name}_{component_name}.tflite") ) @@ -181,8 +198,8 @@ def export_model( if not skip_summary and not skip_profiling: for component_name in components: profile_job = profile_jobs[component_name] - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: @@ -190,8 +207,8 @@ def export_model( inference_job = inference_jobs[component_name] sample_inputs = components_dict[component_name].sample_inputs() torch_out = torch_inference(components_dict[component_name], sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) return { diff --git a/qai_hub_models/models/mediapipe_hand/info.yaml b/qai_hub_models/models/mediapipe_hand/info.yaml index 32d75fb9..9266f51a 100644 --- a/qai_hub_models/models/mediapipe_hand/info.yaml +++ b/qai_hub_models/models/mediapipe_hand/info.yaml @@ -12,6 +12,7 @@ tags: research_paper: https://arxiv.org/abs/2006.10214 research_paper_title: 'MediaPipe Hands: On-device Real-time Hand Tracking' license: https://github.com/zmurez/MediaPipePyTorch/blob/master/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/zmurez/MediaPipePyTorch/ technical_details: Input resolution: 256x256 @@ -34,4 +35,5 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: apache-2.0 +deploy_license_type: AI Model Hub License dataset: [] diff --git a/qai_hub_models/models/mediapipe_hand/model.py b/qai_hub_models/models/mediapipe_hand/model.py index b41542ef..25d2c4e5 100644 --- a/qai_hub_models/models/mediapipe_hand/model.py +++ b/qai_hub_models/models/mediapipe_hand/model.py @@ -142,7 +142,8 @@ def from_pretrained( hand_detector.load_anchors(detector_anchors) return cls(hand_detector, hand_detector.anchors) - def get_input_spec(self, batch_size: int = BATCH_SIZE) -> InputSpec: + @staticmethod + def get_input_spec(batch_size: int = BATCH_SIZE) -> InputSpec: """ Returns the input specification (name -> (shape, type) of the hand detector. This can be used to submit profiling job on Qualcomm AI Hub. @@ -170,7 +171,8 @@ def from_pretrained(cls, landmark_detector_weights: str = "blazehand_landmark.pt hand_regressor.load_weights(landmark_detector_weights) cls(hand_regressor) - def get_input_spec(self, batch_size: int = BATCH_SIZE) -> InputSpec: + @staticmethod + def get_input_spec(batch_size: int = BATCH_SIZE) -> InputSpec: """ Returns the input specification (name -> (shape, type) of the hand landmark detector. This can be used to submit profiling job on Qualcomm AI Hub. 
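Several of the regenerated export scripts in this patch (LaMa-Dilated earlier, and the MediaPipe components here) compile with `--force_channel_last_input image` and wrap on-device inference with `transpose_channel_first_to_last` / `transpose_channel_last_to_first`. The snippet below is only a sketch of the underlying layout change, not the actual helpers from `qai_hub_models.utils`: traced PyTorch tensors are NCHW, while the compiled asset expects NHWC.

```python
import numpy as np


def to_channel_last(x: np.ndarray) -> np.ndarray:
    # Traced PyTorch inputs are NCHW; assets compiled with
    # --force_channel_last_input expect NHWC on device.
    return np.transpose(x, (0, 2, 3, 1))


def to_channel_first(x: np.ndarray) -> np.ndarray:
    # Inverse transpose, applied to outputs listed in --force_channel_last_output.
    return np.transpose(x, (0, 3, 1, 2))


image_nchw = np.zeros((1, 3, 256, 256), dtype=np.float32)
image_nhwc = to_channel_last(image_nchw)
assert image_nhwc.shape == (1, 256, 256, 3)
assert to_channel_first(image_nhwc).shape == image_nchw.shape
```

In the real helpers the same transpose is presumably applied per named entry (e.g. `image,mask` on input, `output_0` on output) of the sample-input and inference-result dictionaries.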
diff --git a/qai_hub_models/models/mediapipe_hand/perf.yaml b/qai_hub_models/models/mediapipe_hand/perf.yaml index f79bb0af..dad8f571 100644 --- a/qai_hub_models/models/mediapipe_hand/perf.yaml +++ b/qai_hub_models/models/mediapipe_hand/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: MediaPipeHandDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 762.0 - throughput: 1312.3359580052493 + inference_time: 765.0 + throughput: 1307.18954248366 estimated_peak_memory_range: min: 12288 - max: 3281536 + max: 12061368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,39 +43,77 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 151 - job_id: jwgolne4g + job_id: jep28dxp6 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:23:15.414918Z' torchscript_onnx_qnn: - inference_time: 820.0 - throughput: 1219.5121951219512 + inference_time: 763.0 + throughput: 1310.615989515072 estimated_peak_memory_range: - min: 806912 - max: 6264240 + min: 12288 + max: 1709784 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 196 + layers_on_npu: 151 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 196 - job_id: j7gjr2k7p + total_layers: 151 + job_id: jogkz0ygd + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 571.0 + throughput: 1751.3134851138354 + estimated_peak_memory_range: + min: 12288 + max: 51661744 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 151 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 151 + job_id: j2p0y92gw job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:08:53.710000Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:27:44.926097Z' + torchscript_onnx_qnn: + inference_time: 547.0 + throughput: 1828.1535648994516 + estimated_peak_memory_range: + min: 12288 + max: 52066480 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 151 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 151 + job_id: j1gln8epv + job_status: Passed - name: MediaPipeHandLandmarkDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 1017.0 - throughput: 983.284169124877 + inference_time: 1047.0 + throughput: 955.1098376313277 estimated_peak_memory_range: - min: 24576 - max: 2409872 + min: 28672 + max: 2017000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -80,28 +121,66 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: j1pvlrz75 + job_id: jqpye2rgy job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:35:00.465711Z' torchscript_onnx_qnn: - inference_time: 1088.0 - throughput: 919.1176470588235 + inference_time: 996.0 + throughput: 1004.0160642570281 estimated_peak_memory_range: - min: 577536 - max: 53567440 + min: 24576 + max: 10650592 primary_compute_unit: 
NPU precision: fp16 layer_info: - layers_on_npu: 209 + layers_on_npu: 158 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 209 - job_id: jlpe7w475 + total_layers: 158 + job_id: jn5q81757 + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 749.0 + throughput: 1335.1134846461948 + estimated_peak_memory_range: + min: 16384 + max: 54372320 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 158 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 158 + job_id: j1p8orzg9 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:12:22.243551Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:36:54.085694Z' + torchscript_onnx_qnn: + inference_time: 747.0 + throughput: 1338.6880856760374 + estimated_peak_memory_range: + min: 12288 + max: 53941536 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 158 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 158 + job_id: jw566mv5o + job_status: Passed diff --git a/qai_hub_models/models/mediapipe_hand/requirements.txt b/qai_hub_models/models/mediapipe_hand/requirements.txt deleted file mode 100644 index 9c11ddeb..00000000 --- a/qai_hub_models/models/mediapipe_hand/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -opencv-python -requests diff --git a/qai_hub_models/models/mediapipe_pose/README.md b/qai_hub_models/models/mediapipe_pose/README.md index 693c634a..9a10818d 100644 --- a/qai_hub_models/models/mediapipe_pose/README.md +++ b/qai_hub_models/models/mediapipe_pose/README.md @@ -10,17 +10,12 @@ This is based on the implementation of MediaPipe-Pose-Estimation found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/mediapipe_pose). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. ## Example & Usage -Install the package via pip: -```bash -pip install "qai_hub_models[mediapipe_pose]" -``` - Once installed, run the following simple CLI demo: @@ -30,7 +25,7 @@ python -m qai_hub_models.models.mediapipe_pose.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -46,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of MediaPipe-Pose-Estimation can be found [here](https://github.com/zmurez/MediaPipePyTorch/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). 
+- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [BlazePose: On-device Real-time Body Pose tracking](https://arxiv.org/abs/2006.10204) diff --git a/qai_hub_models/models/mediapipe_pose/conftest.py b/qai_hub_models/models/mediapipe_pose/conftest.py new file mode 100644 index 00000000..2e30278e --- /dev/null +++ b/qai_hub_models/models/mediapipe_pose/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.mediapipe_pose import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.mediapipe_pose.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/mediapipe_pose/export.py b/qai_hub_models/models/mediapipe_pose/export.py index 9d2b78aa..ec0c57a3 100644 --- a/qai_hub_models/models/mediapipe_pose/export.py +++ b/qai_hub_models/models/mediapipe_pose/export.py @@ -10,14 +10,14 @@ import os import warnings from pathlib import Path -from typing import List, Mapping, Optional, Tuple +from typing import Any, Dict, List, Mapping, Optional, Tuple, cast import qai_hub as hub import torch from qai_hub_models.models.mediapipe_pose import Model from qai_hub_models.utils.args import export_parser, get_model_kwargs -from qai_hub_models.utils.base_model import TargetRuntime +from qai_hub_models.utils.base_model import BaseModel, TargetRuntime from qai_hub_models.utils.compare import torch_inference from qai_hub_models.utils.input_spec import make_torch_inputs from qai_hub_models.utils.printing import ( @@ -90,9 +90,9 @@ def export_model( output_path = Path(output_dir or Path.cwd() / "build" / model_name) component_arg = components components = components or ALL_COMPONENTS - for component in components: - if component not in ALL_COMPONENTS: - raise ValueError(f"Invalid component {component}.") + for component_name in components: + if component_name not in ALL_COMPONENTS: + raise ValueError(f"Invalid component {component_name}.") if not can_access_qualcomm_ai_hub(): return export_without_hub_access( "mediapipe_pose", @@ -111,68 +111,85 @@ def export_model( # 1. 
Initialize PyTorch model model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) - components_dict = {} + components_dict: Dict[str, BaseModel] = {} if "MediaPipePoseDetector" in components: - components_dict["MediaPipePoseDetector"] = model.pose_detector + components_dict["MediaPipePoseDetector"] = model.pose_detector # type: ignore if "MediaPipePoseLandmarkDetector" in components: - components_dict["MediaPipePoseLandmarkDetector"] = model.pose_landmark_detector + components_dict["MediaPipePoseLandmarkDetector"] = model.pose_landmark_detector # type: ignore - compile_jobs = {} + compile_jobs: Dict[str, hub.client.CompileJob] = {} for component_name, component in components_dict.items(): # Trace the model input_spec = component.get_input_spec() - source_model = torch.jit.trace(component, make_torch_inputs(input_spec)) + source_model = torch.jit.trace( + component.to("cpu"), make_torch_inputs(input_spec) + ) # 2. Compile the models to an on-device asset model_compile_options = component.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image" ) - print(f"Optimizing model {component_name} to run on-device.") - compile_jobs[component_name] = hub.submit_compile_job( + print(f"Optimizing model {component_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), - name=f"{component_name}", + name=f"{model_name}_{component_name}", options=model_compile_options, ) + compile_jobs[component_name] = cast( + hub.client.CompileJob, submitted_compile_job + ) # 3. Profile the model assets on real devices - profile_jobs = {} + profile_jobs: Dict[str, hub.client.ProfileJob] = {} if not skip_profiling: for component_name in components: + profile_options_all = components_dict[ + component_name + ].get_hub_profile_options(target_runtime, profile_options) print(f"Profiling model {component_name} on a hosted device.") - profile_jobs[component_name] = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_jobs[component_name].get_target_model(), device=hub.Device(device), - name=f"{component_name}", - options=profile_options, + name=f"{model_name}_{component_name}", + options=profile_options_all, + ) + profile_jobs[component_name] = cast( + hub.client.ProfileJob, submitted_profile_job ) # 4. Run inference on-device with sample inputs - inference_jobs = {} + inference_jobs: Dict[str, hub.client.InferenceJob] = {} if not skip_inferencing: for component_name in components: print( f"Running inference for {component_name} on a hosted device with example inputs." ) + profile_options_all = components_dict[ + component_name + ].get_hub_profile_options(target_runtime, profile_options) sample_inputs = components_dict[component_name].sample_inputs() # Convert inputs from channel first to channel last hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_jobs[component_name] = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_jobs[component_name].get_target_model(), inputs=hub_inputs, device=hub.Device(device), - name=f"{component_name}", - options=profile_options, + name=f"{model_name}_{component_name}", + options=profile_options_all, + ) + inference_jobs[component_name] = cast( + hub.client.InferenceJob, submitted_inference_job ) # 5. 
Download the model assets to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) for component_name, compile_job in compile_jobs.items(): - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download( str(output_path / f"{model_name}_{component_name}.tflite") ) @@ -181,8 +198,8 @@ def export_model( if not skip_summary and not skip_profiling: for component_name in components: profile_job = profile_jobs[component_name] - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: @@ -190,8 +207,8 @@ def export_model( inference_job = inference_jobs[component_name] sample_inputs = components_dict[component_name].sample_inputs() torch_out = torch_inference(components_dict[component_name], sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) return { diff --git a/qai_hub_models/models/mediapipe_pose/info.yaml b/qai_hub_models/models/mediapipe_pose/info.yaml index 23e2045b..8665455f 100644 --- a/qai_hub_models/models/mediapipe_pose/info.yaml +++ b/qai_hub_models/models/mediapipe_pose/info.yaml @@ -12,6 +12,7 @@ tags: research_paper: https://arxiv.org/abs/2006.10204 research_paper_title: 'BlazePose: On-device Real-time Body Pose tracking' license: https://github.com/zmurez/MediaPipePyTorch/blob/master/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/zmurez/MediaPipePyTorch/ technical_details: Input resolution: 256x256 @@ -34,4 +35,5 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: apache-2.0 +deploy_license_type: AI Model Hub License dataset: [] diff --git a/qai_hub_models/models/mediapipe_pose/model.py b/qai_hub_models/models/mediapipe_pose/model.py index ad32a893..7c96d6e5 100644 --- a/qai_hub_models/models/mediapipe_pose/model.py +++ b/qai_hub_models/models/mediapipe_pose/model.py @@ -138,7 +138,8 @@ def from_pretrained( pose_detector.load_anchors(detector_anchors) return cls(pose_detector, pose_detector.anchors) - def get_input_spec(self, batch_size: int = BATCH_SIZE) -> InputSpec: + @staticmethod + def get_input_spec(batch_size: int = BATCH_SIZE) -> InputSpec: """ Returns the input specification (name -> (shape, type) of the pose detector. This can be used to submit profiling job on Qualcomm AI Hub. @@ -166,7 +167,8 @@ def from_pretrained(cls, landmark_detector_weights: str = "blazepose_landmark.pt pose_regressor.load_weights(landmark_detector_weights) cls(pose_regressor) - def get_input_spec(self, batch_size: int = BATCH_SIZE) -> InputSpec: + @staticmethod + def get_input_spec(batch_size: int = BATCH_SIZE) -> InputSpec: """ Returns the input specification (name -> (shape, type) of the pose landmark detector. This can be used to submit profiling job on Qualcomm AI Hub. 
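For reference, the per-component export flow that the updated `mediapipe_pose/export.py` implements (trace on CPU, compile, then profile on a hosted device) can be summarized with the minimal sketch below. This sketch is illustrative and not part of the patch: it assumes access to Qualcomm® AI Hub, the device name and TFLite runtime are example choices, and every API call mirrors one already used in the diff above (`hub.submit_compile_job`, `get_hub_compile_options`, `get_hub_profile_options`, `make_torch_inputs`).

```python
# Minimal sketch of the per-component export flow from mediapipe_pose/export.py.
# Assumptions: Qualcomm AI Hub access is configured; the device and runtime below
# are example values, not requirements of the patch.
import qai_hub as hub
import torch

from qai_hub_models.models.mediapipe_pose import Model
from qai_hub_models.utils.base_model import TargetRuntime
from qai_hub_models.utils.input_spec import make_torch_inputs

model = Model.from_pretrained()
component = model.pose_detector  # or model.pose_landmark_detector
input_spec = component.get_input_spec()

# 1. Trace the component on CPU so the TorchScript asset is device-agnostic.
source_model = torch.jit.trace(component.to("cpu"), make_torch_inputs(input_spec))

# 2. Compile for the target runtime; the image input is forced to channel-last,
#    matching the options added in the patch.
compile_job = hub.submit_compile_job(
    model=source_model,
    input_specs=input_spec,
    device=hub.Device("Samsung Galaxy S24"),  # example device
    name="mediapipe_pose_MediaPipePoseDetector",
    options=component.get_hub_compile_options(
        TargetRuntime.TFLITE, " --force_channel_last_input image"
    ),
)

# 3. Profile the compiled asset on a hosted device using the component's own
#    profile options, as the patch now does.
profile_job = hub.submit_profile_job(
    model=compile_job.get_target_model(),
    device=hub.Device("Samsung Galaxy S24"),
    name="mediapipe_pose_MediaPipePoseDetector",
    options=component.get_hub_profile_options(TargetRuntime.TFLITE, ""),
)
assert profile_job.wait().success
```

The patch additionally wraps each submitted job in `typing.cast` (`CompileJob`, `ProfileJob`, `InferenceJob`) so static type checkers see concrete job types; the sketch omits that detail for brevity.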
diff --git a/qai_hub_models/models/mediapipe_pose/perf.yaml b/qai_hub_models/models/mediapipe_pose/perf.yaml index 7b7ebe7c..f642b95b 100644 --- a/qai_hub_models/models/mediapipe_pose/perf.yaml +++ b/qai_hub_models/models/mediapipe_pose/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: MediaPipePoseDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 807.0 - throughput: 1239.1573729863692 + inference_time: 806.0 + throughput: 1240.6947890818858 estimated_peak_memory_range: - min: 28672 - max: 1641432 + min: 24576 + max: 1736000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,39 +43,77 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: j1p3z1wz5 + job_id: jygzelzg8 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:54:08.518654Z' torchscript_onnx_qnn: - inference_time: 865.0 - throughput: 1156.0693641618498 + inference_time: 808.0 + throughput: 1237.6237623762377 estimated_peak_memory_range: - min: 212992 - max: 66280848 + min: 28672 + max: 4909504 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 139 + layers_on_npu: 106 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 139 - job_id: j1pvlr9m5 + total_layers: 106 + job_id: jvgdwdk5j + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 577.0 + throughput: 1733.102253032929 + estimated_peak_memory_range: + min: 65536 + max: 39641680 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 106 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 106 + job_id: jmg9vzq57 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:22:09.229999Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:58:30.195464Z' + torchscript_onnx_qnn: + inference_time: 577.0 + throughput: 1733.102253032929 + estimated_peak_memory_range: + min: 61440 + max: 40004608 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 106 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 106 + job_id: jqp4qyqgo + job_status: Passed - name: MediaPipePoseLandmarkDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 1023.0 - throughput: 977.5171065493646 + inference_time: 1052.0 + throughput: 950.5703422053232 estimated_peak_memory_range: - min: 12288 - max: 3253904 + min: 16384 + max: 2847296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -80,28 +121,66 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 229 - job_id: jwgoln4dg + job_id: jz5wolzp1 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-15T00:04:16.645350Z' torchscript_onnx_qnn: - inference_time: 1101.0 - throughput: 908.2652134423251 + inference_time: 1063.0 + throughput: 940.7337723424271 estimated_peak_memory_range: - min: 20480 - max: 149395360 + min: 12288 + max: 2768272 
primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 305 + layers_on_npu: 229 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 305 - job_id: j7gjr2w8p + total_layers: 229 + job_id: jz57zeqp3 + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 756.0 + throughput: 1322.7513227513227 + estimated_peak_memory_range: + min: 12288 + max: 84633232 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 229 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 229 + job_id: jnp10nk5q job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:29:24.657545Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-15T00:06:07.166564Z' + torchscript_onnx_qnn: + inference_time: 772.0 + throughput: 1295.3367875647668 + estimated_peak_memory_range: + min: 12288 + max: 84377840 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 229 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 229 + job_id: j0pxvljg7 + job_status: Passed diff --git a/qai_hub_models/models/mediapipe_pose/requirements.txt b/qai_hub_models/models/mediapipe_pose/requirements.txt deleted file mode 100644 index 9c11ddeb..00000000 --- a/qai_hub_models/models/mediapipe_pose/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -opencv-python -requests diff --git a/qai_hub_models/models/mediapipe_selfie/README.md b/qai_hub_models/models/mediapipe_selfie/README.md index 9c0602dc..6c8c4440 100644 --- a/qai_hub_models/models/mediapipe_selfie/README.md +++ b/qai_hub_models/models/mediapipe_selfie/README.md @@ -10,7 +10,7 @@ This is based on the implementation of MediaPipe-Selfie-Segmentation found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/mediapipe_selfie). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -30,7 +30,7 @@ python -m qai_hub_models.models.mediapipe_selfie.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -46,7 +46,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of MediaPipe-Selfie-Segmentation can be found [here](https://github.com/google/mediapipe/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). 
+- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Image segmentation guide](https://developers.google.com/mediapipe/solutions/vision/image_segmenter/) diff --git a/qai_hub_models/models/mediapipe_selfie/conftest.py b/qai_hub_models/models/mediapipe_selfie/conftest.py new file mode 100644 index 00000000..70353f17 --- /dev/null +++ b/qai_hub_models/models/mediapipe_selfie/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.mediapipe_selfie import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.mediapipe_selfie.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/mediapipe_selfie/demo.py b/qai_hub_models/models/mediapipe_selfie/demo.py index 38c43043..3c611dfd 100644 --- a/qai_hub_models/models/mediapipe_selfie/demo.py +++ b/qai_hub_models/models/mediapipe_selfie/demo.py @@ -4,8 +4,6 @@ # --------------------------------------------------------------------- from __future__ import annotations -from typing import Type - from PIL.Image import fromarray from qai_hub_models.models.mediapipe_selfie.app import SelfieSegmentationApp @@ -15,13 +13,15 @@ SelfieSegmentation, ) from qai_hub_models.utils.args import ( - add_output_dir_arg, + demo_model_from_cli_args, get_model_cli_parser, - model_from_cli_args, + get_on_device_demo_parser, + validate_on_device_demo_args, ) from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image -from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.base_model import TargetRuntime from qai_hub_models.utils.display import display_or_save_image +from qai_hub_models.utils.image_processing import pil_resize_pad, pil_undo_resize_pad IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( MODEL_ID, MODEL_ASSET_VERSION, "selfie.jpg" @@ -30,35 +30,38 @@ # Run selfie segmentation app end-to-end on a sample image. # The demo will display the predicted mask in a window. 
-def mediapipe_selfie_demo( - model_cls: Type[BaseModel], - default_image: str | CachedWebModelAsset, +def main( is_test: bool = False, ): # Demo parameters - parser = get_model_cli_parser(model_cls) + parser = get_model_cli_parser(SelfieSegmentation) + parser = get_on_device_demo_parser( + parser, available_target_runtimes=[TargetRuntime.TFLITE], add_output_dir=True + ) parser.add_argument( "--image", type=str, - default=default_image, + default=IMAGE_ADDRESS, help="File path or URL to an input image to use for the demo.", ) - add_output_dir_arg(parser) args = parser.parse_args([] if is_test else None) + validate_on_device_demo_args(args, MODEL_ID) # Load image & model - model = model_from_cli_args(model_cls, args) - print("Model loaded from pre-trained weights.") - image = load_image(args.image, verbose=True, desc="sample input image") + orig_image = load_image(args.image) + model = demo_model_from_cli_args(SelfieSegmentation, MODEL_ID, args) # Run app app = SelfieSegmentationApp(model) + (_, _, height, width) = SelfieSegmentation.get_input_spec()["image"][0] + + image, scale, padding = pil_resize_pad(orig_image, (height, width)) mask = app.predict(image) * 255.0 mask = fromarray(mask).convert("L") if not is_test: # Make sure the input image and mask are resized so the demo can visually # show the images in the same resolution. - image = image.resize(mask.size) + image = pil_undo_resize_pad(image, orig_image.size, scale, padding) display_or_save_image( image, args.output_dir, "mediapipe_selfie_image.png", "sample input image" ) @@ -67,13 +70,5 @@ def mediapipe_selfie_demo( ) -def main(is_test: bool = False): - mediapipe_selfie_demo( - SelfieSegmentation, - IMAGE_ADDRESS, - is_test, - ) - - if __name__ == "__main__": - main() + main(is_test=False) diff --git a/qai_hub_models/models/mediapipe_selfie/export.py b/qai_hub_models/models/mediapipe_selfie/export.py index b422b5e5..ce654b17 100644 --- a/qai_hub_models/models/mediapipe_selfie/export.py +++ b/qai_hub_models/models/mediapipe_selfie/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -26,6 +26,7 @@ from qai_hub_models.utils.input_spec import make_torch_inputs from qai_hub_models.utils.printing import ( print_inference_metrics, + print_on_target_demo_cmd, print_profile_metrics_from_job, ) from qai_hub_models.utils.qai_hub_helpers import ( @@ -109,7 +110,7 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( @@ -118,29 +119,37 @@ def export_model( + " --force_channel_last_input image" + " --force_channel_last_output output_0", ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. 
Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -149,36 +158,40 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore # Convert outputs from channel last to channel first inference_result = transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) print_inference_metrics(inference_job, inference_result, torch_out) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/mediapipe_selfie/info.yaml b/qai_hub_models/models/mediapipe_selfie/info.yaml index 3c85bfbc..f40e5ca0 100644 --- a/qai_hub_models/models/mediapipe_selfie/info.yaml +++ b/qai_hub_models/models/mediapipe_selfie/info.yaml @@ -12,6 +12,7 @@ tags: [] research_paper: https://developers.google.com/mediapipe/solutions/vision/image_segmenter/ research_paper_title: Image segmentation guide license: https://github.com/google/mediapipe/blob/master/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/google/mediapipe/tree/master/mediapipe/modules/selfie_segmentation technical_details: @@ -36,4 +37,5 @@ 
form_factors: has_static_banner: yes has_animated_banner: yes license_type: apache-2.0 +deploy_license_type: AI Model Hub License dataset: [] diff --git a/qai_hub_models/models/mediapipe_selfie/model.py b/qai_hub_models/models/mediapipe_selfie/model.py index b9c65235..76bdd98f 100644 --- a/qai_hub_models/models/mediapipe_selfie/model.py +++ b/qai_hub_models/models/mediapipe_selfie/model.py @@ -197,8 +197,9 @@ def from_pretrained(cls, image_type: str = DEFAULT_IMAGE_TYPE): front_net.load_state_dict(front_state_dict, strict=True) return front_net - def get_input_spec(self, batch_size: int = 1) -> InputSpec: - if self.image_type == "square": + @staticmethod + def get_input_spec(batch_size: int = 1, image_type: str = "square") -> InputSpec: + if image_type == "square": height, width = 256, 256 else: height, width = 144, 256 diff --git a/qai_hub_models/models/mediapipe_selfie/perf.yaml b/qai_hub_models/models/mediapipe_selfie/perf.yaml index f2615794..8e81b1b0 100644 --- a/qai_hub_models/models/mediapipe_selfie/perf.yaml +++ b/qai_hub_models/models/mediapipe_selfie/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: MediaPipe-Selfie-Segmentation performance_metrics: - torchscript_onnx_tflite: - inference_time: 817.0 - throughput: 1223.9902080783354 + inference_time: 821.0 + throughput: 1218.026796589525 estimated_peak_memory_range: min: 12288 - max: 1802840 + max: 2051880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,14 +43,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jygzljvz5 + job_id: j1p3kox52 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-15T00:06:02.750038Z' torchscript_onnx_qnn: - inference_time: 801.0 - throughput: 1248.4394506866417 + inference_time: 805.0 + throughput: 1242.2360248447205 estimated_peak_memory_range: - min: 811008 - max: 91168416 + min: 815104 + max: 4449664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -55,13 +66,43 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 139 - job_id: jz5wl3mzp + job_id: j1pv3275x + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 555.0 + throughput: 1801.8018018018017 + estimated_peak_memory_range: + min: 12288 + max: 22552848 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 118 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 118 + job_id: jwgoyd458 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:39:49.005922Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-15T00:09:04.960914Z' + torchscript_onnx_qnn: + inference_time: 550.0 + throughput: 1818.1818181818182 + estimated_peak_memory_range: + min: 176128 + max: 42597216 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 139 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 139 + job_id: jlpe967gr + job_status: Passed diff --git a/qai_hub_models/models/mnasnet05/README.md b/qai_hub_models/models/mnasnet05/README.md index 
4605ef09..c89a516d 100644 --- a/qai_hub_models/models/mnasnet05/README.md +++ b/qai_hub_models/models/mnasnet05/README.md @@ -10,7 +10,7 @@ This is based on the implementation of MNASNet05 found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/mnasnet05). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.mnasnet05.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of MNASNet05 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [MnasNet: Platform-Aware Neural Architecture Search for Mobile](https://arxiv.org/abs/1807.11626) diff --git a/qai_hub_models/models/mnasnet05/conftest.py b/qai_hub_models/models/mnasnet05/conftest.py new file mode 100644 index 00000000..2e7f4bb5 --- /dev/null +++ b/qai_hub_models/models/mnasnet05/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.mnasnet05 import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. 
+ """ + mock = patch( + "qai_hub_models.models.mnasnet05.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/mnasnet05/demo.py b/qai_hub_models/models/mnasnet05/demo.py index f674fb1c..1494a5b5 100644 --- a/qai_hub_models/models/mnasnet05/demo.py +++ b/qai_hub_models/models/mnasnet05/demo.py @@ -3,11 +3,11 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo -from qai_hub_models.models.mnasnet05.model import MNASNet05 +from qai_hub_models.models.mnasnet05.model import MODEL_ID, MNASNet05 def main(is_test: bool = False): - imagenet_demo(MNASNet05, is_test) + imagenet_demo(MNASNet05, MODEL_ID, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/mnasnet05/export.py b/qai_hub_models/models/mnasnet05/export.py index 670aaac1..c500d613 100644 --- a/qai_hub_models/models/mnasnet05/export.py +++ b/qai_hub_models/models/mnasnet05/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,35 +109,43 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image_tensor" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -146,33 +154,35 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. 
Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/mnasnet05/info.yaml b/qai_hub_models/models/mnasnet05/info.yaml index be98a382..c3ce91e0 100644 --- a/qai_hub_models/models/mnasnet05/info.yaml +++ b/qai_hub_models/models/mnasnet05/info.yaml @@ -13,6 +13,7 @@ tags: research_paper: https://arxiv.org/abs/1807.11626 research_paper_title: 'MnasNet: Platform-Aware Neural Architecture Search for Mobile' license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/mnasnet.py technical_details: Model checkpoint: Imagenet @@ -35,6 +36,7 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k diff --git a/qai_hub_models/models/mnasnet05/model.py b/qai_hub_models/models/mnasnet05/model.py index 0562410a..1c4e4e86 100644 --- a/qai_hub_models/models/mnasnet05/model.py +++ b/qai_hub_models/models/mnasnet05/model.py @@ -14,6 +14,6 @@ class MNASNet05(ImagenetClassifier): @classmethod - def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> ImagenetClassifier: + def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> MNASNet05: net = tv_models.mnasnet0_5(weights=weights) return cls(net) diff --git a/qai_hub_models/models/mnasnet05/perf.yaml b/qai_hub_models/models/mnasnet05/perf.yaml index 8282b9b2..9536af51 100644 --- a/qai_hub_models/models/mnasnet05/perf.yaml +++ b/qai_hub_models/models/mnasnet05/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: MNASNet05 performance_metrics: - torchscript_onnx_tflite: - inference_time: 370.0 - throughput: 2702.7027027027025 + inference_time: 383.0 + throughput: 2610.9660574412533 estimated_peak_memory_range: - min: 12288 - max: 8955784 + min: 20480 + max: 1718480 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,14 +43,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jmg9zyxvp + job_id: j1p8o1qg9 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:31:16.872390Z' torchscript_onnx_qnn: - inference_time: 367.0 - throughput: 2724.7956403269754 + inference_time: 358.0 + throughput: 2793.2960893854747 estimated_peak_memory_range: - min: 196608 - max: 36330664 + min: 634880 + max: 4722696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -55,13 +66,43 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 102 - job_id: jnp1nwvlg + job_id: jn5q8ve57 + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 282.0 + throughput: 3546.099290780142 + estimated_peak_memory_range: + min: 12288 + max: 44089552 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 69 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 69 + job_id: jogkz8vgd job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:13:59.738307Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:34:31.653300Z' + torchscript_onnx_qnn: + inference_time: 260.0 + throughput: 3846.153846153846 + estimated_peak_memory_range: + min: 0 + max: 33635600 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 102 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 102 + job_id: j1glnl2pv + job_status: Passed diff --git a/qai_hub_models/models/mnasnet05/test.py b/qai_hub_models/models/mnasnet05/test.py index e3758c14..254e9de5 100644 --- a/qai_hub_models/models/mnasnet05/test.py +++ b/qai_hub_models/models/mnasnet05/test.py @@ -2,6 +2,8 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- +import pytest + from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( run_imagenet_classifier_test, run_imagenet_classifier_trace_test, @@ -16,6 +18,7 @@ def test_task(): ) +@pytest.mark.trace def test_trace(): run_imagenet_classifier_trace_test(MNASNet05.from_pretrained()) diff --git a/qai_hub_models/models/mobilenet_v2/README.md b/qai_hub_models/models/mobilenet_v2/README.md index 7d7910b8..65366af1 100644 --- a/qai_hub_models/models/mobilenet_v2/README.md +++ b/qai_hub_models/models/mobilenet_v2/README.md @@ -10,7 +10,7 @@ This is based on the implementation of MobileNet-v2 found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/mobilenet_v2). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.mobilenet_v2.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. 
+models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of MobileNet-v2 can be found [here](https://github.com/tonylins/pytorch-mobilenet-v2/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381) diff --git a/qai_hub_models/models/mobilenet_v2/conftest.py b/qai_hub_models/models/mobilenet_v2/conftest.py new file mode 100644 index 00000000..09f23c24 --- /dev/null +++ b/qai_hub_models/models/mobilenet_v2/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.mobilenet_v2 import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.mobilenet_v2.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/mobilenet_v2/demo.py b/qai_hub_models/models/mobilenet_v2/demo.py index b2100921..b91c82ce 100644 --- a/qai_hub_models/models/mobilenet_v2/demo.py +++ b/qai_hub_models/models/mobilenet_v2/demo.py @@ -3,11 +3,11 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo -from qai_hub_models.models.mobilenet_v2.model import MobileNetV2 +from qai_hub_models.models.mobilenet_v2.model import MODEL_ID, MobileNetV2 def main(is_test: bool = False): - imagenet_demo(MobileNetV2, is_test) + imagenet_demo(MobileNetV2, MODEL_ID, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/mobilenet_v2/export.py b/qai_hub_models/models/mobilenet_v2/export.py index c9c07912..d5d83eb6 100644 --- a/qai_hub_models/models/mobilenet_v2/export.py +++ b/qai_hub_models/models/mobilenet_v2/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,35 +109,43 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. 
Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image_tensor" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -146,33 +154,35 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. 
Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/mobilenet_v2/info.yaml b/qai_hub_models/models/mobilenet_v2/info.yaml index 977fd7ed..693da4bb 100644 --- a/qai_hub_models/models/mobilenet_v2/info.yaml +++ b/qai_hub_models/models/mobilenet_v2/info.yaml @@ -14,6 +14,7 @@ tags: research_paper: https://arxiv.org/abs/1801.04381 research_paper_title: 'MobileNetV2: Inverted Residuals and Linear Bottlenecks' license: https://github.com/tonylins/pytorch-mobilenet-v2/blob/master/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/tonylins/pytorch-mobilenet-v2/tree/master technical_details: Model checkpoint: Imagenet @@ -37,6 +38,7 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k diff --git a/qai_hub_models/models/mobilenet_v2/model.py b/qai_hub_models/models/mobilenet_v2/model.py index b8ba2bc4..3af9c3e9 100644 --- a/qai_hub_models/models/mobilenet_v2/model.py +++ b/qai_hub_models/models/mobilenet_v2/model.py @@ -29,10 +29,10 @@ def __init__( super().__init__(mobilenet_v2_model) @classmethod - def from_pretrained(cls) -> MobileNetV2: + def from_pretrained(cls, weights: str = MOBILENETV2_WEIGHTS) -> MobileNetV2: model = _load_mobilenet_v2_source_model() checkpoint_path = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, MOBILENETV2_WEIGHTS + MODEL_ID, MODEL_ASSET_VERSION, weights ).fetch() checkpoint = torch.load(checkpoint_path, map_location=torch.device("cpu")) # rename classifier.1.weight -> classifier.weight, and bias similarly @@ -45,9 +45,7 @@ def from_pretrained(cls) -> MobileNetV2: return cls(model) -def _load_mobilenet_v2_source_model( - keep_sys_path=False, -) -> torch.nn.Module: +def _load_mobilenet_v2_source_model() -> torch.nn.Module: cfg_path = CachedWebModelAsset.from_asset_store( MODEL_ID, MODEL_ASSET_VERSION, MOBILENETV2_CFG ).fetch() @@ -58,7 +56,6 @@ def _load_mobilenet_v2_source_model( MOBILENETV2_SOURCE_REPO_COMMIT, MODEL_ID, MODEL_ASSET_VERSION, - keep_sys_path=keep_sys_path, ): # necessary import. `modeling.deeplab` comes from the DeepLabV3 repo. 
from MobileNetV2 import MobileNetV2 as _MobileNetV2 diff --git a/qai_hub_models/models/mobilenet_v2/perf.yaml b/qai_hub_models/models/mobilenet_v2/perf.yaml index 7aa2220c..eaee0237 100644 --- a/qai_hub_models/models/mobilenet_v2/perf.yaml +++ b/qai_hub_models/models/mobilenet_v2/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: MobileNet-v2 performance_metrics: - torchscript_onnx_tflite: - inference_time: 533.0 - throughput: 1876.172607879925 + inference_time: 540.0 + throughput: 1851.851851851852 estimated_peak_memory_range: - min: 20480 - max: 1466112 + min: 12288 + max: 1921936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,14 +43,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jep2r9vmg + job_id: jygzeyzg8 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:53:26.997975Z' torchscript_onnx_qnn: - inference_time: 809.0 - throughput: 1236.0939431396787 + inference_time: 808.0 + throughput: 1237.6237623762377 estimated_peak_memory_range: - min: 618496 - max: 5733064 + min: 622592 + max: 6011376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -55,13 +66,43 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 104 - job_id: jqpyoj745 + job_id: jmg9v2q57 + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 393.0 + throughput: 2544.529262086514 + estimated_peak_memory_range: + min: 12288 + max: 55502880 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 70 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 70 + job_id: jz5wozzp1 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:29:39.371442Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:57:53.996541Z' + torchscript_onnx_qnn: + inference_time: 537.0 + throughput: 1862.1973929236499 + estimated_peak_memory_range: + min: 618496 + max: 37101856 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 104 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 104 + job_id: jnp101k5q + job_status: Passed diff --git a/qai_hub_models/models/mobilenet_v2/test.py b/qai_hub_models/models/mobilenet_v2/test.py index 3b688c68..9a629c73 100644 --- a/qai_hub_models/models/mobilenet_v2/test.py +++ b/qai_hub_models/models/mobilenet_v2/test.py @@ -2,6 +2,8 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
# SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- +import pytest + from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( run_imagenet_classifier_test, run_imagenet_classifier_trace_test, @@ -25,6 +27,7 @@ def test_task(): ) +@pytest.mark.trace @skip_clone_repo_check def test_trace(): run_imagenet_classifier_trace_test(MobileNetV2.from_pretrained()) diff --git a/qai_hub_models/models/mobilenet_v2_quantized/README.md b/qai_hub_models/models/mobilenet_v2_quantized/README.md index 541b9418..7a3bcf84 100644 --- a/qai_hub_models/models/mobilenet_v2_quantized/README.md +++ b/qai_hub_models/models/mobilenet_v2_quantized/README.md @@ -10,7 +10,7 @@ This is based on the implementation of MobileNet-v2-Quantized found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/mobilenet_v2_quantized). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.mobilenet_v2_quantized.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of MobileNet-v2-Quantized can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381) diff --git a/qai_hub_models/models/mobilenet_v2_quantized/conftest.py b/qai_hub_models/models/mobilenet_v2_quantized/conftest.py new file mode 100644 index 00000000..0ce2d5b9 --- /dev/null +++ b/qai_hub_models/models/mobilenet_v2_quantized/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.mobilenet_v2_quantized import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. 
+ """ + mock = patch( + "qai_hub_models.models.mobilenet_v2_quantized.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/mobilenet_v2_quantized/demo.py b/qai_hub_models/models/mobilenet_v2_quantized/demo.py index 89bb7cce..7bf265df 100644 --- a/qai_hub_models/models/mobilenet_v2_quantized/demo.py +++ b/qai_hub_models/models/mobilenet_v2_quantized/demo.py @@ -3,11 +3,14 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo -from qai_hub_models.models.mobilenet_v2_quantized.model import MobileNetV2Quantizable +from qai_hub_models.models.mobilenet_v2_quantized.model import ( + MODEL_ID, + MobileNetV2Quantizable, +) def main(is_test: bool = False): - imagenet_demo(MobileNetV2Quantizable, is_test) + imagenet_demo(MobileNetV2Quantizable, MODEL_ID, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/mobilenet_v2_quantized/export.py b/qai_hub_models/models/mobilenet_v2_quantized/export.py index 63655d62..f73c0796 100644 --- a/qai_hub_models/models/mobilenet_v2_quantized/export.py +++ b/qai_hub_models/models/mobilenet_v2_quantized/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub @@ -120,8 +120,8 @@ def export_model( model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image_tensor" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), @@ -129,21 +129,29 @@ def export_model( calibration_data=quant_calibration_data, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -155,35 +163,37 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. 
Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics( inference_job, inference_result, torch_out, metrics="psnr,top1,top5" ) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/mobilenet_v2_quantized/info.yaml b/qai_hub_models/models/mobilenet_v2_quantized/info.yaml index 21ac3a89..302fcc0a 100644 --- a/qai_hub_models/models/mobilenet_v2_quantized/info.yaml +++ b/qai_hub_models/models/mobilenet_v2_quantized/info.yaml @@ -14,6 +14,7 @@ tags: research_paper: https://arxiv.org/abs/1801.04381 research_paper_title: 'MobileNetV2: Inverted Residuals and Linear Bottlenecks' license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/mobilenetv2 technical_details: Model checkpoint: Imagenet @@ -37,6 +38,7 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k diff --git a/qai_hub_models/models/mobilenet_v2_quantized/model.py b/qai_hub_models/models/mobilenet_v2_quantized/model.py index 5c922ffe..d72efcb4 100644 --- a/qai_hub_models/models/mobilenet_v2_quantized/model.py +++ b/qai_hub_models/models/mobilenet_v2_quantized/model.py @@ -8,13 +8,13 @@ # This verifies aimet is installed, and this must be included first. 
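The export flow above now types `profile_job` and `inference_job` as `Optional`, so callers should expect `None` whenever the corresponding stage is skipped. A minimal sketch of a programmatic call, assuming Qualcomm AI Hub credentials are already configured; the device name is only an example:

```python
# Illustrative call into the generated export script; profiling and inference
# are skipped here, so the corresponding return values are None.
from qai_hub_models.models.mobilenet_v2_quantized.export import export_model

compile_job, profile_job, inference_job = export_model(
    device="Samsung Galaxy S23",
    skip_profiling=True,
    skip_inferencing=True,
)
assert profile_job is None and inference_job is None
```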
from qai_hub_models.utils.quantization_aimet import ( AIMETQuantizableMixin, - HubCompileOptionsInt8Mixin, ) # isort: on import torch from aimet_torch.cross_layer_equalization import equalize_model +from aimet_torch.model_preparer import prepare_model from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim from qai_hub_models.models.mobilenet_v2.model import ( @@ -24,18 +24,17 @@ from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config from qai_hub_models.utils.asset_loaders import CachedWebModelAsset from qai_hub_models.utils.base_model import SourceModelFormat, TargetRuntime +from qai_hub_models.utils.quantization_aimet import convert_all_depthwise_to_per_tensor MODEL_ID = __name__.split(".")[-2] -MODEL_ASSET_VERSION = 2 +MODEL_ASSET_VERSION = 3 # Weights downloaded from https://github.com/quic/aimet-model-zoo/releases/download/phase_2_january_artifacts/torch_mobilenetv2_w8a8_state_dict.pth QUANTIZED_WEIGHTS = "torch_mobilenetv2_w8a8_state_dict.pth" -DEFAULT_ENCODINGS = "encodings.json" +DEFAULT_ENCODINGS = "mobilenet_v2_quantized_encodings.json" -class MobileNetV2Quantizable( - HubCompileOptionsInt8Mixin, AIMETQuantizableMixin, MobileNetV2 -): +class MobileNetV2Quantizable(AIMETQuantizableMixin, MobileNetV2): """MobileNetV2 with post train quantization support.""" def __init__( @@ -66,13 +65,12 @@ def from_pretrained( else: Interprets as a filepath and loads the encodings stored there. """ # Load Model - model_fp32 = _load_mobilenet_v2_source_model( - keep_sys_path=True, - ) - input_shape = MobileNetV2(None).get_input_spec()["image_tensor"][0] + model = _load_mobilenet_v2_source_model() + input_shape = cls.get_input_spec()["image_tensor"][0] # Following # https://github.com/quic/aimet-model-zoo/blob/develop/aimet_zoo_torch/mobilenetv2/model/model_definition.py#L64 - equalize_model(model_fp32, input_shape) + model = prepare_model(model) + equalize_model(model, input_shape) # Download weights and quantization parameters weights = CachedWebModelAsset.from_asset_store( @@ -80,21 +78,22 @@ def from_pretrained( ).fetch() aimet_config = get_default_aimet_config() - # Load the QAT/PTQ tuned model_fp32 weights + # Load the QAT/PTQ tuned model weights checkpoint = torch.load(weights, map_location=torch.device("cpu")) state_dict = { k.replace("classifier.1", "classifier"): v for k, v in checkpoint["state_dict"].items() } - model_fp32.load_state_dict(state_dict) + model.load_state_dict(state_dict) sim = QuantizationSimModel( - model_fp32, + model, quant_scheme="tf_enhanced", default_param_bw=8, default_output_bw=8, config_file=aimet_config, dummy_input=torch.rand(input_shape), ) + convert_all_depthwise_to_per_tensor(sim.model) if aimet_encodings: if aimet_encodings == "DEFAULT": @@ -105,3 +104,11 @@ def from_pretrained( sim.model.eval() return cls(sim) + + def get_hub_compile_options( + self, target_runtime: TargetRuntime, other_compile_options: str = "" + ) -> str: + compile_options = super().get_hub_compile_options( + target_runtime, other_compile_options + ) + return compile_options + " --quantize_full_type int8 --quantize_io" diff --git a/qai_hub_models/models/mobilenet_v2_quantized/perf.yaml b/qai_hub_models/models/mobilenet_v2_quantized/perf.yaml index 9521afec..55ab7eff 100644 --- a/qai_hub_models/models/mobilenet_v2_quantized/perf.yaml +++ b/qai_hub_models/models/mobilenet_v2_quantized/perf.yaml @@ -17,51 +17,92 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - 
Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: MobileNet-v2-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 240.0 - throughput: 4166.666666666667 + inference_time: 237.0 + throughput: 4219.4092827004215 estimated_peak_memory_range: min: 12288 - max: 1557248 + max: 1520264 primary_compute_unit: NPU - precision: fp16 + precision: int8 layer_info: layers_on_npu: 70 layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: j1p8em3zp + job_id: j1p3klz52 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:42:55.837359Z' torchscript_onnx_qnn: - inference_time: 'null' - throughput: 'null' + inference_time: 352.0 + throughput: 2840.909090909091 estimated_peak_memory_range: - min: 0 - max: 0 - primary_compute_unit: 'null' - precision: 'null' + min: 135168 + max: 94316568 + primary_compute_unit: NPU + precision: int8 layer_info: - layers_on_npu: 0 + layers_on_npu: 69 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 0 - job_id: '' - job_status: Skipped + total_layers: 69 + job_id: j1pv3ym5x + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 168.0 + throughput: 5952.380952380952 + estimated_peak_memory_range: + min: 12288 + max: 35960128 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 70 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 70 + job_id: jwgoy7d58 + job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:15:21.382192Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:47:22.207861Z' + torchscript_onnx_qnn: + inference_time: 253.0 + throughput: 3952.5691699604745 + estimated_peak_memory_range: + min: 163840 + max: 35983856 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 69 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 69 + job_id: j7gjx68pd + job_status: Passed diff --git a/qai_hub_models/models/mobilenet_v2_quantized/test.py b/qai_hub_models/models/mobilenet_v2_quantized/test.py index b6c94ba4..9837761a 100644 --- a/qai_hub_models/models/mobilenet_v2_quantized/test.py +++ b/qai_hub_models/models/mobilenet_v2_quantized/test.py @@ -4,7 +4,6 @@ # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( run_imagenet_classifier_test, - run_imagenet_classifier_trace_test, ) from qai_hub_models.models.mobilenet_v2_quantized.demo import main as demo_main from qai_hub_models.models.mobilenet_v2_quantized.model import ( @@ -26,15 +25,6 @@ def test_task(): ) -@skip_clone_repo_check -def test_trace(): - run_imagenet_classifier_trace_test( - MobileNetV2Quantizable.from_pretrained(), - is_quantized=True, - atol=0.03, - ) - - @skip_clone_repo_check def test_demo(): # Verify demo does not crash diff --git a/qai_hub_models/models/mobilenet_v3_large/README.md b/qai_hub_models/models/mobilenet_v3_large/README.md index d36f9fa9..dc1194bc 100644 --- a/qai_hub_models/models/mobilenet_v3_large/README.md +++ b/qai_hub_models/models/mobilenet_v3_large/README.md @@ -3,14 +3,14 @@ # [MobileNet-v3-Large: Imagenet classifier and general purpose 
backbone](https://aihub.qualcomm.com/models/mobilenet_v3_large) -MobileNetV3Large is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. +MobileNet-v3-Large is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. This is based on the implementation of MobileNet-v3-Large found [here](https://github.com/pytorch/vision/blob/main/torchvision/models/mobilenetv3.py). This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/mobilenet_v3_large). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.mobilenet_v3_large.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of MobileNet-v3-Large can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Searching for MobileNetV3](https://arxiv.org/abs/1905.02244) diff --git a/qai_hub_models/models/mobilenet_v3_large/conftest.py b/qai_hub_models/models/mobilenet_v3_large/conftest.py new file mode 100644 index 00000000..9733dfed --- /dev/null +++ b/qai_hub_models/models/mobilenet_v3_large/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.mobilenet_v3_large import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. 
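The auto-generated conftest above patches `Model.from_pretrained` with a single cached instance for the whole test session. Purely to illustrate the effect (this test does not exist in the patch):

```python
# Illustrative only: with the autouse fixture above active, every call to
# from_pretrained() returns the same cached object, so tests in this package
# never rebuild or re-download the network.
from qai_hub_models.models.mobilenet_v3_large import Model


def test_from_pretrained_is_cached():
    assert Model.from_pretrained() is Model.from_pretrained()
```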
+ """ + mock = patch( + "qai_hub_models.models.mobilenet_v3_large.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/mobilenet_v3_large/demo.py b/qai_hub_models/models/mobilenet_v3_large/demo.py index 46a8418d..2eabd08a 100644 --- a/qai_hub_models/models/mobilenet_v3_large/demo.py +++ b/qai_hub_models/models/mobilenet_v3_large/demo.py @@ -3,11 +3,11 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo -from qai_hub_models.models.mobilenet_v3_large.model import MobileNetV3Large +from qai_hub_models.models.mobilenet_v3_large.model import MODEL_ID, MobileNetV3Large def main(is_test: bool = False): - imagenet_demo(MobileNetV3Large, is_test) + imagenet_demo(MobileNetV3Large, MODEL_ID, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/mobilenet_v3_large/export.py b/qai_hub_models/models/mobilenet_v3_large/export.py index 50da73da..5bb2fffd 100644 --- a/qai_hub_models/models/mobilenet_v3_large/export.py +++ b/qai_hub_models/models/mobilenet_v3_large/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,35 +109,43 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image_tensor" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." 
) @@ -146,33 +154,35 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/mobilenet_v3_large/info.yaml b/qai_hub_models/models/mobilenet_v3_large/info.yaml index d733455e..be6173d9 100644 --- a/qai_hub_models/models/mobilenet_v3_large/info.yaml +++ b/qai_hub_models/models/mobilenet_v3_large/info.yaml @@ -4,7 +4,7 @@ id: mobilenet_v3_large status: public headline: Imagenet classifier and general purpose backbone. domain: Computer Vision -description: MobileNetV3Large is a machine learning model that can classify images +description: MobileNet-v3-Large is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. 
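Several README diffs in this patch replace the hard-coded license link with a `{deploy_license_url}` placeholder, while each `info.yaml` gains matching `deploy_license` and `deploy_license_type` fields. The step that fills the placeholder in is not part of this patch, so the following is only a guess at how such a substitution could look; the paths and the field-to-placeholder mapping are assumptions:

```python
# Hypothetical README templating step (not shown in this patch): substitute the
# {deploy_license_url} placeholder using the deploy_license field of info.yaml.
from pathlib import Path

import yaml

model_dir = Path("qai_hub_models/models/mobilenet_v3_large")
info = yaml.safe_load((model_dir / "info.yaml").read_text())
readme = (model_dir / "README.md").read_text()
(model_dir / "README.md").write_text(
    readme.replace("{deploy_license_url}", info["deploy_license"])
)
```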
use_case: Image Classification @@ -14,6 +14,7 @@ tags: research_paper: https://arxiv.org/abs/1905.02244 research_paper_title: Searching for MobileNetV3 license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/mobilenetv3.py technical_details: Model checkpoint: Imagenet @@ -36,6 +37,7 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k diff --git a/qai_hub_models/models/mobilenet_v3_large/model.py b/qai_hub_models/models/mobilenet_v3_large/model.py index 578f0355..a69d5547 100644 --- a/qai_hub_models/models/mobilenet_v3_large/model.py +++ b/qai_hub_models/models/mobilenet_v3_large/model.py @@ -14,6 +14,6 @@ class MobileNetV3Large(ImagenetClassifier): @classmethod - def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> ImagenetClassifier: + def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> MobileNetV3Large: net = tv_models.mobilenet_v3_large(weights=weights) return cls(net) diff --git a/qai_hub_models/models/mobilenet_v3_large/perf.yaml b/qai_hub_models/models/mobilenet_v3_large/perf.yaml index 5907cc30..a03a69e5 100644 --- a/qai_hub_models/models/mobilenet_v3_large/perf.yaml +++ b/qai_hub_models/models/mobilenet_v3_large/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: MobileNet-v3-Large performance_metrics: - torchscript_onnx_tflite: - inference_time: 600.0 - throughput: 1666.6666666666667 + inference_time: 603.0 + throughput: 1658.374792703151 estimated_peak_memory_range: - min: 32768 - max: 17746392 + min: 12288 + max: 2319320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,8 +43,16 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 134 - job_id: j1gly2ee5 + job_id: jnp10025q job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:10:33.448407Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -57,11 +68,41 @@ models: total_layers: 0 job_id: '' job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 433.0 + throughput: 2309.4688221709007 + estimated_peak_memory_range: + min: 12288 + max: 60000912 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 134 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 134 + job_id: jvgdwwe5j + job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:19:38.868341Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:10:33.448414Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped diff 
--git a/qai_hub_models/models/mobilenet_v3_large/test.py b/qai_hub_models/models/mobilenet_v3_large/test.py index 60de58c0..fb3cafa2 100644 --- a/qai_hub_models/models/mobilenet_v3_large/test.py +++ b/qai_hub_models/models/mobilenet_v3_large/test.py @@ -2,6 +2,8 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- +import pytest + from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( run_imagenet_classifier_test, run_imagenet_classifier_trace_test, @@ -14,6 +16,7 @@ def test_task(): run_imagenet_classifier_test(MobileNetV3Large.from_pretrained(), MODEL_ID) +@pytest.mark.trace def test_trace(): run_imagenet_classifier_trace_test(MobileNetV3Large.from_pretrained()) diff --git a/qai_hub_models/models/mobilenet_v3_large_quantized/README.md b/qai_hub_models/models/mobilenet_v3_large_quantized/README.md new file mode 100644 index 00000000..da95e166 --- /dev/null +++ b/qai_hub_models/models/mobilenet_v3_large_quantized/README.md @@ -0,0 +1,54 @@ +[![Qualcomm® AI Hub Models](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/quic-logo.jpg)](../../README.md) + + +# [MobileNet-v3-Large-Quantized: Imagenet classifier and general purpose backbone](https://aihub.qualcomm.com/models/mobilenet_v3_large_quantized) + +MobileNet-v3-Large is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. + +This is based on the implementation of MobileNet-v3-Large-Quantized found +[here](https://github.com/pytorch/vision/blob/main/torchvision/models/mobilenetv3.py). This repository contains scripts for optimized on-device +export suitable to run on Qualcomm® devices. More details on model performance +accross various devices, can be found [here](https://aihub.qualcomm.com/models/mobilenet_v3_large_quantized). + +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on +a hosted Qualcomm® device. + + +## Example & Usage + + +Once installed, run the following simple CLI demo: + +```bash +python -m qai_hub_models.models.mobilenet_v3_large_quantized.demo +``` +More details on the CLI tool can be found with the `--help` option. See +[demo.py](demo.py) for sample usage of the model including pre/post processing +scripts. Please refer to our [general instructions on using +models](../../../#getting-started) for more usage instructions. + +## Export for on-device deployment + +This repository contains export scripts that produce a model optimized for +on-device deployment. This can be run as follows: + +```bash +python -m qai_hub_models.models.mobilenet_v3_large_quantized.export +``` +Additional options are documented with the `--help` option. Note that the above +script requires access to Deployment instructions for Qualcomm® AI Hub. + +## License +- The license for the original implementation of MobileNet-v3-Large-Quantized can be found + [here](https://github.com/pytorch/vision/blob/main/LICENSE). 
+- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) + +## References +* [Searching for MobileNetV3](https://arxiv.org/abs/1905.02244) +* [Source Model Implementation](https://github.com/pytorch/vision/blob/main/torchvision/models/mobilenetv3.py) + +## Community +* Join [our AI Hub Slack community](https://join.slack.com/t/qualcomm-ai-hub/shared_invite/zt-2dgf95loi-CXHTDRR1rvPgQWPO~ZZZJg) to collaborate, post questions and learn more about on-device AI. +* For questions or feedback please [reach out to us](mailto:ai-hub-support@qti.qualcomm.com). + + diff --git a/qai_hub_models/models/mobilenet_v3_large_quantized/__init__.py b/qai_hub_models/models/mobilenet_v3_large_quantized/__init__.py new file mode 100644 index 00000000..32a17b68 --- /dev/null +++ b/qai_hub_models/models/mobilenet_v3_large_quantized/__init__.py @@ -0,0 +1,13 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.imagenet_classifier.app import ( # noqa: F401 + ImagenetClassifierApp as App, +) +from qai_hub_models.models.mobilenet_v3_large_quantized.model import ( # noqa: F401 + MODEL_ID, +) +from qai_hub_models.models.mobilenet_v3_large_quantized.model import ( # noqa: F401 + MobileNetV3LargeQuantizable as Model, +) diff --git a/qai_hub_models/models/mobilenet_v3_large_quantized/conftest.py b/qai_hub_models/models/mobilenet_v3_large_quantized/conftest.py new file mode 100644 index 00000000..7faea3d5 --- /dev/null +++ b/qai_hub_models/models/mobilenet_v3_large_quantized/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.mobilenet_v3_large_quantized import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.mobilenet_v3_large_quantized.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/mobilenet_v3_large_quantized/demo.py b/qai_hub_models/models/mobilenet_v3_large_quantized/demo.py new file mode 100644 index 00000000..5577f0f4 --- /dev/null +++ b/qai_hub_models/models/mobilenet_v3_large_quantized/demo.py @@ -0,0 +1,23 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo +from qai_hub_models.models.mobilenet_v3_large_quantized.model import ( + MODEL_ID, + MobileNetV3LargeQuantizable, +) +from qai_hub_models.utils.base_model import TargetRuntime + + +def main(is_test: bool = False): + imagenet_demo( + MobileNetV3LargeQuantizable, + MODEL_ID, + is_test, + available_target_runtimes=[TargetRuntime.TFLITE], + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/mobilenet_v3_large_quantized/export.py b/qai_hub_models/models/mobilenet_v3_large_quantized/export.py new file mode 100644 index 00000000..5305d6b9 --- /dev/null +++ b/qai_hub_models/models/mobilenet_v3_large_quantized/export.py @@ -0,0 +1,202 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import os +import warnings +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple, cast + +import qai_hub as hub + +from qai_hub_models.models.mobilenet_v3_large_quantized import Model +from qai_hub_models.utils.args import ( + export_parser, + get_input_spec_kwargs, + get_model_kwargs, +) +from qai_hub_models.utils.base_model import TargetRuntime +from qai_hub_models.utils.compare import torch_inference +from qai_hub_models.utils.printing import ( + print_inference_metrics, + print_on_target_demo_cmd, + print_profile_metrics_from_job, +) +from qai_hub_models.utils.qai_hub_helpers import ( + can_access_qualcomm_ai_hub, + export_without_hub_access, +) +from qai_hub_models.utils.qnn_helpers import get_qnn_inputs + + +def export_model( + device: str = "Samsung Galaxy S23", + skip_profiling: bool = False, + skip_inferencing: bool = False, + skip_downloading: bool = False, + skip_summary: bool = False, + output_dir: Optional[str] = None, + target_runtime: TargetRuntime = TargetRuntime.TFLITE, + compile_options: str = "", + profile_options: str = "", + **additional_model_kwargs, +) -> Tuple[hub.CompileJob, Optional[hub.ProfileJob], Optional[hub.InferenceJob]] | List[ + str +]: + """ + This function accomplishes 6 main tasks: + + 1. Instantiates a PyTorch model and converts it to a traced TorchScript format. + 2. Compiles the model to an asset that can be run on device. + 3. Profiles the model performance on real devices. + 4. Inferences the model on sample inputs. + 5. Downloads the model asset to the local directory. + 6. Summarizes the results from profiling and inference. + + Each of the last four steps can be optionally skipped using the input options. + + Parameters: + device: Device for which to export the model. + Full list of available devices can be found by running `hub.get_devices()`. + Defaults to DEFAULT_DEVICE if not specified. + skip_profiling: If set, skips profiling of compiled model on real devices. + skip_inferencing: If set, skips computing on-device outputs from sample data. + skip_downloading: If set, skips downloading of compiled model. + skip_summary: If set, skips waiting for and summarizing results + from profiling and inference. + output_dir: Directory to store generated assets (e.g. compiled model). + Defaults to `/build/`. 
+ target_runtime: Which on-device runtime to target. Default is TFLite. + compile_options: Additional options to pass when submitting the compile job. + profile_options: Additional options to pass when submitting the profile job. + **additional_model_kwargs: Additional optional kwargs used to customize + `model_cls.from_pretrained` and `model.get_input_spec` + + Returns: + A 3-tuple of: + * A CompileJob object containing metadata about the compile job submitted to hub. + * A ProfileJob containing metadata about the profile job (None if profiling skipped). + * An InferenceJob containing metadata about the inference job (None if inferencing skipped). + """ + model_name = "mobilenet_v3_large_quantized" + output_path = Path(output_dir or Path.cwd() / "build" / model_name) + if not can_access_qualcomm_ai_hub(): + return export_without_hub_access( + "mobilenet_v3_large_quantized", + "MobileNet-v3-Large-Quantized", + device, + skip_profiling, + skip_inferencing, + skip_downloading, + skip_summary, + output_path, + target_runtime, + compile_options, + profile_options, + ) + + # 1. Initialize PyTorch model + model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) + input_spec = model.get_input_spec( + **get_input_spec_kwargs(model, additional_model_kwargs) + ) + + # Trace the model + source_model = model.convert_to_hub_source_model( + target_runtime, output_path, input_spec + ) + if target_runtime == TargetRuntime.TFLITE: + quant_calibration_data = None + else: + quant_calibration_data = model.get_calibration_data(target_runtime, input_spec) + + # 2. Compile the model to an on-device asset + model_compile_options = model.get_hub_compile_options( + target_runtime, compile_options + ) + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( + model=source_model, + input_specs=input_spec, + device=hub.Device(device), + name=model_name, + calibration_data=quant_calibration_data, + options=model_compile_options, + ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) + + # 3. Profile the model asset on real devices + profile_job: Optional[hub.client.ProfileJob] = None + if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print(f"Profiling model {model_name} on a hosted device.") + submitted_profile_job = hub.submit_profile_job( + model=compile_job.get_target_model(), + device=hub.Device(device), + name=model_name, + options=profile_options_all, + ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) + + # 4. Run inference on-device with sample inputs + inference_job: Optional[hub.client.InferenceJob] = None + if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print( + f"Running inference for {model_name} on a hosted device with example inputs." + ) + sample_inputs = model.sample_inputs(input_spec) + hub_inputs = sample_inputs + if target_runtime == TargetRuntime.QNN: + hub_inputs = get_qnn_inputs(compile_job, sample_inputs) + submitted_inference_job = hub.submit_inference_job( + model=compile_job.get_target_model(), + inputs=hub_inputs, + device=hub.Device(device), + name=model_name, + options=profile_options_all, + ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) + + # 5. 
Download the model asset to a local file + if not skip_downloading: + os.makedirs(output_path, exist_ok=True) + target_model: hub.Model = compile_job.get_target_model() # type: ignore + target_model.download(str(output_path / f"{model_name}.tflite")) + + # 6. Summarize the results from profiling and inference + if not skip_summary and not skip_profiling: + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore + print_profile_metrics_from_job(profile_job, profile_data) + + if not skip_summary and not skip_inferencing: + torch_out = torch_inference(model, sample_inputs) + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore + print_inference_metrics(inference_job, inference_result, torch_out) + + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + + return (compile_job, profile_job, inference_job) + + +def main(): + warnings.filterwarnings("ignore") + parser = export_parser(model_cls=Model, supports_qnn=False) + args = parser.parse_args() + export_model(**vars(args)) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/mobilenet_v3_large_quantized/info.yaml b/qai_hub_models/models/mobilenet_v3_large_quantized/info.yaml new file mode 100644 index 00000000..9232ebd6 --- /dev/null +++ b/qai_hub_models/models/mobilenet_v3_large_quantized/info.yaml @@ -0,0 +1,44 @@ +name: MobileNet-v3-Large-Quantized +# id must match with the model dir name in qai_hub_models +id: mobilenet_v3_large_quantized +status: public +headline: Imagenet classifier and general purpose backbone. +domain: Computer Vision +description: MobileNet-v3-Large is a machine learning model that can classify images + from the Imagenet dataset. It can also be used as a backbone in building more complex + models for specific use cases. +use_case: Image Classification +tags: + - quantized + - backbone + - real-time +research_paper: https://arxiv.org/abs/1905.02244 +research_paper_title: Searching for MobileNetV3 +license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/mobilenetv3.py +technical_details: + Model checkpoint: Imagenet + Input resolution: 224x224 + Number of parameters: 5.47M + Model size: 5.79 MB +applicable_scenarios: + - Medical Imaging + - Anomaly Detection + - Inventory Management +related_models: + - mobilenet_v2 + - densenet121 + - googlenet +form_factors: + - Phone + - Tablet + - IoT + - XR +has_static_banner: yes +has_animated_banner: yes +license_type: bsd-3-clause +deploy_license_type: AI Model Hub License +dataset: + - imagenet-1k + - imagenet-22k diff --git a/qai_hub_models/models/mobilenet_v3_large_quantized/model.py b/qai_hub_models/models/mobilenet_v3_large_quantized/model.py new file mode 100644 index 00000000..55b92db4 --- /dev/null +++ b/qai_hub_models/models/mobilenet_v3_large_quantized/model.py @@ -0,0 +1,85 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from __future__ import annotations + +# isort: off +# This verifies aimet is installed, and this must be included first. +from qai_hub_models.utils.quantization_aimet import ( + AIMETQuantizableMixin, +) + +# isort: on + +import torch +from aimet_torch.cross_layer_equalization import equalize_model +from aimet_torch.model_preparer import prepare_model +from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim + +from qai_hub_models.models.mobilenet_v3_large.model import MobileNetV3Large +from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset +from qai_hub_models.utils.base_model import SourceModelFormat, TargetRuntime + +MODEL_ID = __name__.split(".")[-2] +MODEL_ASSET_VERSION = 1 +DEFAULT_ENCODINGS = "mobilenet_v3_large_quantized_encodings.json" + + +class MobileNetV3LargeQuantizable(AIMETQuantizableMixin, MobileNetV3Large): + """MobileNetV3Large with post train quantization support. + + Supports only 8 bit weights and activations, and only loads pre-quantized checkpoints. + Support for quantizing using your own weights & data will come at a later date.""" + + def __init__( + self, + sim_model: QuantizationSimModel, + ) -> None: + MobileNetV3Large.__init__(self, sim_model.model) + AIMETQuantizableMixin.__init__( + self, + sim_model, + ) + + def preferred_hub_source_model_format( + self, target_runtime: TargetRuntime + ) -> SourceModelFormat: + return SourceModelFormat.ONNX + + @classmethod + def from_pretrained( + cls, + aimet_encodings: str | None = "DEFAULT", + ) -> "MobileNetV3LargeQuantizable": + """ + Parameters: + aimet_encodings: + if "DEFAULT": Loads the model with aimet encodings calibrated on imagenette. + elif None: Doesn't load any encodings. Used when computing encodings. + else: Interprets as a filepath and loads the encodings stored there. 
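The `from_pretrained` docstring above describes three modes for `aimet_encodings`. A small usage sketch; the encodings file name is a placeholder, not an asset shipped in this patch:

```python
# Illustrative only: load the quantizable model with locally computed AIMET
# encodings instead of the default set fetched from the asset store.
from qai_hub_models.models.mobilenet_v3_large_quantized import Model

model = Model.from_pretrained(aimet_encodings="my_mobilenet_v3_encodings.json")

# Passing None skips loading encodings entirely, e.g. when computing fresh
# encodings against a new calibration dataset.
uncalibrated = Model.from_pretrained(aimet_encodings=None)
```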
+ """ + model = MobileNetV3Large.from_pretrained() + input_shape = cls.get_input_spec()["image_tensor"][0] + + model = prepare_model(model) + equalize_model(model, input_shape) + sim = QuantizationSimModel( + model, + quant_scheme="tf_enhanced", + default_param_bw=8, + default_output_bw=8, + config_file=get_default_aimet_config(), + dummy_input=torch.rand(input_shape), + ) + + if aimet_encodings: + if aimet_encodings == "DEFAULT": + aimet_encodings = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, DEFAULT_ENCODINGS + ).fetch() + load_encodings_to_sim(sim, aimet_encodings) + + sim.model.eval() + return cls(sim) diff --git a/qai_hub_models/models/mobilenet_v3_large_quantized/perf.yaml b/qai_hub_models/models/mobilenet_v3_large_quantized/perf.yaml new file mode 100644 index 00000000..724d7aa9 --- /dev/null +++ b/qai_hub_models/models/mobilenet_v3_large_quantized/perf.yaml @@ -0,0 +1,108 @@ +aggregated: + supported_oses: + - Android + supported_devices: + - Google Pixel 3 + - Google Pixel 3a + - Google Pixel 3a XL + - Google Pixel 4 + - Google Pixel 4a + - Google Pixel 5a 5G + - Samsung Galaxy S21 + - Samsung Galaxy S21 Ultra + - Samsung Galaxy S21+ + - Samsung Galaxy S22 5G + - Samsung Galaxy S22 Ultra 5G + - Samsung Galaxy S22+ 5G + - Samsung Galaxy S23 + - Samsung Galaxy S23 Ultra + - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra + - Samsung Galaxy Tab S8 + - Xiaomi 12 + - Xiaomi 12 Pro + supported_chipsets: + - Snapdragon® 8 Gen 1 + - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 + - Snapdragon® 888 +models: +- name: MobileNet-v3-Large-Quantized + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 2972.0 + throughput: 336.47375504710635 + estimated_peak_memory_range: + min: 12288 + max: 3564432 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 136 + layers_on_gpu: 0 + layers_on_cpu: 15 + total_layers: 151 + job_id: j1pv3m75x + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:39:39.924043Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 2352.0 + throughput: 425.1700680272109 + estimated_peak_memory_range: + min: 0 + max: 46180704 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 136 + layers_on_gpu: 0 + layers_on_cpu: 15 + total_layers: 151 + job_id: jlpe9x7gr + job_status: Passed + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:39:39.924051Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped diff --git a/qai_hub_models/models/mobilenet_v3_large_quantized/test.py b/qai_hub_models/models/mobilenet_v3_large_quantized/test.py new file mode 100644 index 00000000..6767deef --- /dev/null +++ b/qai_hub_models/models/mobilenet_v3_large_quantized/test.py @@ -0,0 +1,29 
@@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( + run_imagenet_classifier_test, +) +from qai_hub_models.models.mobilenet_v3_large_quantized.demo import main as demo_main +from qai_hub_models.models.mobilenet_v3_large_quantized.model import ( + MODEL_ASSET_VERSION, + MODEL_ID, + MobileNetV3LargeQuantizable, +) + + +def test_task(): + run_imagenet_classifier_test( + MobileNetV3LargeQuantizable.from_pretrained(), + MODEL_ID, + asset_version=MODEL_ASSET_VERSION, + diff_tol=0.005, + rtol=0.02, + atol=0.2, + ) + + +def test_demo(): + # Verify demo does not crash + demo_main(is_test=True) diff --git a/qai_hub_models/models/mobilenet_v3_small/README.md b/qai_hub_models/models/mobilenet_v3_small/README.md index 41d5e976..25937f85 100644 --- a/qai_hub_models/models/mobilenet_v3_small/README.md +++ b/qai_hub_models/models/mobilenet_v3_small/README.md @@ -10,7 +10,7 @@ This is based on the implementation of MobileNet-v3-Small found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/mobilenet_v3_small). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.mobilenet_v3_small.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of MobileNet-v3-Small can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Searching for MobileNetV3](https://arxiv.org/abs/1905.02244) diff --git a/qai_hub_models/models/mobilenet_v3_small/conftest.py b/qai_hub_models/models/mobilenet_v3_small/conftest.py new file mode 100644 index 00000000..e523e36a --- /dev/null +++ b/qai_hub_models/models/mobilenet_v3_small/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.mobilenet_v3_small import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. 
Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.mobilenet_v3_small.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/mobilenet_v3_small/demo.py b/qai_hub_models/models/mobilenet_v3_small/demo.py index b603d666..eb6b15b9 100644 --- a/qai_hub_models/models/mobilenet_v3_small/demo.py +++ b/qai_hub_models/models/mobilenet_v3_small/demo.py @@ -3,11 +3,11 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo -from qai_hub_models.models.mobilenet_v3_small.model import MobileNetV3Small +from qai_hub_models.models.mobilenet_v3_small.model import MODEL_ID, MobileNetV3Small def main(is_test: bool = False): - imagenet_demo(MobileNetV3Small, is_test) + imagenet_demo(MobileNetV3Small, MODEL_ID, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/mobilenet_v3_small/export.py b/qai_hub_models/models/mobilenet_v3_small/export.py index 26c8f0f1..92037cb9 100644 --- a/qai_hub_models/models/mobilenet_v3_small/export.py +++ b/qai_hub_models/models/mobilenet_v3_small/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,35 +109,43 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image_tensor" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." 
) @@ -146,33 +154,35 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/mobilenet_v3_small/info.yaml b/qai_hub_models/models/mobilenet_v3_small/info.yaml index e26f6a3a..8984b9c8 100644 --- a/qai_hub_models/models/mobilenet_v3_small/info.yaml +++ b/qai_hub_models/models/mobilenet_v3_small/info.yaml @@ -14,6 +14,7 @@ tags: research_paper: https://arxiv.org/abs/1905.02244 research_paper_title: Searching for MobileNetV3 license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/mobilenetv3.py technical_details: Model checkpoint: Imagenet @@ -36,6 +37,7 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k diff --git a/qai_hub_models/models/mobilenet_v3_small/model.py b/qai_hub_models/models/mobilenet_v3_small/model.py index 2eb733dc..f41d01c4 100644 --- a/qai_hub_models/models/mobilenet_v3_small/model.py +++ b/qai_hub_models/models/mobilenet_v3_small/model.py @@ -14,6 +14,6 @@ class MobileNetV3Small(ImagenetClassifier): @classmethod - def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> ImagenetClassifier: + def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> MobileNetV3Small: net = tv_models.mobilenet_v3_small(weights=weights) return cls(net) diff --git a/qai_hub_models/models/mobilenet_v3_small/perf.yaml b/qai_hub_models/models/mobilenet_v3_small/perf.yaml index 65b78de3..20a2652a 100644 --- a/qai_hub_models/models/mobilenet_v3_small/perf.yaml +++ b/qai_hub_models/models/mobilenet_v3_small/perf.yaml @@ -17,22 +17,25 
@@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: MobileNet-v3-Small performance_metrics: - torchscript_onnx_tflite: - inference_time: 427.0 - throughput: 2341.92037470726 + inference_time: 424.0 + throughput: 2358.490566037736 estimated_peak_memory_range: - min: 12288 - max: 1724768 + min: 36864 + max: 1921728 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,8 +43,16 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: j1gly20e5 + job_id: jlpe900gr job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:22:40.354876Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -57,11 +68,41 @@ models: total_layers: 0 job_id: '' job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 288.0 + throughput: 3472.222222222222 + estimated_peak_memory_range: + min: 12288 + max: 40067360 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 122 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 122 + job_id: jygzeq6g8 + job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:09:16.610887Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:22:40.354885Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped diff --git a/qai_hub_models/models/mobilenet_v3_small/test.py b/qai_hub_models/models/mobilenet_v3_small/test.py index 45f656b0..4d73f95b 100644 --- a/qai_hub_models/models/mobilenet_v3_small/test.py +++ b/qai_hub_models/models/mobilenet_v3_small/test.py @@ -2,6 +2,8 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- +import pytest + from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( run_imagenet_classifier_test, run_imagenet_classifier_trace_test, @@ -14,6 +16,7 @@ def test_task(): run_imagenet_classifier_test(MobileNetV3Small.from_pretrained(), MODEL_ID) +@pytest.mark.trace def test_trace(): run_imagenet_classifier_trace_test(MobileNetV3Small.from_pretrained()) diff --git a/qai_hub_models/models/openai_clip/README.md b/qai_hub_models/models/openai_clip/README.md index 531af4a8..1212603c 100644 --- a/qai_hub_models/models/openai_clip/README.md +++ b/qai_hub_models/models/openai_clip/README.md @@ -10,7 +10,7 @@ This is based on the implementation of OpenAI-Clip found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/openai_clip). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. 
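In the perf.yaml entries updated throughout this section, `throughput` appears to be derived directly from `inference_time`, which suggests the time is reported in microseconds and the throughput in inferences per second. A quick check against the MobileNet-v3-Small numbers above; this relationship is inferred from the values, not something the patch states:

```python
# Reported pairs: 424.0 us -> 2358.490566037736 (Galaxy S23) and
# 288.0 us -> 3472.222222222222 (Galaxy S24). Both match 1e6 / time_us.
for time_us in (424.0, 288.0):
    print(1_000_000 / time_us)
```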
@@ -30,7 +30,7 @@ python -m qai_hub_models.models.openai_clip.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -46,7 +46,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of OpenAI-Clip can be found [here](https://github.com/openai/CLIP/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Learning Transferable Visual Models From Natural Language Supervision](https://arxiv.org/abs/2103.00020) diff --git a/qai_hub_models/models/openai_clip/app.py b/qai_hub_models/models/openai_clip/app.py index 92ffd8c3..3df122c1 100644 --- a/qai_hub_models/models/openai_clip/app.py +++ b/qai_hub_models/models/openai_clip/app.py @@ -97,8 +97,8 @@ def process_text(self, text: str) -> torch.Tensor: """ return self.tokenizer(text) + @staticmethod def get_input_spec( - self, image_size: Tuple[int, int] = (224, 224), text_size: Tuple[int, int] = (3, 77), ) -> InputSpec: diff --git a/qai_hub_models/models/openai_clip/conftest.py b/qai_hub_models/models/openai_clip/conftest.py new file mode 100644 index 00000000..bb6c08ac --- /dev/null +++ b/qai_hub_models/models/openai_clip/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.openai_clip import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. 
+ """ + mock = patch( + "qai_hub_models.models.openai_clip.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/openai_clip/export.py b/qai_hub_models/models/openai_clip/export.py index d8d60fc1..780dff90 100644 --- a/qai_hub_models/models/openai_clip/export.py +++ b/qai_hub_models/models/openai_clip/export.py @@ -10,14 +10,14 @@ import os import warnings from pathlib import Path -from typing import List, Mapping, Optional, Tuple +from typing import Any, Dict, List, Mapping, Optional, Tuple, cast import qai_hub as hub import torch from qai_hub_models.models.openai_clip import Model from qai_hub_models.utils.args import export_parser, get_model_kwargs -from qai_hub_models.utils.base_model import TargetRuntime +from qai_hub_models.utils.base_model import BaseModel, TargetRuntime from qai_hub_models.utils.compare import torch_inference from qai_hub_models.utils.input_spec import make_torch_inputs from qai_hub_models.utils.printing import ( @@ -90,9 +90,9 @@ def export_model( output_path = Path(output_dir or Path.cwd() / "build" / model_name) component_arg = components components = components or ALL_COMPONENTS - for component in components: - if component not in ALL_COMPONENTS: - raise ValueError(f"Invalid component {component}.") + for component_name in components: + if component_name not in ALL_COMPONENTS: + raise ValueError(f"Invalid component {component_name}.") if not can_access_qualcomm_ai_hub(): return export_without_hub_access( "openai_clip", @@ -111,68 +111,85 @@ def export_model( # 1. Initialize PyTorch model model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) - components_dict = {} + components_dict: Dict[str, BaseModel] = {} if "CLIPTextEncoder" in components: - components_dict["CLIPTextEncoder"] = model.text_encoder + components_dict["CLIPTextEncoder"] = model.text_encoder # type: ignore if "CLIPImageEncoder" in components: - components_dict["CLIPImageEncoder"] = model.image_encoder + components_dict["CLIPImageEncoder"] = model.image_encoder # type: ignore - compile_jobs = {} + compile_jobs: Dict[str, hub.client.CompileJob] = {} for component_name, component in components_dict.items(): # Trace the model input_spec = component.get_input_spec() - source_model = torch.jit.trace(component, make_torch_inputs(input_spec)) + source_model = torch.jit.trace( + component.to("cpu"), make_torch_inputs(input_spec) + ) # 2. Compile the models to an on-device asset model_compile_options = component.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image" ) - print(f"Optimizing model {component_name} to run on-device.") - compile_jobs[component_name] = hub.submit_compile_job( + print(f"Optimizing model {component_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), - name=f"{component_name}", + name=f"{model_name}_{component_name}", options=model_compile_options, ) + compile_jobs[component_name] = cast( + hub.client.CompileJob, submitted_compile_job + ) # 3. 
Profile the model assets on real devices - profile_jobs = {} + profile_jobs: Dict[str, hub.client.ProfileJob] = {} if not skip_profiling: for component_name in components: + profile_options_all = components_dict[ + component_name + ].get_hub_profile_options(target_runtime, profile_options) print(f"Profiling model {component_name} on a hosted device.") - profile_jobs[component_name] = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_jobs[component_name].get_target_model(), device=hub.Device(device), - name=f"{component_name}", - options=profile_options, + name=f"{model_name}_{component_name}", + options=profile_options_all, + ) + profile_jobs[component_name] = cast( + hub.client.ProfileJob, submitted_profile_job ) # 4. Run inference on-device with sample inputs - inference_jobs = {} + inference_jobs: Dict[str, hub.client.InferenceJob] = {} if not skip_inferencing: for component_name in components: print( f"Running inference for {component_name} on a hosted device with example inputs." ) + profile_options_all = components_dict[ + component_name + ].get_hub_profile_options(target_runtime, profile_options) sample_inputs = components_dict[component_name].sample_inputs() # Convert inputs from channel first to channel last hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_jobs[component_name] = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_jobs[component_name].get_target_model(), inputs=hub_inputs, device=hub.Device(device), - name=f"{component_name}", - options=profile_options, + name=f"{model_name}_{component_name}", + options=profile_options_all, + ) + inference_jobs[component_name] = cast( + hub.client.InferenceJob, submitted_inference_job ) # 5. 
Download the model assets to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) for component_name, compile_job in compile_jobs.items(): - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download( str(output_path / f"{model_name}_{component_name}.tflite") ) @@ -181,8 +198,8 @@ def export_model( if not skip_summary and not skip_profiling: for component_name in components: profile_job = profile_jobs[component_name] - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: @@ -190,8 +207,8 @@ def export_model( inference_job = inference_jobs[component_name] sample_inputs = components_dict[component_name].sample_inputs() torch_out = torch_inference(components_dict[component_name], sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) return { diff --git a/qai_hub_models/models/openai_clip/info.yaml b/qai_hub_models/models/openai_clip/info.yaml index 2e439649..d3145f66 100644 --- a/qai_hub_models/models/openai_clip/info.yaml +++ b/qai_hub_models/models/openai_clip/info.yaml @@ -15,6 +15,7 @@ tags: research_paper: https://arxiv.org/abs/2103.00020 research_paper_title: Learning Transferable Visual Models From Natural Language Supervision license: https://github.com/openai/CLIP/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/openai/CLIP/ technical_details: Model checkpoint: ViT-B/16 @@ -35,4 +36,5 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: mit +deploy_license_type: AI Model Hub License dataset: [] diff --git a/qai_hub_models/models/openai_clip/model.py b/qai_hub_models/models/openai_clip/model.py index 9e4c452d..5c94b0ef 100644 --- a/qai_hub_models/models/openai_clip/model.py +++ b/qai_hub_models/models/openai_clip/model.py @@ -90,8 +90,8 @@ def forward(self, text: torch.Tensor): text_features = text_features / text_features.norm(dim=1, keepdim=True) return text_features + @staticmethod def get_input_spec( - self, batch_size: int = 1, text_length: int = 77, ) -> InputSpec: @@ -135,8 +135,8 @@ def forward(self, image: torch.Tensor): image_features = image_features / image_features.norm(dim=1, keepdim=True) return self.net.logit_scale.exp() * image_features + @staticmethod def get_input_spec( - self, height: int = 224, width: int = 224, ) -> InputSpec: diff --git a/qai_hub_models/models/openai_clip/perf.yaml b/qai_hub_models/models/openai_clip/perf.yaml index 2a6ddaed..0989352c 100644 --- a/qai_hub_models/models/openai_clip/perf.yaml +++ b/qai_hub_models/models/openai_clip/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - 
Snapdragon® 888 models: - name: CLIPTextEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 15528.0 - throughput: 64.39979392065945 + inference_time: 15516.0 + throughput: 64.44960041247744 estimated_peak_memory_range: - min: 40960 - max: 3106072 + min: 49152 + max: 3267008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,39 +43,77 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 576 - job_id: j2p0m2veg + job_id: jz5worjp1 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:47:17.422656Z' torchscript_onnx_qnn: - inference_time: 8149.0 - throughput: 122.71444348999877 + inference_time: 15586.0 + throughput: 64.16014371872193 estimated_peak_memory_range: - min: 40960 - max: 23728064 + min: 45056 + max: 2975720 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 377 + layers_on_npu: 574 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 377 - job_id: jogk2q9og + layers_on_cpu: 2 + total_layers: 576 + job_id: jz57z1rp3 + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 11115.0 + throughput: 89.9685110211426 + estimated_peak_memory_range: + min: 16384 + max: 204316144 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 574 + layers_on_gpu: 0 + layers_on_cpu: 2 + total_layers: 576 + job_id: jnp10ml5q job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:25:08.294036Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:49:22.781059Z' + torchscript_onnx_qnn: + inference_time: 11246.0 + throughput: 88.92050506846878 + estimated_peak_memory_range: + min: 40960 + max: 205502128 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 574 + layers_on_gpu: 0 + layers_on_cpu: 2 + total_layers: 576 + job_id: j0pxv89g7 + job_status: Passed - name: CLIPImageEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 127729.0 - throughput: 7.829075621041424 + inference_time: 128196.0 + throughput: 7.800555399544447 estimated_peak_memory_range: - min: 159744 - max: 3867320 + min: 143360 + max: 3847064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -80,28 +121,66 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 575 - job_id: j1p8em48p + job_id: jmg9vqv57 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:59:18.769511Z' torchscript_onnx_qnn: - inference_time: 50903.0 - throughput: 19.645207551617784 + inference_time: 127795.0 + throughput: 7.825032278258147 + estimated_peak_memory_range: + min: 180224 + max: 4074336 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 575 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 575 + job_id: jqp4q6lgo + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 98556.0 + throughput: 10.14651568651325 estimated_peak_memory_range: - min: 86016 - max: 59741752 + min: 163840 + max: 781391856 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 370 + layers_on_npu: 575 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 370 - job_id: jn5qlrmmp + total_layers: 575 + job_id: 
jvgdwml5j job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:30:00.084732Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-15T00:01:23.890974Z' + torchscript_onnx_qnn: + inference_time: 97281.0 + throughput: 10.279499593959766 + estimated_peak_memory_range: + min: 237568 + max: 783870384 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 575 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 575 + job_id: jo5mr1qgk + job_status: Passed diff --git a/qai_hub_models/models/openai_clip/requirements.txt b/qai_hub_models/models/openai_clip/requirements.txt index 4245d2f9..b44c6052 100644 --- a/qai_hub_models/models/openai_clip/requirements.txt +++ b/qai_hub_models/models/openai_clip/requirements.txt @@ -1,3 +1,2 @@ -torchvision ftfy==6.1.1 regex==2023.10.3 diff --git a/qai_hub_models/models/openpose/README.md b/qai_hub_models/models/openpose/README.md index 98224c0f..34c86010 100644 --- a/qai_hub_models/models/openpose/README.md +++ b/qai_hub_models/models/openpose/README.md @@ -10,7 +10,7 @@ This is based on the implementation of OpenPose found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/openpose). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -30,7 +30,7 @@ python -m qai_hub_models.models.openpose.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -46,7 +46,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of OpenPose can be found [here](https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [OpenPose: Realtime Multi-Person 2D Pose Estimation using Part Affinity Fields](https://arxiv.org/abs/1812.08008) diff --git a/qai_hub_models/models/openpose/conftest.py b/qai_hub_models/models/openpose/conftest.py new file mode 100644 index 00000000..49e58484 --- /dev/null +++ b/qai_hub_models/models/openpose/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.openpose import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.openpose.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/openpose/demo.py b/qai_hub_models/models/openpose/demo.py index 885d7631..23059539 100644 --- a/qai_hub_models/models/openpose/demo.py +++ b/qai_hub_models/models/openpose/demo.py @@ -2,13 +2,17 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- -import argparse - from qai_hub_models.models.openpose.app import OpenPoseApp from qai_hub_models.models.openpose.model import MODEL_ASSET_VERSION, MODEL_ID, OpenPose -from qai_hub_models.utils.args import add_output_dir_arg +from qai_hub_models.utils.args import ( + demo_model_from_cli_args, + get_model_cli_parser, + get_on_device_demo_parser, + validate_on_device_demo_args, +) from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image from qai_hub_models.utils.display import display_or_save_image +from qai_hub_models.utils.image_processing import pil_resize_pad, pil_undo_resize_pad IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( MODEL_ID, MODEL_ASSET_VERSION, "openpose_demo.png" @@ -19,21 +23,31 @@ # The demo will display the input image with circles drawn over the estimated joint positions. 
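The auto-generated conftest.py files added throughout this patch (openai_clip, openpose, quicksrnetlarge, quicksrnetlarge_quantized, and others) all follow the same pattern: an autouse fixture patches `Model.from_pretrained` so the pretrained weights are effectively loaded once and reused. A hypothetical test module (illustration only, not part of the patch) that would observe this behavior when placed next to one of these conftest.py files:

```python
# Hypothetical test module, e.g. under qai_hub_models/models/openpose/.
from qai_hub_models.models.openpose import Model


def test_from_pretrained_is_cached():
    # The autouse mock_from_pretrained fixture in conftest.py patches
    # Model.from_pretrained, so repeated calls return the same pre-loaded
    # instance instead of re-initializing the network each time.
    assert Model.from_pretrained() is Model.from_pretrained()
```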
def main(is_test: bool = False): # Demo parameters - parser = argparse.ArgumentParser() + parser = get_model_cli_parser(OpenPose) + parser = get_on_device_demo_parser(parser, add_output_dir=True) parser.add_argument( "--image", type=str, default=IMAGE_ADDRESS, help="image file path or URL.", ) - add_output_dir_arg(parser) args = parser.parse_args([] if is_test else None) + model = demo_model_from_cli_args(OpenPose, MODEL_ID, args) + validate_on_device_demo_args(args, MODEL_ID) + + # Load image + app = OpenPoseApp(model) + (_, _, height, width) = OpenPose.get_input_spec()["image"][0] + orig_image = load_image(args.image) + image, scale, padding = pil_resize_pad(orig_image, (height, width)) - # Load image & model - app = OpenPoseApp(OpenPose.from_pretrained()) - image = load_image(args.image) + # Run inference pred_image = app.estimate_pose(image) + + # Resize / unpad annotated image + pred_image = pil_undo_resize_pad(pred_image, orig_image.size, scale, padding) + if not is_test: display_or_save_image(pred_image, args.output_dir) diff --git a/qai_hub_models/models/openpose/export.py b/qai_hub_models/models/openpose/export.py index bb86fa55..98bbb750 100644 --- a/qai_hub_models/models/openpose/export.py +++ b/qai_hub_models/models/openpose/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -26,6 +26,7 @@ from qai_hub_models.utils.input_spec import make_torch_inputs from qai_hub_models.utils.printing import ( print_inference_metrics, + print_on_target_demo_cmd, print_profile_metrics_from_job, ) from qai_hub_models.utils.qai_hub_helpers import ( @@ -109,7 +110,7 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( @@ -118,29 +119,37 @@ def export_model( + " --force_channel_last_input image" + " --force_channel_last_output output_0,output_1", ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." 
) @@ -149,36 +158,40 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore # Convert outputs from channel last to channel first inference_result = transpose_channel_last_to_first( "output_0,output_1", inference_result, target_runtime ) print_inference_metrics(inference_job, inference_result, torch_out) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/openpose/info.yaml b/qai_hub_models/models/openpose/info.yaml index 1941c9ef..3ec39a9a 100644 --- a/qai_hub_models/models/openpose/info.yaml +++ b/qai_hub_models/models/openpose/info.yaml @@ -12,6 +12,7 @@ research_paper: https://arxiv.org/abs/1812.08008 research_paper_title: 'OpenPose: Realtime Multi-Person 2D Pose Estimation using Part Affinity Fields' license: https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/CMU-Perceptual-Computing-Lab/openpose technical_details: Model checkpoint: body_pose_model.pth @@ -32,4 +33,5 @@ related_models: has_static_banner: yes has_animated_banner: no license_type: other +deploy_license_type: AI Model Hub License dataset: [] diff --git a/qai_hub_models/models/openpose/model.py b/qai_hub_models/models/openpose/model.py index f2749ddf..6379a270 100644 --- a/qai_hub_models/models/openpose/model.py +++ b/qai_hub_models/models/openpose/model.py @@ -98,8 +98,8 @@ def forward(self, image: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: return paf, heatmap + @staticmethod def get_input_spec( - self, batch_size: int = 1, num_channels: int = 3, height: int = 224, diff --git a/qai_hub_models/models/openpose/perf.yaml b/qai_hub_models/models/openpose/perf.yaml index 29999a3f..6f677f3e 100644 --- a/qai_hub_models/models/openpose/perf.yaml +++ b/qai_hub_models/models/openpose/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung 
Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: OpenPose performance_metrics: - torchscript_onnx_tflite: - inference_time: 11747.0 - throughput: 85.12811781731506 + inference_time: 11718.0 + throughput: 85.33879501621438 estimated_peak_memory_range: min: 229376 - max: 2462464 + max: 2888976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,14 +43,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jnp1nw3kg + job_id: j1pvokj5x job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:20:33.773079Z' torchscript_onnx_qnn: - inference_time: 11820.0 - throughput: 84.60236886632826 + inference_time: 11832.0 + throughput: 84.51656524678837 estimated_peak_memory_range: - min: 622592 - max: 241891488 + min: 643072 + max: 242325320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -55,13 +66,43 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 187 - job_id: jvgddq0kg + job_id: jlpe1m15r + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 8755.0 + throughput: 114.22044545973729 + estimated_peak_memory_range: + min: 192512 + max: 33307600 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 103 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 103 + job_id: j7gjmnxgd job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:07:34.029953Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:22:29.557459Z' + torchscript_onnx_qnn: + inference_time: 8772.0 + throughput: 113.99908800729594 + estimated_peak_memory_range: + min: 618496 + max: 53437584 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 187 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 187 + job_id: jygz9dk58 + job_status: Passed diff --git a/qai_hub_models/models/openpose/requirements.txt b/qai_hub_models/models/openpose/requirements.txt index ab4e628e..e4c17c9c 100644 --- a/qai_hub_models/models/openpose/requirements.txt +++ b/qai_hub_models/models/openpose/requirements.txt @@ -1,2 +1,2 @@ -scipy -matplotlib +scipy==1.8.1 +matplotlib==3.7.4 diff --git a/qai_hub_models/models/protocols.py b/qai_hub_models/models/protocols.py new file mode 100644 index 00000000..e263482e --- /dev/null +++ b/qai_hub_models/models/protocols.py @@ -0,0 +1,194 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +""" +This file defines type helpers. Specifically, those helpers are python Protocols. + +Protocols are helpful for defining interfaces that must be implemented for specific functions. + +For example, a function may take any class that implements FromPretrained. +The parameter would be typed "FromPretrainedProtocol", as defined in this file. + +Protocols may also be inherited to declare that a class must implement said protocol. +For example, AIMETQuantizableMixin inherits HubModelProtocol. 
This informs the type +checker that the class that inherits the mixin must implement HubModelProtocol. + +These are type checked at compile time. +""" +from __future__ import annotations + +from abc import abstractmethod +from typing import Protocol, Type, TypeVar, runtime_checkable + +from qai_hub.client import DatasetEntries + +from qai_hub_models.evaluators.base_evaluators import BaseEvaluator, _DataLoader +from qai_hub_models.models.common import SampleInputsType, TargetRuntime +from qai_hub_models.utils.input_spec import InputSpec + +FromPretrainedTypeVar = TypeVar("FromPretrainedTypeVar", bound="FromPretrainedProtocol") + +FromPrecompiledTypeVar = TypeVar( + "FromPrecompiledTypeVar", bound="FromPrecompiledProtocol" +) + + +class HubModelProtocol(Protocol): + """ + All AI Hub Models must, at minimum, implement this interface. + """ + + @staticmethod + @abstractmethod + def get_input_spec(*args, **kwargs) -> InputSpec: + """ + Returns a map from `{input_name -> (shape, dtype)}` + specifying the shape and dtype for each input argument. + """ + ... + + @abstractmethod + def sample_inputs(self, input_spec: InputSpec | None = None) -> SampleInputsType: + """ + Returns a set of sample inputs for the model. + + For each input name in the model, a list of numpy arrays is provided. + If the returned set is batch N, all input names must contain exactly N numpy arrays. + + This is a default implementation that returns a single random data array + for each input name based on the shapes and dtypes in `get_input_spec`. + + A subclass may choose to override this and fetch a batch of real input data + from a data source. + """ + ... + + +class QuantizableModelProtocol(Protocol): + """ + Methods required for a model to be quantizable. + """ + + @abstractmethod + def quantize( + self, + data: _DataLoader, + num_samples: int | None = None, + evaluator: BaseEvaluator | None = None, + device: str = "cpu", + requantize_model_weights=False, + ) -> float | None: + """ + Compute quantization encodings for this model with the given dataset and model evaluator. + + This model will be updated with a new set of quantization parameters. Future calls to + forward() and export_...() will take these quantization parameters into account. + + Parameters: + data: torch DataLoader | Collection + Data loader for the dataset to use for evaluation. + If an evaluator is __NOT__ provided (see "evaluator" parameter), the iterator must return + inputs: Collection[torch.Tensor] | torch.Tensor + + otherwise, if an evaluator __IS__ provided, the iterator must return + tuple( + inputs: Collection[torch.Tensor] | torch.Tensor, + ground_truth: Collection[torch.Tensor] | torch.Tensor] + ) + + num_samples: int | None + Number of samples to use for evaluation. One sample is one iteration from iter(data). + If none, defaults to the number of samples in the dataset. + + evaluator: BaseModelEvaluator | None + Evaluator to populate while quantizing the data. + If not provided, an evaluator is not used. + + device: str + Name of device on which inference should be run. + + requantize_model_weights: bool + If a weight is quantized, recompute its quantization parameters. + + Returns: + If an evaluator is provided, returns its accuracy score. No return value otherwise. + """ + ... + + @abstractmethod + def get_calibration_data( + self, + target_runtime: TargetRuntime, + input_spec: InputSpec | None = None, + ) -> DatasetEntries | None: + """ + Calibration dataset for this model and input spec. + """ + ... 
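To make the intent of these protocols concrete, here is a minimal sketch (not part of protocols.py) of a function typed against `HubModelProtocol`. It uses OpenPose, which appears elsewhere in this patch, purely as an example, and assumes the model provides the default `sample_inputs` implementation alongside its `get_input_spec`:

```python
# Illustrative sketch: any model that implements get_input_spec()/sample_inputs()
# satisfies HubModelProtocol structurally -- no shared base class required.
from qai_hub_models.models.openpose import Model as OpenPose
from qai_hub_models.models.protocols import HubModelProtocol


def describe_inputs(model: HubModelProtocol) -> None:
    # get_input_spec() maps input_name -> (shape, dtype), per the docstring above.
    for name, (shape, dtype) in model.get_input_spec().items():
        print(f"{name}: shape={shape}, dtype={dtype}")


describe_inputs(OpenPose.from_pretrained())  # loads the pretrained checkpoint
```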
+ + +class ExecutableModelProtocol(Protocol): + """ + Classes follow this protocol if they are executable. + """ + + @abstractmethod + def __call__(self, *args, **kwargs): + """ + Execute the model and return its output. + """ + ... + + +@runtime_checkable +class EvalModelProtocol(Protocol): + """ + Models follow this protocol if they can be numerically evaluated. + """ + + @abstractmethod + def get_evaluator(self) -> BaseEvaluator: + """ + Gets a class for evaluating output of this model. + """ + ... + + +@runtime_checkable +class FromPretrainedProtocol(Protocol): + """ + Models follow this protocol if they can be initiated from a pretrained torch model. + """ + + @classmethod + @abstractmethod + def from_pretrained( + cls: Type[FromPretrainedTypeVar], *args, **kwargs + ) -> FromPretrainedTypeVar: + """ + Utility function that helps users get up and running with a default + pretrained model. While this function may take arguments, all arguments + should have default values specified, so that all classes can be invoked + with `cls.from_pretrained()` and always have it return something reasonable. + """ + ... + + +class FromPrecompiledProtocol(Protocol): + """ + Models follow this protocol if they can be initiated from a precompiled torch model. + """ + + @classmethod + @abstractmethod + def from_precompiled( + cls: Type[FromPrecompiledTypeVar], *args, **kwargs + ) -> "FromPrecompiledTypeVar": + """ + Utility function that helps users get up and running with a default + precompiled model. While this function may take arguments, all arguments + should have default values specified, so that all classes can be invoked + with `cls.from_precompiled()` and always have it return something reasonable. + """ + ... diff --git a/qai_hub_models/models/quicksrnetlarge/README.md b/qai_hub_models/models/quicksrnetlarge/README.md index dece0985..9d1f26c2 100644 --- a/qai_hub_models/models/quicksrnetlarge/README.md +++ b/qai_hub_models/models/quicksrnetlarge/README.md @@ -10,7 +10,7 @@ This is based on the implementation of QuickSRNetLarge found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/quicksrnetlarge). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.quicksrnetlarge.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of QuickSRNetLarge can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). 
+- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms](https://arxiv.org/abs/2303.04336) diff --git a/qai_hub_models/models/quicksrnetlarge/conftest.py b/qai_hub_models/models/quicksrnetlarge/conftest.py new file mode 100644 index 00000000..b6f5b722 --- /dev/null +++ b/qai_hub_models/models/quicksrnetlarge/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.quicksrnetlarge import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.quicksrnetlarge.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/quicksrnetlarge/demo.py b/qai_hub_models/models/quicksrnetlarge/demo.py index 77a29a77..12d688c3 100644 --- a/qai_hub_models/models/quicksrnetlarge/demo.py +++ b/qai_hub_models/models/quicksrnetlarge/demo.py @@ -20,6 +20,7 @@ def main(is_test: bool = False): super_resolution_demo( model_cls=QuickSRNetLarge, + model_id=MODEL_ID, default_image=IMAGE_ADDRESS, is_test=is_test, ) diff --git a/qai_hub_models/models/quicksrnetlarge/export.py b/qai_hub_models/models/quicksrnetlarge/export.py index 07628b5b..9e1976a4 100644 --- a/qai_hub_models/models/quicksrnetlarge/export.py +++ b/qai_hub_models/models/quicksrnetlarge/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -26,6 +26,7 @@ from qai_hub_models.utils.input_spec import make_torch_inputs from qai_hub_models.utils.printing import ( print_inference_metrics, + print_on_target_demo_cmd, print_profile_metrics_from_job, ) from qai_hub_models.utils.qai_hub_helpers import ( @@ -109,7 +110,7 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( @@ -118,29 +119,37 @@ def export_model( + " --force_channel_last_input image" + " --force_channel_last_output output_0", ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. 
Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -149,36 +158,40 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore # Convert outputs from channel last to channel first inference_result = transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) print_inference_metrics(inference_job, inference_result, torch_out) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/quicksrnetlarge/info.yaml b/qai_hub_models/models/quicksrnetlarge/info.yaml index 3bb825dc..b9cc532d 100644 --- a/qai_hub_models/models/quicksrnetlarge/info.yaml +++ b/qai_hub_models/models/quicksrnetlarge/info.yaml @@ -12,6 +12,7 @@ research_paper: https://arxiv.org/abs/2303.04336 research_paper_title: 'QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms' license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet 
technical_details: Model checkpoint: quicksrnet_large_4x_checkpoint_float32 @@ -27,6 +28,7 @@ form_factors: - Tablet related_models: [xlsr, esrgan, quicksrnetlarge_quantized] has_static_banner: yes -has_animated_banner: yes +has_animated_banner: no license_type: other +deploy_license_type: AI Model Hub License dataset: [] diff --git a/qai_hub_models/models/quicksrnetlarge/model.py b/qai_hub_models/models/quicksrnetlarge/model.py index 609b9163..bfed7f6c 100644 --- a/qai_hub_models/models/quicksrnetlarge/model.py +++ b/qai_hub_models/models/quicksrnetlarge/model.py @@ -40,8 +40,6 @@ def __init__( @classmethod def from_pretrained(cls) -> QuickSRNetLarge: model = _load_quicksrnet_source_model( - MODEL_ID, - MODEL_ASSET_VERSION, SCALING_FACTOR, NUM_CHANNELS, NUM_INTERMEDIATE_LAYERS, diff --git a/qai_hub_models/models/quicksrnetlarge/perf.yaml b/qai_hub_models/models/quicksrnetlarge/perf.yaml index 1b0102ea..d7559b5d 100644 --- a/qai_hub_models/models/quicksrnetlarge/perf.yaml +++ b/qai_hub_models/models/quicksrnetlarge/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: QuickSRNetLarge performance_metrics: - torchscript_onnx_tflite: - inference_time: 2532.0 - throughput: 394.9447077409163 + inference_time: 2500.0 + throughput: 400.0 estimated_peak_memory_range: min: 16384 - max: 8035880 + max: 1492864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,14 +43,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jz57el4rp + job_id: jn5q8l757 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-15T00:06:24.010143Z' torchscript_onnx_qnn: - inference_time: 2106.0 - throughput: 474.8338081671415 + inference_time: 2109.0 + throughput: 474.158368895211 estimated_peak_memory_range: - min: 212992 - max: 76319976 + min: 16384 + max: 5120280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -55,13 +66,43 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 32 - job_id: jqp4yd1lp + job_id: jw5668v5o + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 1780.0 + throughput: 561.7977528089888 + estimated_peak_memory_range: + min: 20480 + max: 27633264 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 28 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 31 + job_id: j1glnyepv job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:38:01.534196Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-15T00:10:42.992618Z' + torchscript_onnx_qnn: + inference_time: 1506.0 + throughput: 664.0106241699867 + estimated_peak_memory_range: + min: 208896 + max: 18546960 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 32 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 32 + job_id: j1p3kzx52 + job_status: Passed diff --git a/qai_hub_models/models/quicksrnetlarge/test.py b/qai_hub_models/models/quicksrnetlarge/test.py index 66f6fc53..ad63526d 100644 --- 
a/qai_hub_models/models/quicksrnetlarge/test.py +++ b/qai_hub_models/models/quicksrnetlarge/test.py @@ -35,5 +35,6 @@ def test_task(): ) +@skip_clone_repo_check def test_demo(): demo_main(is_test=True) diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/README.md b/qai_hub_models/models/quicksrnetlarge_quantized/README.md new file mode 100644 index 00000000..22069d4a --- /dev/null +++ b/qai_hub_models/models/quicksrnetlarge_quantized/README.md @@ -0,0 +1,54 @@ +[![Qualcomm® AI Hub Models](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/quic-logo.jpg)](../../README.md) + + +# [QuickSRNetLarge-Quantized: Upscale images and remove image noise](https://aihub.qualcomm.com/models/quicksrnetlarge_quantized) + +QuickSRNet Large is designed for upscaling images on mobile platforms to sharpen in real-time. + +This is based on the implementation of QuickSRNetLarge-Quantized found +[here](https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet). This repository contains scripts for optimized on-device +export suitable to run on Qualcomm® devices. More details on model performance +accross various devices, can be found [here](https://aihub.qualcomm.com/models/quicksrnetlarge_quantized). + +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on +a hosted Qualcomm® device. + + +## Example & Usage + + +Once installed, run the following simple CLI demo: + +```bash +python -m qai_hub_models.models.quicksrnetlarge_quantized.demo +``` +More details on the CLI tool can be found with the `--help` option. See +[demo.py](demo.py) for sample usage of the model including pre/post processing +scripts. Please refer to our [general instructions on using +models](../../../#getting-started) for more usage instructions. + +## Export for on-device deployment + +This repository contains export scripts that produce a model optimized for +on-device deployment. This can be run as follows: + +```bash +python -m qai_hub_models.models.quicksrnetlarge_quantized.export +``` +Additional options are documented with the `--help` option. Note that the above +script requires access to Deployment instructions for Qualcomm® AI Hub. + +## License +- The license for the original implementation of QuickSRNetLarge-Quantized can be found + [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) + +## References +* [QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms](https://arxiv.org/abs/2303.04336) +* [Source Model Implementation](https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet) + +## Community +* Join [our AI Hub Slack community](https://join.slack.com/t/qualcomm-ai-hub/shared_invite/zt-2dgf95loi-CXHTDRR1rvPgQWPO~ZZZJg) to collaborate, post questions and learn more about on-device AI. +* For questions or feedback please [reach out to us](mailto:ai-hub-support@qti.qualcomm.com). + + diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/__init__.py b/qai_hub_models/models/quicksrnetlarge_quantized/__init__.py new file mode 100644 index 00000000..dbfdb539 --- /dev/null +++ b/qai_hub_models/models/quicksrnetlarge_quantized/__init__.py @@ -0,0 +1,10 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.super_resolution.app import ( # noqa: F401 + SuperResolutionApp as App, +) + +from .model import MODEL_ID # noqa: F401 +from .model import QuickSRNetLargeQuantizable as Model # noqa: F401 diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/conftest.py b/qai_hub_models/models/quicksrnetlarge_quantized/conftest.py new file mode 100644 index 00000000..c1b1a9d9 --- /dev/null +++ b/qai_hub_models/models/quicksrnetlarge_quantized/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.quicksrnetlarge_quantized import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.quicksrnetlarge_quantized.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/demo.py b/qai_hub_models/models/quicksrnetlarge_quantized/demo.py new file mode 100644 index 00000000..53d37094 --- /dev/null +++ b/qai_hub_models/models/quicksrnetlarge_quantized/demo.py @@ -0,0 +1,28 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.super_resolution.demo import super_resolution_demo +from qai_hub_models.models.quicksrnetlarge_quantized.model import ( + MODEL_ASSET_VERSION, + MODEL_ID, + QuickSRNetLargeQuantizable, +) +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset + +IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, "quicksrnet_demo.jpg" +) + + +def main(is_test: bool = False): + super_resolution_demo( + QuickSRNetLargeQuantizable, + MODEL_ID, + default_image=IMAGE_ADDRESS, + is_test=is_test, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/export.py b/qai_hub_models/models/quicksrnetlarge_quantized/export.py new file mode 100644 index 00000000..ea5568eb --- /dev/null +++ b/qai_hub_models/models/quicksrnetlarge_quantized/export.py @@ -0,0 +1,215 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
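Each generated model package re-exports its contents under the uniform aliases `Model` and `App` (see the `__init__.py` above), which is what lets the auto-generated export.py, conftest.py, and demo.py treat every model identically. A minimal, hypothetical usage sketch; the assumption that `App` (the shared `SuperResolutionApp`) takes the model as its only required constructor argument is mine and is not stated in this patch:

```python
# Illustrative only: uniform access to a model package through its aliases.
from qai_hub_models.models.quicksrnetlarge_quantized import App, Model

model = Model.from_pretrained()  # QuickSRNetLargeQuantizable
app = App(model)                 # assumed signature: SuperResolutionApp(model)
```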
+ + +from __future__ import annotations + +import os +import warnings +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple, cast + +import qai_hub as hub + +from qai_hub_models.models.quicksrnetlarge_quantized import Model +from qai_hub_models.utils.args import ( + export_parser, + get_input_spec_kwargs, + get_model_kwargs, +) +from qai_hub_models.utils.base_model import TargetRuntime +from qai_hub_models.utils.compare import torch_inference +from qai_hub_models.utils.printing import ( + print_inference_metrics, + print_on_target_demo_cmd, + print_profile_metrics_from_job, +) +from qai_hub_models.utils.qai_hub_helpers import ( + can_access_qualcomm_ai_hub, + export_without_hub_access, + transpose_channel_first_to_last, + transpose_channel_last_to_first, +) +from qai_hub_models.utils.qnn_helpers import get_qnn_inputs + + +def export_model( + device: str = "Samsung Galaxy S23", + skip_profiling: bool = False, + skip_inferencing: bool = False, + skip_downloading: bool = False, + skip_summary: bool = False, + output_dir: Optional[str] = None, + target_runtime: TargetRuntime = TargetRuntime.TFLITE, + compile_options: str = "", + profile_options: str = "", + **additional_model_kwargs, +) -> Tuple[hub.CompileJob, Optional[hub.ProfileJob], Optional[hub.InferenceJob]] | List[ + str +]: + """ + This function accomplishes 6 main tasks: + + 1. Instantiates a PyTorch model and converts it to a traced TorchScript format. + 2. Compiles the model to an asset that can be run on device. + 3. Profiles the model performance on real devices. + 4. Inferences the model on sample inputs. + 5. Downloads the model asset to the local directory. + 6. Summarizes the results from profiling and inference. + + Each of the last four steps can be optionally skipped using the input options. + + Parameters: + device: Device for which to export the model. + Full list of available devices can be found by running `hub.get_devices()`. + Defaults to DEFAULT_DEVICE if not specified. + skip_profiling: If set, skips profiling of compiled model on real devices. + skip_inferencing: If set, skips computing on-device outputs from sample data. + skip_downloading: If set, skips downloading of compiled model. + skip_summary: If set, skips waiting for and summarizing results + from profiling and inference. + output_dir: Directory to store generated assets (e.g. compiled model). + Defaults to `/build/`. + target_runtime: Which on-device runtime to target. Default is TFLite. + compile_options: Additional options to pass when submitting the compile job. + profile_options: Additional options to pass when submitting the profile job. + **additional_model_kwargs: Additional optional kwargs used to customize + `model_cls.from_pretrained` and `model.get_input_spec` + + Returns: + A 3-tuple of: + * A CompileJob object containing metadata about the compile job submitted to hub. + * A ProfileJob containing metadata about the profile job (None if profiling skipped). + * An InferenceJob containing metadata about the inference job (None if inferencing skipped). + """ + model_name = "quicksrnetlarge_quantized" + output_path = Path(output_dir or Path.cwd() / "build" / model_name) + if not can_access_qualcomm_ai_hub(): + return export_without_hub_access( + "quicksrnetlarge_quantized", + "QuickSRNetLarge-Quantized", + device, + skip_profiling, + skip_inferencing, + skip_downloading, + skip_summary, + output_path, + target_runtime, + compile_options, + profile_options, + ) + + # 1. 
Initialize PyTorch model + model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) + input_spec = model.get_input_spec( + **get_input_spec_kwargs(model, additional_model_kwargs) + ) + + # Trace the model + source_model = model.convert_to_hub_source_model( + target_runtime, output_path, input_spec + ) + if target_runtime == TargetRuntime.TFLITE: + quant_calibration_data = None + else: + quant_calibration_data = model.get_calibration_data(target_runtime, input_spec) + + # 2. Compile the model to an on-device asset + model_compile_options = model.get_hub_compile_options( + target_runtime, + compile_options + + " --force_channel_last_input image" + + " --force_channel_last_output output_0", + ) + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( + model=source_model, + input_specs=input_spec, + device=hub.Device(device), + name=model_name, + calibration_data=quant_calibration_data, + options=model_compile_options, + ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) + + # 3. Profile the model asset on real devices + profile_job: Optional[hub.client.ProfileJob] = None + if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print(f"Profiling model {model_name} on a hosted device.") + submitted_profile_job = hub.submit_profile_job( + model=compile_job.get_target_model(), + device=hub.Device(device), + name=model_name, + options=profile_options_all, + ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) + + # 4. Run inference on-device with sample inputs + inference_job: Optional[hub.client.InferenceJob] = None + if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print( + f"Running inference for {model_name} on a hosted device with example inputs." + ) + sample_inputs = model.sample_inputs(input_spec) + hub_inputs = sample_inputs + if target_runtime == TargetRuntime.QNN: + hub_inputs = get_qnn_inputs(compile_job, sample_inputs) + # Convert inputs from channel first to channel last + hub_inputs = transpose_channel_first_to_last( + "image", sample_inputs, target_runtime + ) + submitted_inference_job = hub.submit_inference_job( + model=compile_job.get_target_model(), + inputs=hub_inputs, + device=hub.Device(device), + name=model_name, + options=profile_options_all, + ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) + + # 5. Download the model asset to a local file + if not skip_downloading: + os.makedirs(output_path, exist_ok=True) + target_model: hub.Model = compile_job.get_target_model() # type: ignore + target_model.download(str(output_path / f"{model_name}.tflite")) + + # 6. 
Summarize the results from profiling and inference + if not skip_summary and not skip_profiling: + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore + print_profile_metrics_from_job(profile_job, profile_data) + + if not skip_summary and not skip_inferencing: + torch_out = torch_inference(model, sample_inputs) + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore + # Convert outputs from channel last to channel first + inference_result = transpose_channel_last_to_first( + "output_0", inference_result, target_runtime + ) + print_inference_metrics(inference_job, inference_result, torch_out) + + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + + return (compile_job, profile_job, inference_job) + + +def main(): + warnings.filterwarnings("ignore") + parser = export_parser(model_cls=Model, supports_qnn=False) + args = parser.parse_args() + export_model(**vars(args)) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/info.yaml b/qai_hub_models/models/quicksrnetlarge_quantized/info.yaml new file mode 100644 index 00000000..93c2001d --- /dev/null +++ b/qai_hub_models/models/quicksrnetlarge_quantized/info.yaml @@ -0,0 +1,35 @@ +name: QuickSRNetLarge-Quantized +# id must match with the model dir name in qai_hub_models +id: quicksrnetlarge_quantized +status: public +headline: Upscale images and remove image noise. +domain: Computer Vision +description: QuickSRNet Large is designed for upscaling images on mobile platforms + to sharpen in real-time. +use_case: Super Resolution +tags: + - quantized +research_paper: https://arxiv.org/abs/2303.04336 +research_paper_title: 'QuickSRNet: Plain Single-Image Super-Resolution Architecture + for Faster Inference on Mobile Platforms' +license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet +technical_details: + Model checkpoint: quicksrnet_large_4x_checkpoint_int8 + Input resolution: 128x128 + Number of parameters: 436K + Model size: 464 KB +applicable_scenarios: + - Virtual Real Estate Tours + - Gaming + - ARVR +form_factors: + - Phone + - Tablet +related_models: [xlsr, xlsr_quantized, quicksrnetlarge] +has_static_banner: yes +has_animated_banner: yes +license_type: other +deploy_license_type: AI Model Hub License +dataset: [] diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/model.py b/qai_hub_models/models/quicksrnetlarge_quantized/model.py new file mode 100644 index 00000000..9bca792d --- /dev/null +++ b/qai_hub_models/models/quicksrnetlarge_quantized/model.py @@ -0,0 +1,99 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from __future__ import annotations + +# isort: off +# This verifies aimet is installed, and this must be included first. 
+from qai_hub_models.utils.quantization_aimet import ( + AIMETQuantizableMixin, +) + +# isort: on + +import torch +from aimet_torch.cross_layer_equalization import equalize_model +from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim + +from qai_hub_models.models.quicksrnetlarge.model import QuickSRNetLarge +from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config_legacy_v2 +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset + +MODEL_ID = __name__.split(".")[-2] +MODEL_ASSET_VERSION = 2 + +# Weights and config stored in S3 are sourced from +# https://github.com/quic/aimet-model-zoo/blob/develop/aimet_zoo_torch/quicksrnet/model/model_cards/quicksrnet_large_4x_w8a8.json: +# https://github.com/quic/aimet-model-zoo/releases/download/phase_2_january_artifacts/quicksrnet_large_4x_checkpoint_int8.pth +# and +# https://raw.githubusercontent.com/quic/aimet/release-aimet-1.23/TrainingExtensions/common/src/python/aimet_common/quantsim_config/default_config_per_channel.js +# Encodings were generated with AIMET QuantSim library +QUANTIZED_WEIGHTS = "quicksrnet_large_4x_checkpoint_int8.pth" +AIMET_ENCODINGS = "aimet_quantization_encodings.json" +SCALING_FACTOR = 4 + + +class QuickSRNetLargeQuantizable(AIMETQuantizableMixin, QuickSRNetLarge): + """QuickSRNetLarge with post train quantization support. + + Supports only 8 bit weights and activations, and only loads pre-quantized checkpoints. + Support for quantizing using your own weights & data will come at a later date.""" + + def __init__( + self, + quicksrnet_model: QuantizationSimModel, + ) -> None: + QuickSRNetLarge.__init__(self, quicksrnet_model.model) + AIMETQuantizableMixin.__init__( + self, quicksrnet_model, needs_onnx_direct_aimet_export=True + ) + + @classmethod + def from_pretrained( + cls, + aimet_encodings: str | None = "DEFAULT", + ) -> "QuickSRNetLargeQuantizable": + """ + Parameters: + aimet_encodings: + if "DEFAULT": Loads the model with aimet encodings calibrated on BSD300. + elif None: Doesn't load any encodings. Used when computing encodings. + else: Interprets as a filepath and loads the encodings stored there. + """ + # Load Model + quicksrnet = QuickSRNetLarge.from_pretrained() + input_shape = quicksrnet.get_input_spec()["image"][0] + equalize_model(quicksrnet, input_shape) + + # Download weights and quantization parameters + weights = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, QUANTIZED_WEIGHTS + ).fetch() + aimet_config = get_default_aimet_config_legacy_v2() + + # Load the model weights and quantization parameters + # In this particular instance, the state_dict keys from the model are all named "model." + # where is the name of each key in the weights file - without the word model. + # We rename all the keys to add the word model + state_dict = torch.load(weights, map_location=torch.device("cpu"))["state_dict"] + new_state_dict = {"model." 
+ key: value for key, value in state_dict.items()} + quicksrnet.load_state_dict(new_state_dict) + sim = QuantizationSimModel( + quicksrnet, + quant_scheme="tf_enhanced", + default_param_bw=8, + default_output_bw=8, + config_file=aimet_config, + dummy_input=torch.rand(input_shape), + ) + if aimet_encodings: + if aimet_encodings == "DEFAULT": + aimet_encodings = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, AIMET_ENCODINGS + ).fetch() + load_encodings_to_sim(sim, aimet_encodings) + + sim.model.eval() + + return cls(sim) diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/perf.yaml b/qai_hub_models/models/quicksrnetlarge_quantized/perf.yaml new file mode 100644 index 00000000..174979a6 --- /dev/null +++ b/qai_hub_models/models/quicksrnetlarge_quantized/perf.yaml @@ -0,0 +1,108 @@ +aggregated: + supported_oses: + - Android + supported_devices: + - Google Pixel 3 + - Google Pixel 3a + - Google Pixel 3a XL + - Google Pixel 4 + - Google Pixel 4a + - Google Pixel 5a 5G + - Samsung Galaxy S21 + - Samsung Galaxy S21 Ultra + - Samsung Galaxy S21+ + - Samsung Galaxy S22 5G + - Samsung Galaxy S22 Ultra 5G + - Samsung Galaxy S22+ 5G + - Samsung Galaxy S23 + - Samsung Galaxy S23 Ultra + - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra + - Samsung Galaxy Tab S8 + - Xiaomi 12 + - Xiaomi 12 Pro + supported_chipsets: + - Snapdragon® 8 Gen 1 + - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 + - Snapdragon® 888 +models: +- name: QuickSRNetLarge-Quantized + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 1868.0 + throughput: 535.3319057815846 + estimated_peak_memory_range: + min: 12288 + max: 1533296 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 30 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 33 + job_id: jygze66g8 + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:21:00.166706Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 1484.0 + throughput: 673.8544474393531 + estimated_peak_memory_range: + min: 20480 + max: 25007104 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 30 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 33 + job_id: jz5wokjp1 + job_status: Passed + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:21:00.166728Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/test.py b/qai_hub_models/models/quicksrnetlarge_quantized/test.py new file mode 100644 index 00000000..32337b60 --- /dev/null +++ b/qai_hub_models/models/quicksrnetlarge_quantized/test.py @@ -0,0 +1,89 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm 
Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +import os +import tempfile +import zipfile + +import numpy as np +import pytest +import torch + +from qai_hub_models.models._shared.super_resolution.app import SuperResolutionApp +from qai_hub_models.models.quicksrnetlarge_quantized.demo import IMAGE_ADDRESS +from qai_hub_models.models.quicksrnetlarge_quantized.demo import main as demo_main +from qai_hub_models.models.quicksrnetlarge_quantized.model import ( + MODEL_ASSET_VERSION, + MODEL_ID, + QuickSRNetLargeQuantizable, +) +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image +from qai_hub_models.utils.testing import assert_most_close, skip_clone_repo_check + +OUTPUT_IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, "quicksrnetlarge_quantized_output.png" +) + + +@skip_clone_repo_check +def test_task(): + # AIMET Quantization Simulator introduces randomness. Eliminate that for this test. + torch.manual_seed(0) + image = load_image(IMAGE_ADDRESS) + model = QuickSRNetLargeQuantizable.from_pretrained() + app = SuperResolutionApp(model=model) + app_output_image = app.predict(image)[0] + + output_image = load_image(OUTPUT_IMAGE_ADDRESS) + assert_most_close( + np.asarray(app_output_image, dtype=np.float32) / 255, + np.asarray(output_image, dtype=np.float32) / 255, + diff_tol=0.005, + rtol=0.02, + atol=0.2, + ) + + +@pytest.mark.trace +@skip_clone_repo_check +def test_trace(): + image = load_image(IMAGE_ADDRESS) + output_image = load_image(OUTPUT_IMAGE_ADDRESS) + app = SuperResolutionApp( + QuickSRNetLargeQuantizable.from_pretrained().convert_to_torchscript() + ) + app_output_image = app.predict(image)[0] + + assert_most_close( + np.asarray(app_output_image, dtype=np.float32) / 255, + np.asarray(output_image, dtype=np.float32) / 255, + diff_tol=0.005, + rtol=0.02, + atol=0.2, + ) + + +@pytest.mark.skip("https://github.com/tetraai/tetracode/issues/9606") +@skip_clone_repo_check +def test_aimet_export(): + model = QuickSRNetLargeQuantizable.from_pretrained() + name = model.__class__.__name__ + with tempfile.TemporaryDirectory() as tmpdir: + output_zip = model.convert_to_onnx_and_aimet_encodings( + tmpdir, + ) + assert os.path.exists(output_zip) + with zipfile.ZipFile(output_zip, "r") as zip: + assert zip.namelist() == [ + f"{name}.aimet/", + f"{name}.aimet/{name}.onnx", + f"{name}.aimet/{name}.encodings", + ] + + # No test of torchscipt and aimet encodings due to #8954 + + +@skip_clone_repo_check +def test_demo(): + demo_main(is_test=True) diff --git a/qai_hub_models/models/quicksrnetmedium/README.md b/qai_hub_models/models/quicksrnetmedium/README.md index 9c489c7e..b4cf8f71 100644 --- a/qai_hub_models/models/quicksrnetmedium/README.md +++ b/qai_hub_models/models/quicksrnetmedium/README.md @@ -10,7 +10,7 @@ This is based on the implementation of QuickSRNetMedium found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/quicksrnetmedium). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.quicksrnetmedium.demo More details on the CLI tool can be found with the `--help` option. 
See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of QuickSRNetMedium can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms](https://arxiv.org/abs/2303.04336) diff --git a/qai_hub_models/models/quicksrnetmedium/conftest.py b/qai_hub_models/models/quicksrnetmedium/conftest.py new file mode 100644 index 00000000..9f8c04db --- /dev/null +++ b/qai_hub_models/models/quicksrnetmedium/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.quicksrnetmedium import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.quicksrnetmedium.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/quicksrnetmedium/demo.py b/qai_hub_models/models/quicksrnetmedium/demo.py index 55e5d7a1..51c1ffec 100644 --- a/qai_hub_models/models/quicksrnetmedium/demo.py +++ b/qai_hub_models/models/quicksrnetmedium/demo.py @@ -20,6 +20,7 @@ def main(is_test: bool = False): super_resolution_demo( model_cls=QuickSRNetMedium, + model_id=MODEL_ID, default_image=IMAGE_ADDRESS, is_test=is_test, ) diff --git a/qai_hub_models/models/quicksrnetmedium/export.py b/qai_hub_models/models/quicksrnetmedium/export.py index 9c9cde02..ad1016dc 100644 --- a/qai_hub_models/models/quicksrnetmedium/export.py +++ b/qai_hub_models/models/quicksrnetmedium/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -26,6 +26,7 @@ from qai_hub_models.utils.input_spec import make_torch_inputs from qai_hub_models.utils.printing import ( print_inference_metrics, + print_on_target_demo_cmd, print_profile_metrics_from_job, ) from qai_hub_models.utils.qai_hub_helpers import ( @@ -109,7 +110,7 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. 
Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( @@ -118,29 +119,37 @@ def export_model( + " --force_channel_last_input image" + " --force_channel_last_output output_0", ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -149,36 +158,40 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. 
Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore # Convert outputs from channel last to channel first inference_result = transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) print_inference_metrics(inference_job, inference_result, torch_out) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/quicksrnetmedium/info.yaml b/qai_hub_models/models/quicksrnetmedium/info.yaml index aaa2691e..72ae05be 100644 --- a/qai_hub_models/models/quicksrnetmedium/info.yaml +++ b/qai_hub_models/models/quicksrnetmedium/info.yaml @@ -12,6 +12,7 @@ research_paper: https://arxiv.org/abs/2303.04336 research_paper_title: 'QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms' license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet technical_details: Model checkpoint: quicksrnet_medium_4x_checkpoint_float32 @@ -27,6 +28,7 @@ form_factors: - Tablet related_models: [xlsr, esrgan, quicksrnetlarge] has_static_banner: yes -has_animated_banner: yes +has_animated_banner: no license_type: other +deploy_license_type: AI Model Hub License dataset: [] diff --git a/qai_hub_models/models/quicksrnetmedium/model.py b/qai_hub_models/models/quicksrnetmedium/model.py index 24c4345c..e050d160 100644 --- a/qai_hub_models/models/quicksrnetmedium/model.py +++ b/qai_hub_models/models/quicksrnetmedium/model.py @@ -40,8 +40,6 @@ def __init__( @classmethod def from_pretrained(cls) -> QuickSRNetMedium: model = _load_quicksrnet_source_model( - MODEL_ID, - MODEL_ASSET_VERSION, SCALING_FACTOR, NUM_CHANNELS, NUM_INTERMEDIATE_LAYERS, diff --git a/qai_hub_models/models/quicksrnetmedium/perf.yaml b/qai_hub_models/models/quicksrnetmedium/perf.yaml index 5851a60e..f1316110 100644 --- a/qai_hub_models/models/quicksrnetmedium/perf.yaml +++ b/qai_hub_models/models/quicksrnetmedium/perf.yaml @@ -1,76 +1,92 @@ models: - name: QuickSRNetMedium performance_metrics: - - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-08T22:33:17.244157Z' - torchscript_onnx_tflite: - inference_time: 1407.0 - throughput: 710.7320540156361 + - torchscript_onnx_tflite: + inference_time: 1398.0 + throughput: 715.307582260372 estimated_peak_memory_range: - min: 32768 - max: 8364248 + min: 16384 + max: 8236496 + primary_compute_unit: NPU + precision: fp16 layer_info: layers_on_npu: 14 layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 
- precision: fp16 - primary_compute_unit: NPU - job_id: jvgd2x1z5 + job_id: jwgoy9d58 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:12:36.328807Z' torchscript_onnx_qnn: - inference_time: 992.0 - throughput: 1008.0645161290323 + inference_time: 989.0 + throughput: 1011.1223458038422 estimated_peak_memory_range: - min: 217088 - max: 28908792 + min: 212992 + max: 7267624 + primary_compute_unit: NPU + precision: fp16 layer_info: layers_on_npu: 18 layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 18 - precision: fp16 - primary_compute_unit: NPU - job_id: j1gllveeg + job_id: j7gjx88pd job_status: Passed - torchscript_onnx_ort_qnn_htp: - inference_time: 17078.0 - throughput: 58.55486590935707 + - torchscript_onnx_tflite: + inference_time: 935.0 + throughput: 1069.51871657754 estimated_peak_memory_range: - min: 15241216 - max: 26970304 + min: 16384 + max: 19630352 + primary_compute_unit: NPU + precision: fp16 layer_info: - layers_on_npu: 0 + layers_on_npu: 14 layers_on_gpu: 0 - layers_on_cpu: 8 - total_layers: 8 - precision: fp32 - primary_compute_unit: CPU - job_id: j0pxxkv3p + layers_on_cpu: 3 + total_layers: 17 + job_id: j1pv3nm5x job_status: Passed - torchscript_qnn: - inference_time: 'null' - throughput: 'null' + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:15:44.251341Z' + torchscript_onnx_qnn: + inference_time: 648.0 + throughput: 1543.20987654321 estimated_peak_memory_range: - min: 0 - max: 0 + min: 208896 + max: 14213744 + primary_compute_unit: NPU + precision: fp16 layer_info: - layers_on_npu: 'null' - layers_on_gpu: 'null' - layers_on_cpu: 'null' - total_layers: 'null' - precision: 'null' - primary_compute_unit: 'null' - job_id: 'null' - job_status: 'null' + layers_on_npu: 18 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 18 + job_id: jlpe9n0gr + job_status: Passed aggregated: + supported_oses: + - Android supported_devices: + - Google Pixel 3 + - Google Pixel 3a + - Google Pixel 3a XL + - Google Pixel 4 + - Google Pixel 4a + - Google Pixel 5a 5G - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -80,58 +96,13 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 + - Xiaomi 12 - Xiaomi 12 Pro - - Xiaomi 13 - - Xiaomi 13 Pro - supported_oses: - - Android supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 - performance_metrics: - - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-08T22:33:19.043922Z' - torchscript_onnx_tflite: - inference_time: 1407.0 - throughput: 710.7320540156361 - estimated_peak_memory_range: - min: 32768 - max: 8364248 - precision: fp16 - primary_compute_unit: NPU - job_status: Passed - torchscript_onnx_qnn: - inference_time: 992.0 - throughput: 1008.0645161290323 - estimated_peak_memory_range: - min: 217088 - max: 28908792 - precision: fp16 - primary_compute_unit: NPU - job_status: Passed - torchscript_onnx_ort_qnn_htp: - inference_time: 17078.0 - throughput: 58.55486590935707 - estimated_peak_memory_range: - min: 15241216 - max: 26970304 - precision: fp32 - 
primary_compute_unit: CPU - job_status: Passed - torchscript_qnn: - inference_time: 'null' - throughput: 'null' - estimated_peak_memory_range: - min: 0.0 - max: 0.0 - precision: 'null' - primary_compute_unit: 'null' - job_status: 'null' diff --git a/qai_hub_models/models/quicksrnetmedium/test.py b/qai_hub_models/models/quicksrnetmedium/test.py index aca388ad..9cd04d8e 100644 --- a/qai_hub_models/models/quicksrnetmedium/test.py +++ b/qai_hub_models/models/quicksrnetmedium/test.py @@ -35,5 +35,6 @@ def test_task(): ) +@skip_clone_repo_check def test_demo(): demo_main(is_test=True) diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/README.md b/qai_hub_models/models/quicksrnetmedium_quantized/README.md new file mode 100644 index 00000000..87b906ad --- /dev/null +++ b/qai_hub_models/models/quicksrnetmedium_quantized/README.md @@ -0,0 +1,54 @@ +[![Qualcomm® AI Hub Models](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/quic-logo.jpg)](../../README.md) + + +# [QuickSRNetMedium-Quantized: Upscale images and remove image noise](https://aihub.qualcomm.com/models/quicksrnetmedium_quantized) + +QuickSRNet Medium is designed for upscaling images on mobile platforms to sharpen in real-time. + +This is based on the implementation of QuickSRNetMedium-Quantized found +[here](https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet). This repository contains scripts for optimized on-device +export suitable to run on Qualcomm® devices. More details on model performance +across various devices can be found [here](https://aihub.qualcomm.com/models/quicksrnetmedium_quantized). + +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on +a hosted Qualcomm® device. + + +## Example & Usage + + +Once installed, run the following simple CLI demo: + +```bash +python -m qai_hub_models.models.quicksrnetmedium_quantized.demo +``` +More details on the CLI tool can be found with the `--help` option. See +[demo.py](demo.py) for sample usage of the model including pre/post processing +scripts. Please refer to our [general instructions on using +models](../../../#getting-started) for more usage instructions. + +## Export for on-device deployment + +This repository contains export scripts that produce a model optimized for +on-device deployment. This can be run as follows: + +```bash +python -m qai_hub_models.models.quicksrnetmedium_quantized.export +``` +Additional options are documented with the `--help` option. Note that the above +script requires access to Deployment instructions for Qualcomm® AI Hub. + +## License +- The license for the original implementation of QuickSRNetMedium-Quantized can be found + [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) + +## References +* [QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms](https://arxiv.org/abs/2303.04336) +* [Source Model Implementation](https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet) + +## Community +* Join [our AI Hub Slack community](https://join.slack.com/t/qualcomm-ai-hub/shared_invite/zt-2dgf95loi-CXHTDRR1rvPgQWPO~ZZZJg) to collaborate, post questions and learn more about on-device AI. +* For questions or feedback please [reach out to us](mailto:ai-hub-support@qti.qualcomm.com).
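Editor's note: the CLI command above wraps the `export_model` function defined in the `export.py` added later in this patch. The snippet below is a minimal sketch of calling that entry point programmatically; it uses only names that appear verbatim in this patch (`export_model`, `TargetRuntime`), while the device name, keyword choices, and output directory are illustrative examples, not part of the patch.

```python
# Illustrative sketch: call the generated export entry point directly instead of
# `python -m qai_hub_models.models.quicksrnetmedium_quantized.export`.
# Requires access to Qualcomm AI Hub; device and output_dir values are examples only.
from qai_hub_models.models.quicksrnetmedium_quantized.export import export_model
from qai_hub_models.utils.base_model import TargetRuntime

jobs = export_model(
    device="Samsung Galaxy S23",          # any device listed by hub.get_devices()
    target_runtime=TargetRuntime.TFLITE,  # default runtime in this patch
    skip_profiling=True,                  # compile, inference, and download only
    output_dir="build/quicksrnetmedium_quantized",
)
```

With TFLite as the target runtime, the compiled `.tflite` asset is written to the output directory unless `skip_downloading` is set, mirroring step 5 of `export_model` shown later in this diff.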
+ + diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/__init__.py b/qai_hub_models/models/quicksrnetmedium_quantized/__init__.py new file mode 100644 index 00000000..acef8c96 --- /dev/null +++ b/qai_hub_models/models/quicksrnetmedium_quantized/__init__.py @@ -0,0 +1,10 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.super_resolution.app import ( # noqa: F401 + SuperResolutionApp as App, +) + +from .model import MODEL_ID # noqa: F401 +from .model import QuickSRNetMediumQuantizable as Model # noqa: F401 diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/conftest.py b/qai_hub_models/models/quicksrnetmedium_quantized/conftest.py new file mode 100644 index 00000000..8ede0a24 --- /dev/null +++ b/qai_hub_models/models/quicksrnetmedium_quantized/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.quicksrnetmedium_quantized import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.quicksrnetmedium_quantized.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/demo.py b/qai_hub_models/models/quicksrnetmedium_quantized/demo.py new file mode 100644 index 00000000..f45370ab --- /dev/null +++ b/qai_hub_models/models/quicksrnetmedium_quantized/demo.py @@ -0,0 +1,28 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.super_resolution.demo import super_resolution_demo +from qai_hub_models.models.quicksrnetmedium_quantized.model import ( + MODEL_ASSET_VERSION, + MODEL_ID, + QuickSRNetMediumQuantizable, +) +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset + +IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, "quicksrnetmedium_demo.jpg" +) + + +def main(is_test: bool = False): + super_resolution_demo( + QuickSRNetMediumQuantizable, + MODEL_ID, + default_image=IMAGE_ADDRESS, + is_test=is_test, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/export.py b/qai_hub_models/models/quicksrnetmedium_quantized/export.py new file mode 100644 index 00000000..da516367 --- /dev/null +++ b/qai_hub_models/models/quicksrnetmedium_quantized/export.py @@ -0,0 +1,215 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import os +import warnings +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple, cast + +import qai_hub as hub + +from qai_hub_models.models.quicksrnetmedium_quantized import Model +from qai_hub_models.utils.args import ( + export_parser, + get_input_spec_kwargs, + get_model_kwargs, +) +from qai_hub_models.utils.base_model import TargetRuntime +from qai_hub_models.utils.compare import torch_inference +from qai_hub_models.utils.printing import ( + print_inference_metrics, + print_on_target_demo_cmd, + print_profile_metrics_from_job, +) +from qai_hub_models.utils.qai_hub_helpers import ( + can_access_qualcomm_ai_hub, + export_without_hub_access, + transpose_channel_first_to_last, + transpose_channel_last_to_first, +) +from qai_hub_models.utils.qnn_helpers import get_qnn_inputs + + +def export_model( + device: str = "Samsung Galaxy S23", + skip_profiling: bool = False, + skip_inferencing: bool = False, + skip_downloading: bool = False, + skip_summary: bool = False, + output_dir: Optional[str] = None, + target_runtime: TargetRuntime = TargetRuntime.TFLITE, + compile_options: str = "", + profile_options: str = "", + **additional_model_kwargs, +) -> Tuple[hub.CompileJob, Optional[hub.ProfileJob], Optional[hub.InferenceJob]] | List[ + str +]: + """ + This function accomplishes 6 main tasks: + + 1. Instantiates a PyTorch model and converts it to a traced TorchScript format. + 2. Compiles the model to an asset that can be run on device. + 3. Profiles the model performance on real devices. + 4. Inferences the model on sample inputs. + 5. Downloads the model asset to the local directory. + 6. Summarizes the results from profiling and inference. + + Each of the last four steps can be optionally skipped using the input options. + + Parameters: + device: Device for which to export the model. + Full list of available devices can be found by running `hub.get_devices()`. + Defaults to DEFAULT_DEVICE if not specified. + skip_profiling: If set, skips profiling of compiled model on real devices. + skip_inferencing: If set, skips computing on-device outputs from sample data. + skip_downloading: If set, skips downloading of compiled model. + skip_summary: If set, skips waiting for and summarizing results + from profiling and inference. + output_dir: Directory to store generated assets (e.g. compiled model). + Defaults to `/build/`. + target_runtime: Which on-device runtime to target. Default is TFLite. + compile_options: Additional options to pass when submitting the compile job. + profile_options: Additional options to pass when submitting the profile job. + **additional_model_kwargs: Additional optional kwargs used to customize + `model_cls.from_pretrained` and `model.get_input_spec` + + Returns: + A 3-tuple of: + * A CompileJob object containing metadata about the compile job submitted to hub. + * A ProfileJob containing metadata about the profile job (None if profiling skipped). + * An InferenceJob containing metadata about the inference job (None if inferencing skipped). 
+ """ + model_name = "quicksrnetmedium_quantized" + output_path = Path(output_dir or Path.cwd() / "build" / model_name) + if not can_access_qualcomm_ai_hub(): + return export_without_hub_access( + "quicksrnetmedium_quantized", + "QuickSRNetMedium-Quantized", + device, + skip_profiling, + skip_inferencing, + skip_downloading, + skip_summary, + output_path, + target_runtime, + compile_options, + profile_options, + ) + + # 1. Initialize PyTorch model + model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) + input_spec = model.get_input_spec( + **get_input_spec_kwargs(model, additional_model_kwargs) + ) + + # Trace the model + source_model = model.convert_to_hub_source_model( + target_runtime, output_path, input_spec + ) + if target_runtime == TargetRuntime.TFLITE: + quant_calibration_data = None + else: + quant_calibration_data = model.get_calibration_data(target_runtime, input_spec) + + # 2. Compile the model to an on-device asset + model_compile_options = model.get_hub_compile_options( + target_runtime, + compile_options + + " --force_channel_last_input image" + + " --force_channel_last_output output_0", + ) + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( + model=source_model, + input_specs=input_spec, + device=hub.Device(device), + name=model_name, + calibration_data=quant_calibration_data, + options=model_compile_options, + ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) + + # 3. Profile the model asset on real devices + profile_job: Optional[hub.client.ProfileJob] = None + if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print(f"Profiling model {model_name} on a hosted device.") + submitted_profile_job = hub.submit_profile_job( + model=compile_job.get_target_model(), + device=hub.Device(device), + name=model_name, + options=profile_options_all, + ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) + + # 4. Run inference on-device with sample inputs + inference_job: Optional[hub.client.InferenceJob] = None + if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print( + f"Running inference for {model_name} on a hosted device with example inputs." + ) + sample_inputs = model.sample_inputs(input_spec) + hub_inputs = sample_inputs + if target_runtime == TargetRuntime.QNN: + hub_inputs = get_qnn_inputs(compile_job, sample_inputs) + # Convert inputs from channel first to channel last + hub_inputs = transpose_channel_first_to_last( + "image", sample_inputs, target_runtime + ) + submitted_inference_job = hub.submit_inference_job( + model=compile_job.get_target_model(), + inputs=hub_inputs, + device=hub.Device(device), + name=model_name, + options=profile_options_all, + ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) + + # 5. Download the model asset to a local file + if not skip_downloading: + os.makedirs(output_path, exist_ok=True) + target_model: hub.Model = compile_job.get_target_model() # type: ignore + target_model.download(str(output_path / f"{model_name}.tflite")) + + # 6. 
Summarize the results from profiling and inference + if not skip_summary and not skip_profiling: + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore + print_profile_metrics_from_job(profile_job, profile_data) + + if not skip_summary and not skip_inferencing: + torch_out = torch_inference(model, sample_inputs) + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore + # Convert outputs from channel last to channel first + inference_result = transpose_channel_last_to_first( + "output_0", inference_result, target_runtime + ) + print_inference_metrics(inference_job, inference_result, torch_out) + + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + + return (compile_job, profile_job, inference_job) + + +def main(): + warnings.filterwarnings("ignore") + parser = export_parser(model_cls=Model, supports_qnn=False) + args = parser.parse_args() + export_model(**vars(args)) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/info.yaml b/qai_hub_models/models/quicksrnetmedium_quantized/info.yaml new file mode 100644 index 00000000..070615b3 --- /dev/null +++ b/qai_hub_models/models/quicksrnetmedium_quantized/info.yaml @@ -0,0 +1,35 @@ +name: QuickSRNetMedium-Quantized +# id must match with the model dir name in qai_hub_models +id: quicksrnetmedium_quantized +status: public +headline: Upscale images and remove image noise. +domain: Computer Vision +description: QuickSRNet Medium is designed for upscaling images on mobile platforms + to sharpen in real-time. +use_case: Super Resolution +tags: + - quantized +research_paper: https://arxiv.org/abs/2303.04336 +research_paper_title: 'QuickSRNet: Plain Single-Image Super-Resolution Architecture + for Faster Inference on Mobile Platforms' +license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet +technical_details: + Model checkpoint: quicksrnet_medium_4x_checkpoint_int8 + Input resolution: 128x128 + Number of parameters: 61.0K + Model size: 244 KB +applicable_scenarios: + - Virtual Real Estate Tours + - Gaming + - ARVR +form_factors: + - Phone + - Tablet +related_models: [xlsr_quantized, esrgan, quicksrnetmedium] +has_static_banner: yes +has_animated_banner: yes +license_type: other +deploy_license_type: AI Model Hub License +dataset: [] diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/model.py b/qai_hub_models/models/quicksrnetmedium_quantized/model.py new file mode 100644 index 00000000..3b6cb7b6 --- /dev/null +++ b/qai_hub_models/models/quicksrnetmedium_quantized/model.py @@ -0,0 +1,98 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from __future__ import annotations + +# isort: off +# This verifies aimet is installed, and this must be included first. 
+from qai_hub_models.utils.quantization_aimet import ( + AIMETQuantizableMixin, +) + +# isort: on + +import torch +from aimet_torch.cross_layer_equalization import equalize_model +from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim + +from qai_hub_models.models.quicksrnetmedium.model import QuickSRNetMedium +from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config_legacy_v2 +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset + +MODEL_ID = __name__.split(".")[-2] +MODEL_ASSET_VERSION = 2 + +# Weights and config stored in S3 are sourced from +# https://github.com/quic/aimet-model-zoo/blob/develop/aimet_zoo_torch/quicksrnet/model/model_cards/quicksrnet_medium_4x_w8a8.json: +# https://github.com/quic/aimet-model-zoo/releases/download/phase_2_january_artifacts/quicksrnet_medium_4x_checkpoint_int8.pth +# and +# https://raw.githubusercontent.com/quic/aimet/release-aimet-1.23/TrainingExtensions/common/src/python/aimet_common/quantsim_config/default_config_per_channel.js +# Encodings were generated with AIMET QuantSim library +QUANTIZED_WEIGHTS = "quicksrnet_medium_4x_checkpoint_int8.pth" +AIMET_ENCODINGS = "aimet_quantization_encodings.json" +SCALING_FACTOR = 4 + + +class QuickSRNetMediumQuantizable(AIMETQuantizableMixin, QuickSRNetMedium): + """QuickSRNetMedium with post train quantization support. + Supports only 8 bit weights and activations, and only loads pre-quantized checkpoints. + Support for quantizing using your own weights & data will come at a later date.""" + + def __init__( + self, + quicksrnet_model: QuantizationSimModel, + ) -> None: + QuickSRNetMedium.__init__(self, quicksrnet_model.model) + AIMETQuantizableMixin.__init__( + self, quicksrnet_model, needs_onnx_direct_aimet_export=True + ) + + @classmethod + def from_pretrained( + cls, + aimet_encodings: str | None = "DEFAULT", + ) -> "QuickSRNetMediumQuantizable": + """ + Parameters: + aimet_encodings: + if "DEFAULT": Loads the model with aimet encodings calibrated on BSD300. + elif None: Doesn't load any encodings. Used when computing encodings. + else: Interprets as a filepath and loads the encodings stored there. + """ + # Load Model + quicksrnet = QuickSRNetMedium.from_pretrained() + input_shape = quicksrnet.get_input_spec()["image"][0] + equalize_model(quicksrnet, input_shape) + + # Download weights and quantization parameters + weights = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, QUANTIZED_WEIGHTS + ).fetch() + aimet_config = get_default_aimet_config_legacy_v2() + + # Load the model weights and quantization parameters + # In this particular instance, the state_dict keys from the model are all named "model." + # where is the name of each key in the weights file - without the word model. + # We rename all the keys to add the word model + state_dict = torch.load(weights, map_location=torch.device("cpu"))["state_dict"] + new_state_dict = {"model." 
+ key: value for key, value in state_dict.items()} + quicksrnet.load_state_dict(new_state_dict) + sim = QuantizationSimModel( + quicksrnet, + quant_scheme="tf_enhanced", + default_param_bw=8, + default_output_bw=8, + config_file=aimet_config, + dummy_input=torch.rand(input_shape), + ) + if aimet_encodings: + if aimet_encodings == "DEFAULT": + aimet_encodings = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, AIMET_ENCODINGS + ).fetch() + load_encodings_to_sim(sim, aimet_encodings) + + sim.model.eval() + + return cls(sim) diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/perf.yaml b/qai_hub_models/models/quicksrnetmedium_quantized/perf.yaml new file mode 100644 index 00000000..2843af27 --- /dev/null +++ b/qai_hub_models/models/quicksrnetmedium_quantized/perf.yaml @@ -0,0 +1,108 @@ +models: +- name: QuickSRNetMedium-Quantized + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 1411.0 + throughput: 708.7172218284904 + estimated_peak_memory_range: + min: 28672 + max: 1545320 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 16 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 19 + job_id: joprkj950 + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:24:06.170051Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 1149.0 + throughput: 870.3220191470845 + estimated_peak_memory_range: + min: 20480 + max: 20002352 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 16 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 19 + job_id: jep28n4p6 + job_status: Passed + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:24:06.170059Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped +aggregated: + supported_oses: + - Android + supported_devices: + - Google Pixel 3 + - Google Pixel 3a + - Google Pixel 3a XL + - Google Pixel 4 + - Google Pixel 4a + - Google Pixel 5a 5G + - Samsung Galaxy S21 + - Samsung Galaxy S21 Ultra + - Samsung Galaxy S21+ + - Samsung Galaxy S22 5G + - Samsung Galaxy S22 Ultra 5G + - Samsung Galaxy S22+ 5G + - Samsung Galaxy S23 + - Samsung Galaxy S23 Ultra + - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra + - Samsung Galaxy Tab S8 + - Xiaomi 12 + - Xiaomi 12 Pro + supported_chipsets: + - Snapdragon® 8 Gen 1 + - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 + - Snapdragon® 888 diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/test.py b/qai_hub_models/models/quicksrnetmedium_quantized/test.py new file mode 100644 index 00000000..4da76b9d --- /dev/null +++ b/qai_hub_models/models/quicksrnetmedium_quantized/test.py @@ -0,0 +1,91 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm 
Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +import os +import tempfile +import zipfile + +import numpy as np +import pytest +import torch + +from qai_hub_models.models._shared.super_resolution.app import SuperResolutionApp +from qai_hub_models.models.quicksrnetmedium_quantized.demo import IMAGE_ADDRESS +from qai_hub_models.models.quicksrnetmedium_quantized.demo import main as demo_main +from qai_hub_models.models.quicksrnetmedium_quantized.model import ( + MODEL_ASSET_VERSION, + MODEL_ID, + QuickSRNetMediumQuantizable, +) +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image +from qai_hub_models.utils.testing import assert_most_close, skip_clone_repo_check + +OUTPUT_IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, "quicksrnetmedium_quantized_output.png" +) + + +@skip_clone_repo_check +def test_task(): + # AIMET Quantization Simulator introduces randomness. Eliminate that for this test. + torch.manual_seed(0) + image = load_image(IMAGE_ADDRESS) + model = QuickSRNetMediumQuantizable.from_pretrained() + app = SuperResolutionApp(model=model) + app_output_image = app.predict(image)[0] + + output_image = load_image(OUTPUT_IMAGE_ADDRESS) + assert_most_close( + np.asarray(app_output_image, dtype=np.float32) / 255, + np.asarray(output_image, dtype=np.float32) / 255, + diff_tol=0.005, + rtol=0.02, + atol=0.2, + ) + + +@pytest.mark.trace +@skip_clone_repo_check +def test_trace(): + image = load_image(IMAGE_ADDRESS) + output_image = load_image(OUTPUT_IMAGE_ADDRESS) + app = SuperResolutionApp( + QuickSRNetMediumQuantizable.from_pretrained().convert_to_torchscript() + ) + app_output_image = app.predict(image)[0] + + assert_most_close( + np.asarray(app_output_image, dtype=np.float32) / 255, + np.asarray(output_image, dtype=np.float32) / 255, + diff_tol=0.005, + rtol=0.02, + atol=0.2, + ) + + +@pytest.mark.skip("https://github.com/tetraai/tetracode/issues/9606") +@skip_clone_repo_check +def test_aimet_export(): + model = QuickSRNetMediumQuantizable.from_pretrained() + name = model.__class__.__name__ + with tempfile.TemporaryDirectory() as tmpdir: + output_zip = model.convert_to_onnx_and_aimet_encodings( + tmpdir, + ) + assert os.path.exists(output_zip) + with zipfile.ZipFile(output_zip, "r") as zip: + assert zip.namelist() == [ + f"{name}.aimet/", + f"{name}.aimet/{name}.onnx", + f"{name}.aimet/{name}.encodings", + ] + + +@skip_clone_repo_check +def test_demo(): + demo_main(is_test=True) + + +if __name__ == "__main__": + test_task() diff --git a/qai_hub_models/models/quicksrnetsmall/README.md b/qai_hub_models/models/quicksrnetsmall/README.md index 9b4c48ac..1c70ac12 100644 --- a/qai_hub_models/models/quicksrnetsmall/README.md +++ b/qai_hub_models/models/quicksrnetsmall/README.md @@ -10,7 +10,7 @@ This is based on the implementation of QuickSRNetSmall found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/quicksrnetsmall). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.quicksrnetsmall.demo More details on the CLI tool can be found with the `--help` option. 
See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of QuickSRNetSmall can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms](https://arxiv.org/abs/2303.04336) diff --git a/qai_hub_models/models/quicksrnetsmall/conftest.py b/qai_hub_models/models/quicksrnetsmall/conftest.py new file mode 100644 index 00000000..3d406665 --- /dev/null +++ b/qai_hub_models/models/quicksrnetsmall/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.quicksrnetsmall import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.quicksrnetsmall.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/quicksrnetsmall/demo.py b/qai_hub_models/models/quicksrnetsmall/demo.py index 880f23bd..dd0e6c43 100644 --- a/qai_hub_models/models/quicksrnetsmall/demo.py +++ b/qai_hub_models/models/quicksrnetsmall/demo.py @@ -20,6 +20,7 @@ def main(is_test: bool = False): super_resolution_demo( model_cls=QuickSRNetSmall, + model_id=MODEL_ID, default_image=IMAGE_ADDRESS, is_test=is_test, ) diff --git a/qai_hub_models/models/quicksrnetsmall/export.py b/qai_hub_models/models/quicksrnetsmall/export.py index 35428879..f4ecbcca 100644 --- a/qai_hub_models/models/quicksrnetsmall/export.py +++ b/qai_hub_models/models/quicksrnetsmall/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -110,7 +110,7 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. 
Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( @@ -119,29 +119,37 @@ def export_model( + " --force_channel_last_input image" + " --force_channel_last_output output_0", ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -150,37 +158,39 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. 
Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore # Convert outputs from channel last to channel first inference_result = transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/quicksrnetsmall/info.yaml b/qai_hub_models/models/quicksrnetsmall/info.yaml index 91aff954..128750f2 100644 --- a/qai_hub_models/models/quicksrnetsmall/info.yaml +++ b/qai_hub_models/models/quicksrnetsmall/info.yaml @@ -12,6 +12,7 @@ research_paper: https://arxiv.org/abs/2303.04336 research_paper_title: 'QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms' license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet technical_details: Model checkpoint: quicksrnet_small_4x_checkpoint_float32 @@ -27,6 +28,7 @@ form_factors: - Tablet related_models: [xlsr, esrgan, quicksrnetlarge] has_static_banner: yes -has_animated_banner: yes +has_animated_banner: no license_type: other +deploy_license_type: AI Model Hub License dataset: [] diff --git a/qai_hub_models/models/quicksrnetsmall/model.py b/qai_hub_models/models/quicksrnetsmall/model.py index 92ad52d1..54b22d82 100644 --- a/qai_hub_models/models/quicksrnetsmall/model.py +++ b/qai_hub_models/models/quicksrnetsmall/model.py @@ -40,8 +40,6 @@ def __init__( @classmethod def from_pretrained(cls) -> QuickSRNetSmall: model = _load_quicksrnet_source_model( - MODEL_ID, - MODEL_ASSET_VERSION, SCALING_FACTOR, NUM_CHANNELS, NUM_INTERMEDIATE_LAYERS, diff --git a/qai_hub_models/models/quicksrnetsmall/perf.yaml b/qai_hub_models/models/quicksrnetsmall/perf.yaml index 3cf63319..d0815180 100644 --- a/qai_hub_models/models/quicksrnetsmall/perf.yaml +++ b/qai_hub_models/models/quicksrnetsmall/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: QuickSRNetSmall performance_metrics: - torchscript_onnx_tflite: - inference_time: 1333.0 - throughput: 750.1875468867216 + inference_time: 1338.0 + throughput: 747.3841554559043 estimated_peak_memory_range: - min: 16384 - max: 8022608 + min: 24576 + max: 1376064 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,14 +43,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: j1pvl9zr5 + job_id: jygzezzg8 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-15T00:09:24.184304Z' torchscript_onnx_qnn: - inference_time: 1017.0 - throughput: 983.284169124877 + inference_time: 1025.0 + throughput: 975.609756097561 estimated_peak_memory_range: min: 212992 - max: 64518392 + max: 37245776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -55,13 +66,43 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 12 - job_id: jep2rv3qg + job_id: jnp10ok5q + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 839.0 + throughput: 1191.8951132300358 + estimated_peak_memory_range: + min: 16384 + max: 17771072 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 8 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 11 + job_id: jmg9voq57 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-22T22:36:34.984329Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-15T00:13:39.690790Z' + torchscript_onnx_qnn: + inference_time: 616.0 + throughput: 1623.3766233766235 + estimated_peak_memory_range: + min: 212992 + max: 14001568 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 12 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 12 + job_id: jz57zoqp3 + job_status: Passed diff --git a/qai_hub_models/models/quicksrnetsmall/test.py b/qai_hub_models/models/quicksrnetsmall/test.py index d7645b6f..87ccaee0 100644 --- a/qai_hub_models/models/quicksrnetsmall/test.py +++ b/qai_hub_models/models/quicksrnetsmall/test.py @@ -35,5 +35,6 @@ def test_task(): ) +@skip_clone_repo_check def test_demo(): demo_main(is_test=True) diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/README.md b/qai_hub_models/models/quicksrnetsmall_quantized/README.md new file mode 100644 index 00000000..a8cf057f --- /dev/null +++ b/qai_hub_models/models/quicksrnetsmall_quantized/README.md @@ -0,0 +1,54 @@ +[![Qualcomm® AI Hub Models](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/quic-logo.jpg)](../../README.md) + + +# [QuickSRNetSmall-Quantized: Upscale images and remove image noise](https://aihub.qualcomm.com/models/quicksrnetsmall_quantized) + +QuickSRNet Small is designed for upscaling images on mobile platforms to sharpen in real-time. + +This is based on the implementation of QuickSRNetSmall-Quantized found +[here](https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet). This repository contains scripts for optimized on-device +export suitable to run on Qualcomm® devices. More details on model performance +accross various devices, can be found [here](https://aihub.qualcomm.com/models/quicksrnetsmall_quantized). + +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on +a hosted Qualcomm® device. + + +## Example & Usage + + +Once installed, run the following simple CLI demo: + +```bash +python -m qai_hub_models.models.quicksrnetsmall_quantized.demo +``` +More details on the CLI tool can be found with the `--help` option. 
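The quantized demo above can be driven programmatically in the same way as the float model. The sketch below mirrors the `quicksrnetsmall_quantized` tests added in this patch; it assumes the AIMET dependencies required by the quantized models are installed, and the local image path is illustrative. The `aimet_encodings` options follow the `from_pretrained` docstring in the accompanying `model.py`.

```python
# Sketch of the quantized flow, mirroring the quicksrnetsmall_quantized tests in
# this patch. Requires the AIMET packages that quantized qai_hub_models depend on.
import torch

from qai_hub_models.models._shared.super_resolution.app import SuperResolutionApp
from qai_hub_models.models.quicksrnetsmall_quantized import Model
from qai_hub_models.utils.asset_loaders import load_image

torch.manual_seed(0)  # QuantSim introduces randomness; the tests seed it for repeatable output
model = Model.from_pretrained()  # "DEFAULT": loads encodings calibrated on BSD300
# model = Model.from_pretrained(aimet_encodings=None)  # or skip encodings when computing your own
app = SuperResolutionApp(model=model)
upscaled = app.predict(load_image("low_res_input.jpg"))[0]  # hypothetical local image
```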
See +[demo.py](demo.py) for sample usage of the model including pre/post processing +scripts. Please refer to our [general instructions on using +models](../../../#getting-started) for more usage instructions. + +## Export for on-device deployment + +This repository contains export scripts that produce a model optimized for +on-device deployment. This can be run as follows: + +```bash +python -m qai_hub_models.models.quicksrnetsmall_quantized.export +``` +Additional options are documented with the `--help` option. Note that the above +script requires access to Deployment instructions for Qualcomm® AI Hub. + +## License +- The license for the original implementation of QuickSRNetSmall-Quantized can be found + [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) + +## References +* [QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms](https://arxiv.org/abs/2303.04336) +* [Source Model Implementation](https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet) + +## Community +* Join [our AI Hub Slack community](https://join.slack.com/t/qualcomm-ai-hub/shared_invite/zt-2dgf95loi-CXHTDRR1rvPgQWPO~ZZZJg) to collaborate, post questions and learn more about on-device AI. +* For questions or feedback please [reach out to us](mailto:ai-hub-support@qti.qualcomm.com). + + diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/__init__.py b/qai_hub_models/models/quicksrnetsmall_quantized/__init__.py new file mode 100644 index 00000000..ef07760b --- /dev/null +++ b/qai_hub_models/models/quicksrnetsmall_quantized/__init__.py @@ -0,0 +1,10 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.super_resolution.app import ( # noqa: F401 + SuperResolutionApp as App, +) + +from .model import MODEL_ID # noqa: F401 +from .model import QuickSRNetSmallQuantizable as Model # noqa: F401 diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/conftest.py b/qai_hub_models/models/quicksrnetsmall_quantized/conftest.py new file mode 100644 index 00000000..d24003b5 --- /dev/null +++ b/qai_hub_models/models/quicksrnetsmall_quantized/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.quicksrnetsmall_quantized import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. 
+ """ + mock = patch( + "qai_hub_models.models.quicksrnetsmall_quantized.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/demo.py b/qai_hub_models/models/quicksrnetsmall_quantized/demo.py new file mode 100644 index 00000000..cb2dcd45 --- /dev/null +++ b/qai_hub_models/models/quicksrnetsmall_quantized/demo.py @@ -0,0 +1,28 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.super_resolution.demo import super_resolution_demo +from qai_hub_models.models.quicksrnetsmall_quantized.model import ( + MODEL_ASSET_VERSION, + MODEL_ID, + QuickSRNetSmallQuantizable, +) +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset + +IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, "quicksrnet_demo.jpg" +) + + +def main(is_test: bool = False): + super_resolution_demo( + QuickSRNetSmallQuantizable, + MODEL_ID, + default_image=IMAGE_ADDRESS, + is_test=is_test, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/export.py b/qai_hub_models/models/quicksrnetsmall_quantized/export.py new file mode 100644 index 00000000..4944af8c --- /dev/null +++ b/qai_hub_models/models/quicksrnetsmall_quantized/export.py @@ -0,0 +1,215 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import os +import warnings +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple, cast + +import qai_hub as hub + +from qai_hub_models.models.quicksrnetsmall_quantized import Model +from qai_hub_models.utils.args import ( + export_parser, + get_input_spec_kwargs, + get_model_kwargs, +) +from qai_hub_models.utils.base_model import TargetRuntime +from qai_hub_models.utils.compare import torch_inference +from qai_hub_models.utils.printing import ( + print_inference_metrics, + print_on_target_demo_cmd, + print_profile_metrics_from_job, +) +from qai_hub_models.utils.qai_hub_helpers import ( + can_access_qualcomm_ai_hub, + export_without_hub_access, + transpose_channel_first_to_last, + transpose_channel_last_to_first, +) +from qai_hub_models.utils.qnn_helpers import get_qnn_inputs + + +def export_model( + device: str = "Samsung Galaxy S23", + skip_profiling: bool = False, + skip_inferencing: bool = False, + skip_downloading: bool = False, + skip_summary: bool = False, + output_dir: Optional[str] = None, + target_runtime: TargetRuntime = TargetRuntime.TFLITE, + compile_options: str = "", + profile_options: str = "", + **additional_model_kwargs, +) -> Tuple[hub.CompileJob, Optional[hub.ProfileJob], Optional[hub.InferenceJob]] | List[ + str +]: + """ + This function accomplishes 6 main tasks: + + 1. Instantiates a PyTorch model and converts it to a traced TorchScript format. + 2. Compiles the model to an asset that can be run on device. + 3. Profiles the model performance on real devices. + 4. Inferences the model on sample inputs. + 5. Downloads the model asset to the local directory. 
+ 6. Summarizes the results from profiling and inference. + + Each of the last four steps can be optionally skipped using the input options. + + Parameters: + device: Device for which to export the model. + Full list of available devices can be found by running `hub.get_devices()`. + Defaults to DEFAULT_DEVICE if not specified. + skip_profiling: If set, skips profiling of compiled model on real devices. + skip_inferencing: If set, skips computing on-device outputs from sample data. + skip_downloading: If set, skips downloading of compiled model. + skip_summary: If set, skips waiting for and summarizing results + from profiling and inference. + output_dir: Directory to store generated assets (e.g. compiled model). + Defaults to `/build/`. + target_runtime: Which on-device runtime to target. Default is TFLite. + compile_options: Additional options to pass when submitting the compile job. + profile_options: Additional options to pass when submitting the profile job. + **additional_model_kwargs: Additional optional kwargs used to customize + `model_cls.from_pretrained` and `model.get_input_spec` + + Returns: + A 3-tuple of: + * A CompileJob object containing metadata about the compile job submitted to hub. + * A ProfileJob containing metadata about the profile job (None if profiling skipped). + * An InferenceJob containing metadata about the inference job (None if inferencing skipped). + """ + model_name = "quicksrnetsmall_quantized" + output_path = Path(output_dir or Path.cwd() / "build" / model_name) + if not can_access_qualcomm_ai_hub(): + return export_without_hub_access( + "quicksrnetsmall_quantized", + "QuickSRNetSmall-Quantized", + device, + skip_profiling, + skip_inferencing, + skip_downloading, + skip_summary, + output_path, + target_runtime, + compile_options, + profile_options, + ) + + # 1. Initialize PyTorch model + model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) + input_spec = model.get_input_spec( + **get_input_spec_kwargs(model, additional_model_kwargs) + ) + + # Trace the model + source_model = model.convert_to_hub_source_model( + target_runtime, output_path, input_spec + ) + if target_runtime == TargetRuntime.TFLITE: + quant_calibration_data = None + else: + quant_calibration_data = model.get_calibration_data(target_runtime, input_spec) + + # 2. Compile the model to an on-device asset + model_compile_options = model.get_hub_compile_options( + target_runtime, + compile_options + + " --force_channel_last_input image" + + " --force_channel_last_output output_0", + ) + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( + model=source_model, + input_specs=input_spec, + device=hub.Device(device), + name=model_name, + calibration_data=quant_calibration_data, + options=model_compile_options, + ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) + + # 3. Profile the model asset on real devices + profile_job: Optional[hub.client.ProfileJob] = None + if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print(f"Profiling model {model_name} on a hosted device.") + submitted_profile_job = hub.submit_profile_job( + model=compile_job.get_target_model(), + device=hub.Device(device), + name=model_name, + options=profile_options_all, + ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) + + # 4. 
Run inference on-device with sample inputs + inference_job: Optional[hub.client.InferenceJob] = None + if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print( + f"Running inference for {model_name} on a hosted device with example inputs." + ) + sample_inputs = model.sample_inputs(input_spec) + hub_inputs = sample_inputs + if target_runtime == TargetRuntime.QNN: + hub_inputs = get_qnn_inputs(compile_job, sample_inputs) + # Convert inputs from channel first to channel last + hub_inputs = transpose_channel_first_to_last( + "image", sample_inputs, target_runtime + ) + submitted_inference_job = hub.submit_inference_job( + model=compile_job.get_target_model(), + inputs=hub_inputs, + device=hub.Device(device), + name=model_name, + options=profile_options_all, + ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) + + # 5. Download the model asset to a local file + if not skip_downloading: + os.makedirs(output_path, exist_ok=True) + target_model: hub.Model = compile_job.get_target_model() # type: ignore + target_model.download(str(output_path / f"{model_name}.tflite")) + + # 6. Summarize the results from profiling and inference + if not skip_summary and not skip_profiling: + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore + print_profile_metrics_from_job(profile_job, profile_data) + + if not skip_summary and not skip_inferencing: + torch_out = torch_inference(model, sample_inputs) + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore + # Convert outputs from channel last to channel first + inference_result = transpose_channel_last_to_first( + "output_0", inference_result, target_runtime + ) + print_inference_metrics(inference_job, inference_result, torch_out) + + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + + return (compile_job, profile_job, inference_job) + + +def main(): + warnings.filterwarnings("ignore") + parser = export_parser(model_cls=Model, supports_qnn=False) + args = parser.parse_args() + export_model(**vars(args)) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/info.yaml b/qai_hub_models/models/quicksrnetsmall_quantized/info.yaml new file mode 100644 index 00000000..fad05b98 --- /dev/null +++ b/qai_hub_models/models/quicksrnetsmall_quantized/info.yaml @@ -0,0 +1,35 @@ +name: QuickSRNetSmall-Quantized +# id must match with the model dir name in qai_hub_models +id: quicksrnetsmall_quantized +status: public +headline: Upscale images and remove image noise. +domain: Computer Vision +description: QuickSRNet Small is designed for upscaling images on mobile platforms + to sharpen in real-time. 
+use_case: Super Resolution +tags: + - quantized +research_paper: https://arxiv.org/abs/2303.04336 +research_paper_title: 'QuickSRNet: Plain Single-Image Super-Resolution Architecture + for Faster Inference on Mobile Platforms' +license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet +technical_details: + Model checkpoint: quicksrnet_small_4x_checkpoint_int8 + Input resolution: 128x128 + Number of parameters: 33.3K + Model size: 42.5 KB +applicable_scenarios: + - Virtual Real Estate Tours + - Gaming + - ARVR +form_factors: + - Phone + - Tablet +related_models: [xlsr_quantized, esrgan, quicksrnetsmall] +has_static_banner: yes +has_animated_banner: yes +license_type: other +deploy_license_type: AI Model Hub License +dataset: [] diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/model.py b/qai_hub_models/models/quicksrnetsmall_quantized/model.py new file mode 100644 index 00000000..5ba7fb5e --- /dev/null +++ b/qai_hub_models/models/quicksrnetsmall_quantized/model.py @@ -0,0 +1,97 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from __future__ import annotations + +# isort: off +# This verifies aimet is installed, and this must be included first. +from qai_hub_models.utils.quantization_aimet import ( + AIMETQuantizableMixin, +) + +# isort: on + +import torch +from aimet_torch.cross_layer_equalization import equalize_model +from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim + +from qai_hub_models.models.quicksrnetsmall.model import QuickSRNetSmall +from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config_legacy_v2 +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset + +MODEL_ID = __name__.split(".")[-2] +MODEL_ASSET_VERSION = 2 + +# Weights and config stored in S3 are sourced from +# https://github.com/quic/aimet-model-zoo/blob/develop/aimet_zoo_torch/quicksrnet/model/model_cards/quicksrnet_small_4x_w8a8.json: +# https://github.com/quic/aimet-model-zoo/releases/download/phase_2_january_artifacts/quicksrnet_small_4x_checkpoint_int8.pth +# and +# https://raw.githubusercontent.com/quic/aimet/release-aimet-1.23/TrainingExtensions/common/src/python/aimet_common/quantsim_config/default_config_per_channel.js +# Encodings were generated with AIMET QuantSim library +QUANTIZED_WEIGHTS = "quicksrnet_small_4x_checkpoint_int8.pth" +AIMET_ENCODINGS = "aimet_quantization_encodings.json" +SCALING_FACTOR = 4 + + +class QuickSRNetSmallQuantizable(AIMETQuantizableMixin, QuickSRNetSmall): + """QuickSRNetSmall with post train quantization support. + Supports only 8 bit weights and activations, and only loads pre-quantized checkpoints. 
+ Support for quantizing using your own weights & data will come at a later date.""" + + def __init__( + self, + quicksrnet_model: QuantizationSimModel, + ) -> None: + QuickSRNetSmall.__init__(self, quicksrnet_model.model) + AIMETQuantizableMixin.__init__( + self, quicksrnet_model, needs_onnx_direct_aimet_export=True + ) + + @classmethod + def from_pretrained( + cls, aimet_encodings: str | None = "DEFAULT" + ) -> "QuickSRNetSmallQuantizable": + """ + Parameters: + aimet_encodings: + if "DEFAULT": Loads the model with aimet encodings calibrated on BSD300. + elif None: Doesn't load any encodings. Used when computing encodings. + else: Interprets as a filepath and loads the encodings stored there. + """ + # Load Model + quicksrnet = QuickSRNetSmall.from_pretrained() + input_shape = quicksrnet.get_input_spec()["image"][0] + equalize_model(quicksrnet, input_shape) + + # Download weights and quantization parameters + weights = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, QUANTIZED_WEIGHTS + ).fetch() + aimet_config = get_default_aimet_config_legacy_v2() + + # Load the model weights and quantization parameters + # In this particular instance, the state_dict keys from the model are all named "model." + # where is the name of each key in the weights file - without the word model. + # We rename all the keys to add the word model + state_dict = torch.load(weights, map_location=torch.device("cpu"))["state_dict"] + new_state_dict = {"model." + key: value for key, value in state_dict.items()} + quicksrnet.load_state_dict(new_state_dict) + sim = QuantizationSimModel( + quicksrnet, + quant_scheme="tf_enhanced", + default_param_bw=8, + default_output_bw=8, + config_file=aimet_config, + dummy_input=torch.rand(input_shape), + ) + if aimet_encodings: + if aimet_encodings == "DEFAULT": + aimet_encodings = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, AIMET_ENCODINGS + ).fetch() + load_encodings_to_sim(sim, aimet_encodings) + + sim.model.eval() + + return cls(sim) diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/perf.yaml b/qai_hub_models/models/quicksrnetsmall_quantized/perf.yaml new file mode 100644 index 00000000..8766199c --- /dev/null +++ b/qai_hub_models/models/quicksrnetsmall_quantized/perf.yaml @@ -0,0 +1,108 @@ +models: +- name: QuickSRNetSmall-Quantized + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 1355.0 + throughput: 738.0073800738007 + estimated_peak_memory_range: + min: 20480 + max: 2224928 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 10 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 13 + job_id: jz57zknp3 + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:22:40.346377Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 1099.0 + throughput: 909.9181073703367 + estimated_peak_memory_range: + min: 20480 + max: 20205264 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 10 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 13 + job_id: jqp4qm2go + job_status: Passed + reference_device_info: + name: Samsung 
Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:22:40.346384Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped +aggregated: + supported_oses: + - Android + supported_devices: + - Google Pixel 3 + - Google Pixel 3a + - Google Pixel 3a XL + - Google Pixel 4 + - Google Pixel 4a + - Google Pixel 5a 5G + - Samsung Galaxy S21 + - Samsung Galaxy S21 Ultra + - Samsung Galaxy S21+ + - Samsung Galaxy S22 5G + - Samsung Galaxy S22 Ultra 5G + - Samsung Galaxy S22+ 5G + - Samsung Galaxy S23 + - Samsung Galaxy S23 Ultra + - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra + - Samsung Galaxy Tab S8 + - Xiaomi 12 + - Xiaomi 12 Pro + supported_chipsets: + - Snapdragon® 8 Gen 1 + - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 + - Snapdragon® 888 diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/test.py b/qai_hub_models/models/quicksrnetsmall_quantized/test.py new file mode 100644 index 00000000..be878b99 --- /dev/null +++ b/qai_hub_models/models/quicksrnetsmall_quantized/test.py @@ -0,0 +1,87 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +import os +import tempfile +import zipfile + +import numpy as np +import pytest +import torch + +from qai_hub_models.models._shared.super_resolution.app import SuperResolutionApp +from qai_hub_models.models.quicksrnetsmall_quantized.demo import IMAGE_ADDRESS +from qai_hub_models.models.quicksrnetsmall_quantized.demo import main as demo_main +from qai_hub_models.models.quicksrnetsmall_quantized.model import ( + MODEL_ASSET_VERSION, + MODEL_ID, + QuickSRNetSmallQuantizable, +) +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image +from qai_hub_models.utils.testing import assert_most_close, skip_clone_repo_check + +OUTPUT_IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, "quicksrnetsmall_quantized_output.png" +) + + +@skip_clone_repo_check +def test_task(): + # AIMET Quantization Simulator introduces randomness. Eliminate that for this test. 
+ torch.manual_seed(0) + image = load_image(IMAGE_ADDRESS) + model = QuickSRNetSmallQuantizable.from_pretrained() + app = SuperResolutionApp(model=model) + app_output_image = app.predict(image)[0] + + output_image = load_image(OUTPUT_IMAGE_ADDRESS) + assert_most_close( + np.asarray(app_output_image, dtype=np.float32) / 255, + np.asarray(output_image, dtype=np.float32) / 255, + diff_tol=0.005, + rtol=0.02, + atol=0.2, + ) + + +@pytest.mark.trace +@skip_clone_repo_check +def test_trace(): + image = load_image(IMAGE_ADDRESS) + output_image = load_image(OUTPUT_IMAGE_ADDRESS) + app = SuperResolutionApp( + QuickSRNetSmallQuantizable.from_pretrained().convert_to_torchscript() + ) + app_output_image = app.predict(image)[0] + + assert_most_close( + np.asarray(app_output_image, dtype=np.float32) / 255, + np.asarray(output_image, dtype=np.float32) / 255, + diff_tol=0.005, + rtol=0.02, + atol=0.2, + ) + + +@pytest.mark.skip("https://github.com/tetraai/tetracode/issues/9606") +@skip_clone_repo_check +def test_aimet_export(): + model = QuickSRNetSmallQuantizable.from_pretrained() + name = model.__class__.__name__ + with tempfile.TemporaryDirectory() as tmpdir: + output_zip = model.convert_to_onnx_and_aimet_encodings( + tmpdir, + ) + assert os.path.exists(output_zip) + with zipfile.ZipFile(output_zip, "r") as zip: + assert zip.namelist() == [ + f"{name}.aimet/", + f"{name}.aimet/{name}.onnx", + f"{name}.aimet/{name}.encodings", + ] + + +@skip_clone_repo_check +def test_demo(): + demo_main(is_test=True) diff --git a/qai_hub_models/models/real_esrgan_general_x4v3/README.md b/qai_hub_models/models/real_esrgan_general_x4v3/README.md index a3ce066c..982d103e 100644 --- a/qai_hub_models/models/real_esrgan_general_x4v3/README.md +++ b/qai_hub_models/models/real_esrgan_general_x4v3/README.md @@ -10,7 +10,7 @@ This is based on the implementation of Real-ESRGAN-General-x4v3 found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/real_esrgan_general_x4v3). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -30,7 +30,7 @@ python -m qai_hub_models.models.real_esrgan_general_x4v3.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -46,7 +46,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Real-ESRGAN-General-x4v3 can be found [here](https://github.com/xinntao/Real-ESRGAN/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). 
+- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Real-ESRGAN: Training Real-World Blind Super-Resolution with Pure Synthetic Data](https://arxiv.org/abs/2107.10833) diff --git a/qai_hub_models/models/real_esrgan_general_x4v3/conftest.py b/qai_hub_models/models/real_esrgan_general_x4v3/conftest.py new file mode 100644 index 00000000..a24a89d5 --- /dev/null +++ b/qai_hub_models/models/real_esrgan_general_x4v3/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.real_esrgan_general_x4v3 import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.real_esrgan_general_x4v3.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/real_esrgan_general_x4v3/demo.py b/qai_hub_models/models/real_esrgan_general_x4v3/demo.py index ae8541e7..27580a37 100644 --- a/qai_hub_models/models/real_esrgan_general_x4v3/demo.py +++ b/qai_hub_models/models/real_esrgan_general_x4v3/demo.py @@ -21,6 +21,7 @@ def main(is_test: bool = False): super_resolution_demo( model_cls=Real_ESRGAN_General_x4v3, + model_id=MODEL_ID, default_image=IMAGE_ADDRESS, is_test=is_test, ) diff --git a/qai_hub_models/models/real_esrgan_general_x4v3/export.py b/qai_hub_models/models/real_esrgan_general_x4v3/export.py index 4f86924c..7f5ce2b6 100644 --- a/qai_hub_models/models/real_esrgan_general_x4v3/export.py +++ b/qai_hub_models/models/real_esrgan_general_x4v3/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -26,6 +26,7 @@ from qai_hub_models.utils.input_spec import make_torch_inputs from qai_hub_models.utils.printing import ( print_inference_metrics, + print_on_target_demo_cmd, print_profile_metrics_from_job, ) from qai_hub_models.utils.qai_hub_helpers import ( @@ -109,7 +110,7 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( @@ -118,29 +119,37 @@ def export_model( + " --force_channel_last_input image" + " --force_channel_last_output output_0", ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. 
Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -149,36 +158,40 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore # Convert outputs from channel last to channel first inference_result = transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) print_inference_metrics(inference_job, inference_result, torch_out) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/real_esrgan_general_x4v3/info.yaml b/qai_hub_models/models/real_esrgan_general_x4v3/info.yaml index f2dbd297..157e0325 100644 --- a/qai_hub_models/models/real_esrgan_general_x4v3/info.yaml +++ b/qai_hub_models/models/real_esrgan_general_x4v3/info.yaml @@ -12,6 +12,7 @@ research_paper: https://arxiv.org/abs/2107.10833 research_paper_title: 'Real-ESRGAN: Training Real-World Blind Super-Resolution with Pure Synthetic Data' license: https://github.com/xinntao/Real-ESRGAN/blob/master/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/xinntao/Real-ESRGAN/tree/master technical_details: Model 
checkpoint: realesr-general-x4v3 @@ -33,4 +34,5 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: [] diff --git a/qai_hub_models/models/real_esrgan_general_x4v3/perf.yaml b/qai_hub_models/models/real_esrgan_general_x4v3/perf.yaml index 5305963f..c79aa05c 100644 --- a/qai_hub_models/models/real_esrgan_general_x4v3/perf.yaml +++ b/qai_hub_models/models/real_esrgan_general_x4v3/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: Real-ESRGAN-General-x4v3 performance_metrics: - torchscript_onnx_tflite: - inference_time: 7168.0 - throughput: 139.50892857142858 + inference_time: 7285.0 + throughput: 137.26835964310226 estimated_peak_memory_range: - min: 15761408 - max: 27106520 + min: 15745024 + max: 20241416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,14 +43,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 72 - job_id: jmg9zy3qp + job_id: j1glno2pv job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:12:36.720476Z' torchscript_onnx_qnn: - inference_time: 6995.0 - throughput: 142.9592566118656 + inference_time: 6983.0 + throughput: 143.20492624946297 estimated_peak_memory_range: - min: 45056 - max: 67127640 + min: 12288 + max: 10852600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -55,13 +66,43 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 73 - job_id: jnp1nwdkg + job_id: j1p3kxm52 + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 5660.0 + throughput: 176.67844522968198 + estimated_peak_memory_range: + min: 57344 + max: 53042192 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 69 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 72 + job_id: jw566rn5o job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:15:20.798589Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:17:08.081378Z' + torchscript_onnx_qnn: + inference_time: 4939.0 + throughput: 202.47013565499088 + estimated_peak_memory_range: + min: 208896 + max: 32676160 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 73 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 73 + job_id: jwgoyo158 + job_status: Passed diff --git a/qai_hub_models/models/real_esrgan_general_x4v3/requirements.txt b/qai_hub_models/models/real_esrgan_general_x4v3/requirements.txt index 80ca5630..00e6cc01 100644 --- a/qai_hub_models/models/real_esrgan_general_x4v3/requirements.txt +++ b/qai_hub_models/models/real_esrgan_general_x4v3/requirements.txt @@ -1,6 +1,3 @@ -opencv-python -PyYAML -requests -scipy -seaborn -basicsr +scipy==1.8.1 +seaborn==0.11.0 +basicsr==1.4.2 diff --git a/qai_hub_models/models/real_esrgan_x4plus/README.md b/qai_hub_models/models/real_esrgan_x4plus/README.md index 9ef627ff..b5c8fd81 100644 --- a/qai_hub_models/models/real_esrgan_x4plus/README.md +++ 
b/qai_hub_models/models/real_esrgan_x4plus/README.md @@ -10,7 +10,7 @@ This is based on the implementation of Real-ESRGAN-x4plus found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/real_esrgan_x4plus). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -30,7 +30,7 @@ python -m qai_hub_models.models.real_esrgan_x4plus.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -46,7 +46,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Real-ESRGAN-x4plus can be found [here](https://github.com/xinntao/Real-ESRGAN/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Real-ESRGAN: Training Real-World Blind Super-Resolution with Pure Synthetic Data](https://arxiv.org/abs/2107.10833) diff --git a/qai_hub_models/models/real_esrgan_x4plus/conftest.py b/qai_hub_models/models/real_esrgan_x4plus/conftest.py new file mode 100644 index 00000000..c6d91d61 --- /dev/null +++ b/qai_hub_models/models/real_esrgan_x4plus/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.real_esrgan_x4plus import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. 
+ """ + mock = patch( + "qai_hub_models.models.real_esrgan_x4plus.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/real_esrgan_x4plus/demo.py b/qai_hub_models/models/real_esrgan_x4plus/demo.py index 5eeb17e2..60e6495a 100644 --- a/qai_hub_models/models/real_esrgan_x4plus/demo.py +++ b/qai_hub_models/models/real_esrgan_x4plus/demo.py @@ -21,6 +21,7 @@ def main(is_test: bool = False): super_resolution_demo( model_cls=Real_ESRGAN_x4plus, + model_id=MODEL_ID, default_image=IMAGE_ADDRESS, is_test=is_test, ) diff --git a/qai_hub_models/models/real_esrgan_x4plus/export.py b/qai_hub_models/models/real_esrgan_x4plus/export.py index c0bade9f..73ab228c 100644 --- a/qai_hub_models/models/real_esrgan_x4plus/export.py +++ b/qai_hub_models/models/real_esrgan_x4plus/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -26,6 +26,7 @@ from qai_hub_models.utils.input_spec import make_torch_inputs from qai_hub_models.utils.printing import ( print_inference_metrics, + print_on_target_demo_cmd, print_profile_metrics_from_job, ) from qai_hub_models.utils.qai_hub_helpers import ( @@ -107,65 +108,77 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( target_runtime, compile_options ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) sample_inputs = model.sample_inputs(input_spec) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=sample_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. 
Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/real_esrgan_x4plus/info.yaml b/qai_hub_models/models/real_esrgan_x4plus/info.yaml index 3b9bc903..578bb93f 100644 --- a/qai_hub_models/models/real_esrgan_x4plus/info.yaml +++ b/qai_hub_models/models/real_esrgan_x4plus/info.yaml @@ -10,6 +10,7 @@ tags: [] research_paper: https://arxiv.org/abs/2107.10833 research_paper_title: "Real-ESRGAN: Training Real-World Blind Super-Resolution with Pure Synthetic Data" license: https://github.com/xinntao/Real-ESRGAN/blob/master/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/xinntao/Real-ESRGAN technical_details: Number of parameters: 16.7M @@ -27,4 +28,5 @@ related_models: ['esrgan', 'real_esrgan_general_x4v3'] has_static_banner: yes has_animated_banner: yes license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: [] diff --git a/qai_hub_models/models/real_esrgan_x4plus/perf.yaml b/qai_hub_models/models/real_esrgan_x4plus/perf.yaml index d059c356..3bc0350c 100644 --- a/qai_hub_models/models/real_esrgan_x4plus/perf.yaml +++ b/qai_hub_models/models/real_esrgan_x4plus/perf.yaml @@ -17,37 +17,48 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: Real-ESRGAN-x4plus performance_metrics: - torchscript_onnx_tflite: - inference_time: 69426.0 - throughput: 14.40382565609426 + inference_time: 'null' + throughput: 'null' estimated_peak_memory_range: - min: 3272704 - max: 6458720 - primary_compute_unit: NPU - precision: fp16 + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' layer_info: - layers_on_npu: 1028 + layers_on_npu: 0 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 1028 - job_id: jygzl8665 - job_status: Passed + total_layers: 0 + job_id: '' + job_status: Skipped + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:12:52.767646Z' 
torchscript_onnx_qnn: - inference_time: 67244.0 - throughput: 14.87121527571233 + inference_time: 66635.0 + throughput: 15.007128385983343 estimated_peak_memory_range: - min: 102400 - max: 106071688 + min: 94208 + max: 104137800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -55,13 +66,43 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1031 - job_id: jygzljxz5 + job_id: jz57zzlp3 job_status: Passed + - torchscript_onnx_tflite: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-22T18:50:48.142201Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:16:11.670851Z' + torchscript_onnx_qnn: + inference_time: 50978.0 + throughput: 19.61630507277649 + estimated_peak_memory_range: + min: 90112 + max: 248878432 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 1031 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 1031 + job_id: jqp4qqvgo + job_status: Passed diff --git a/qai_hub_models/models/real_esrgan_x4plus/requirements.txt b/qai_hub_models/models/real_esrgan_x4plus/requirements.txt index 6292b978..00e6cc01 100644 --- a/qai_hub_models/models/real_esrgan_x4plus/requirements.txt +++ b/qai_hub_models/models/real_esrgan_x4plus/requirements.txt @@ -1,4 +1,3 @@ -opencv-python -scipy -seaborn -basicsr +scipy==1.8.1 +seaborn==0.11.0 +basicsr==1.4.2 diff --git a/qai_hub_models/models/real_esrgan_x4plus/test.py b/qai_hub_models/models/real_esrgan_x4plus/test.py index 905f473c..e473252b 100644 --- a/qai_hub_models/models/real_esrgan_x4plus/test.py +++ b/qai_hub_models/models/real_esrgan_x4plus/test.py @@ -35,5 +35,6 @@ def test_task(): ) +@skip_clone_repo_check def test_demo(): demo_main(is_test=True) diff --git a/qai_hub_models/models/regnet/README.md b/qai_hub_models/models/regnet/README.md index a123a009..9448b7b8 100644 --- a/qai_hub_models/models/regnet/README.md +++ b/qai_hub_models/models/regnet/README.md @@ -10,7 +10,7 @@ This is based on the implementation of RegNet found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/regnet). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.regnet.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of RegNet can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Designing Network Design Spaces](https://arxiv.org/abs/2003.13678) diff --git a/qai_hub_models/models/regnet/conftest.py b/qai_hub_models/models/regnet/conftest.py new file mode 100644 index 00000000..27aafbbe --- /dev/null +++ b/qai_hub_models/models/regnet/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.regnet import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.regnet.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/regnet/demo.py b/qai_hub_models/models/regnet/demo.py index 3fe6310f..b4fb48c7 100644 --- a/qai_hub_models/models/regnet/demo.py +++ b/qai_hub_models/models/regnet/demo.py @@ -3,11 +3,11 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo -from qai_hub_models.models.regnet.model import RegNet +from qai_hub_models.models.regnet.model import MODEL_ID, RegNet def main(is_test: bool = False): - imagenet_demo(RegNet, is_test) + imagenet_demo(RegNet, MODEL_ID, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/regnet/export.py b/qai_hub_models/models/regnet/export.py index ff916e30..2d4e54ce 100644 --- a/qai_hub_models/models/regnet/export.py +++ b/qai_hub_models/models/regnet/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,35 +109,43 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image_tensor" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. 
Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -146,33 +154,35 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/regnet/info.yaml b/qai_hub_models/models/regnet/info.yaml index 457a281f..863f909d 100644 --- a/qai_hub_models/models/regnet/info.yaml +++ b/qai_hub_models/models/regnet/info.yaml @@ -13,6 +13,7 @@ tags: research_paper: https://arxiv.org/abs/2003.13678 research_paper_title: Designing Network Design Spaces license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/regnet.py technical_details: Model checkpoint: Imagenet @@ -35,6 +36,7 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: bsd-3-clause +deploy_license_type: AI Model Hub License 
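The README changes throughout this patch replace the hard-coded deploy-license URL with a `{deploy_license_url}` placeholder, while each `info.yaml` gains the `deploy_license` / `deploy_license_type` fields added above. A minimal sketch of how such a placeholder could be filled from the YAML using PyYAML — the actual README-generation code is not part of this diff, so the helper below is purely illustrative:

```python
# Illustrative only: fills the {deploy_license_url} placeholder used in the README
# templates from the deploy_license field added to info.yaml in this patch.
import yaml


def render_readme(readme_template_path: str, info_yaml_path: str) -> str:
    with open(info_yaml_path) as f:
        info = yaml.safe_load(f)
    with open(readme_template_path) as f:
        template = f.read()
    # deploy_license is the URL added to info.yaml in this patch.
    return template.replace("{deploy_license_url}", info["deploy_license"])
```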
dataset: - imagenet-1k - imagenet-22k diff --git a/qai_hub_models/models/regnet/perf.yaml b/qai_hub_models/models/regnet/perf.yaml index 896bd9cb..f5814c03 100644 --- a/qai_hub_models/models/regnet/perf.yaml +++ b/qai_hub_models/models/regnet/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: RegNet performance_metrics: - torchscript_onnx_tflite: - inference_time: 1921.0 - throughput: 520.5622071837585 + inference_time: 1974.0 + throughput: 506.5856129685917 estimated_peak_memory_range: - min: 16384 - max: 1931624 + min: 32768 + max: 1789416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,14 +43,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: jogk2q8og + job_id: jqpyey4gy job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-15T00:08:16.252038Z' torchscript_onnx_qnn: - inference_time: 1659.0 - throughput: 602.7727546714889 + inference_time: 1675.0 + throughput: 597.0149253731344 estimated_peak_memory_range: - min: 237568 - max: 59498896 + min: 241664 + max: 59486296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -55,13 +66,43 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 187 - job_id: jn5qlrvmp + job_id: j1p8ok8g9 + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 1359.0 + throughput: 735.8351729212657 + estimated_peak_memory_range: + min: 16384 + max: 131931280 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 112 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 112 + job_id: j2p0yxegw job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:36:39.546315Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-15T00:11:23.127753Z' + torchscript_onnx_qnn: + inference_time: 1197.0 + throughput: 835.421888053467 + estimated_peak_memory_range: + min: 618496 + max: 68520544 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 187 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 187 + job_id: jogkzkogd + job_status: Passed diff --git a/qai_hub_models/models/regnet/test.py b/qai_hub_models/models/regnet/test.py index f6ebd563..fdb34aaf 100644 --- a/qai_hub_models/models/regnet/test.py +++ b/qai_hub_models/models/regnet/test.py @@ -2,6 +2,8 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
# SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- +import pytest + from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( run_imagenet_classifier_test, run_imagenet_classifier_trace_test, @@ -21,6 +23,7 @@ def test_task(): ) +@pytest.mark.trace def test_trace(): run_imagenet_classifier_trace_test(RegNet.from_pretrained()) diff --git a/qai_hub_models/models/resnet101/README.md b/qai_hub_models/models/resnet101/README.md index 88189d34..415f99bb 100644 --- a/qai_hub_models/models/resnet101/README.md +++ b/qai_hub_models/models/resnet101/README.md @@ -10,7 +10,7 @@ This is based on the implementation of ResNet101 found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/resnet101). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.resnet101.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ResNet101 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) diff --git a/qai_hub_models/models/resnet101/conftest.py b/qai_hub_models/models/resnet101/conftest.py new file mode 100644 index 00000000..b2281868 --- /dev/null +++ b/qai_hub_models/models/resnet101/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.resnet101 import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. 
+ """ + mock = patch( + "qai_hub_models.models.resnet101.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/resnet101/demo.py b/qai_hub_models/models/resnet101/demo.py index 1b1a3524..9c59e469 100644 --- a/qai_hub_models/models/resnet101/demo.py +++ b/qai_hub_models/models/resnet101/demo.py @@ -3,11 +3,11 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo -from qai_hub_models.models.resnet101.model import ResNet101 +from qai_hub_models.models.resnet101.model import MODEL_ID, ResNet101 def main(is_test: bool = False): - imagenet_demo(ResNet101, is_test) + imagenet_demo(ResNet101, MODEL_ID, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/resnet101/export.py b/qai_hub_models/models/resnet101/export.py index d9c41ba1..92c2ea4b 100644 --- a/qai_hub_models/models/resnet101/export.py +++ b/qai_hub_models/models/resnet101/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,35 +109,43 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image_tensor" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -146,33 +154,35 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. 
Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/resnet101/info.yaml b/qai_hub_models/models/resnet101/info.yaml index 7d37336b..f1410fb4 100644 --- a/qai_hub_models/models/resnet101/info.yaml +++ b/qai_hub_models/models/resnet101/info.yaml @@ -13,6 +13,7 @@ tags: research_paper: https://arxiv.org/abs/1512.03385 research_paper_title: Deep Residual Learning for Image Recognition license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py technical_details: Model checkpoint: Imagenet @@ -35,6 +36,7 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k diff --git a/qai_hub_models/models/resnet101/perf.yaml b/qai_hub_models/models/resnet101/perf.yaml index 3baa5107..cd12a3b8 100644 --- a/qai_hub_models/models/resnet101/perf.yaml +++ b/qai_hub_models/models/resnet101/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: ResNet101 performance_metrics: - torchscript_onnx_tflite: - inference_time: 3008.0 - throughput: 332.4468085106383 + inference_time: 2993.0 + throughput: 334.1129301703976 estimated_peak_memory_range: min: 28672 - max: 1505496 + max: 1903408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,14 +43,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jnp1nw6lg + job_id: j7gjxmxpd job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:51:59.720577Z' torchscript_onnx_qnn: - inference_time: 2895.0 - throughput: 345.4231433506045 + inference_time: 2921.0 + throughput: 342.3485107839781 
estimated_peak_memory_range: min: 622592 - max: 226606408 + max: 226849752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -55,13 +66,43 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 244 - job_id: jvgddq2lg + job_id: jygze9kg8 + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 2221.0 + throughput: 450.24763619990995 + estimated_peak_memory_range: + min: 16384 + max: 103000720 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 145 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 145 + job_id: jlpe911gr job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:20:33.212112Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:54:07.329383Z' + torchscript_onnx_qnn: + inference_time: 2126.0 + throughput: 470.36688617121354 + estimated_peak_memory_range: + min: 618496 + max: 71779728 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 244 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 244 + job_id: jz5won6p1 + job_status: Passed diff --git a/qai_hub_models/models/resnet101/test.py b/qai_hub_models/models/resnet101/test.py index f27696cd..ac498cf1 100644 --- a/qai_hub_models/models/resnet101/test.py +++ b/qai_hub_models/models/resnet101/test.py @@ -2,6 +2,8 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- +import pytest + from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( run_imagenet_classifier_test, run_imagenet_classifier_trace_test, @@ -21,6 +23,7 @@ def test_task(): ) +@pytest.mark.trace def test_trace(): run_imagenet_classifier_trace_test(ResNet101.from_pretrained()) diff --git a/qai_hub_models/models/resnet101_quantized/README.md b/qai_hub_models/models/resnet101_quantized/README.md index 64886381..822c7f98 100644 --- a/qai_hub_models/models/resnet101_quantized/README.md +++ b/qai_hub_models/models/resnet101_quantized/README.md @@ -10,7 +10,7 @@ This is based on the implementation of ResNet101Quantized found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/resnet101_quantized). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.resnet101_quantized.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ResNet101Quantized can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) diff --git a/qai_hub_models/models/resnet101_quantized/conftest.py b/qai_hub_models/models/resnet101_quantized/conftest.py new file mode 100644 index 00000000..f7bf84da --- /dev/null +++ b/qai_hub_models/models/resnet101_quantized/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.resnet101_quantized import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.resnet101_quantized.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/resnet101_quantized/demo.py b/qai_hub_models/models/resnet101_quantized/demo.py index a3c5dff7..4f7f5032 100644 --- a/qai_hub_models/models/resnet101_quantized/demo.py +++ b/qai_hub_models/models/resnet101_quantized/demo.py @@ -3,11 +3,14 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo -from qai_hub_models.models.resnet101_quantized.model import ResNet101Quantizable +from qai_hub_models.models.resnet101_quantized.model import ( + MODEL_ID, + ResNet101Quantizable, +) def main(is_test: bool = False): - imagenet_demo(ResNet101Quantizable, is_test) + imagenet_demo(ResNet101Quantizable, MODEL_ID, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/resnet101_quantized/export.py b/qai_hub_models/models/resnet101_quantized/export.py index 01fb1b0c..774ea807 100644 --- a/qai_hub_models/models/resnet101_quantized/export.py +++ b/qai_hub_models/models/resnet101_quantized/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub @@ -120,8 +120,8 @@ def export_model( model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image_tensor" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), @@ -129,21 +129,29 @@ def export_model( calibration_data=quant_calibration_data, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. 
Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -155,42 +163,44 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics( inference_job, inference_result, torch_out, metrics="psnr,top1,top5" ) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/resnet101_quantized/info.yaml b/qai_hub_models/models/resnet101_quantized/info.yaml index e9f4491f..e25f53d7 100644 --- a/qai_hub_models/models/resnet101_quantized/info.yaml +++ b/qai_hub_models/models/resnet101_quantized/info.yaml @@ -14,6 +14,7 @@ tags: research_paper: https://arxiv.org/abs/1512.03385 research_paper_title: Deep Residual Learning for Image Recognition license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: 
https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py technical_details: Model checkpoint: Imagenet @@ -36,6 +37,7 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k diff --git a/qai_hub_models/models/resnet101_quantized/model.py b/qai_hub_models/models/resnet101_quantized/model.py index 3ff4ad4c..bcfb3730 100644 --- a/qai_hub_models/models/resnet101_quantized/model.py +++ b/qai_hub_models/models/resnet101_quantized/model.py @@ -8,7 +8,6 @@ # This verifies aimet is installed, and this must be included first. from qai_hub_models.utils.quantization_aimet import ( AIMETQuantizableMixin, - HubCompileOptionsInt8Mixin, ) # isort: on @@ -18,19 +17,22 @@ equalize_bn_folded_model, fold_all_batch_norms, ) +from aimet_torch.model_preparer import prepare_model from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim from qai_hub_models.models.resnet101.model import ResNet101 -from qai_hub_models.utils.aimet.config_loader import get_per_channel_aimet_config +from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config from qai_hub_models.utils.asset_loaders import CachedWebModelAsset +from qai_hub_models.utils.base_model import SourceModelFormat, TargetRuntime MODEL_ID = __name__.split(".")[-2] -MODEL_ASSET_VERSION = 3 +MODEL_ASSET_VERSION = 4 DEFAULT_ENCODINGS = "resnet101_quantized_encodings.json" class ResNet101Quantizable( - HubCompileOptionsInt8Mixin, AIMETQuantizableMixin, ResNet101 + AIMETQuantizableMixin, + ResNet101, ): """ResNet101 with post train quantization support. @@ -43,9 +45,15 @@ def __init__( ) -> None: ResNet101.__init__(self, sim_model.model) AIMETQuantizableMixin.__init__( - self, sim_model, needs_onnx_direct_aimet_export=False + self, + sim_model, ) + def preferred_hub_source_model_format( + self, target_runtime: TargetRuntime + ) -> SourceModelFormat: + return SourceModelFormat.ONNX + @classmethod def from_pretrained( cls, @@ -59,17 +67,18 @@ def from_pretrained( else: Interprets as a filepath and loads the encodings stored there. 
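The `from_pretrained` docstring above describes how the encodings argument is interpreted; a short usage sketch, assuming the package and its AIMET dependencies are installed (the argument name is not shown in this hunk, so only the documented default behavior is exercised):

```python
# Usage sketch for the loader described in the docstring above.
from qai_hub_models.models.resnet101_quantized.model import ResNet101Quantizable

# Default: loads the pre-quantized encodings shipped with MODEL_ASSET_VERSION 4.
model = ResNet101Quantizable.from_pretrained()

# Per the docstring, passing a filepath instead loads encodings stored there,
# and passing None skips loading (used when computing encodings from scratch).
```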
""" model = ResNet101.from_pretrained() - input_shape = model.get_input_spec()["image_tensor"][0] + input_shape = cls.get_input_spec()["image_tensor"][0] + model = prepare_model(model) dummy_input = torch.rand(input_shape) pairs = fold_all_batch_norms(model, input_shape, dummy_input) equalize_bn_folded_model(model, input_shape, pairs, dummy_input) sim = QuantizationSimModel( - model.net, + model, quant_scheme="tf_enhanced", default_param_bw=8, default_output_bw=8, - config_file=get_per_channel_aimet_config(), + config_file=get_default_aimet_config(), dummy_input=torch.rand(input_shape), ) @@ -82,3 +91,11 @@ def from_pretrained( sim.model.eval() return cls(sim) + + def get_hub_compile_options( + self, target_runtime: TargetRuntime, other_compile_options: str = "" + ) -> str: + compile_options = super().get_hub_compile_options( + target_runtime, other_compile_options + ) + return compile_options + " --quantize_full_type int8 --quantize_io" diff --git a/qai_hub_models/models/resnet101_quantized/perf.yaml b/qai_hub_models/models/resnet101_quantized/perf.yaml index b8f30516..f5bd3459 100644 --- a/qai_hub_models/models/resnet101_quantized/perf.yaml +++ b/qai_hub_models/models/resnet101_quantized/perf.yaml @@ -17,51 +17,92 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: ResNet101Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 74926.0 - throughput: 13.346501881856765 + inference_time: 1122.0 + throughput: 891.2655971479501 estimated_peak_memory_range: - min: 151552 - max: 2762960 + min: 12288 + max: 2141424 primary_compute_unit: NPU - precision: fp16 + precision: int8 layer_info: - layers_on_npu: 149 + layers_on_npu: 146 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 149 - job_id: joprl2nep + total_layers: 146 + job_id: jvgdw7z5j job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-15T00:05:31.212967Z' torchscript_onnx_qnn: - inference_time: 'null' - throughput: 'null' + inference_time: 1101.0 + throughput: 908.2652134423251 + estimated_peak_memory_range: + min: 12288 + max: 196790880 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 144 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 144 + job_id: jvgdw765j + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 839.0 + throughput: 1191.8951132300358 estimated_peak_memory_range: - min: 0 - max: 0 - primary_compute_unit: 'null' - precision: 'null' + min: 12288 + max: 91234848 + primary_compute_unit: NPU + precision: int8 layer_info: - layers_on_npu: 0 + layers_on_npu: 146 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 0 - job_id: '' - job_status: Skipped + total_layers: 146 + job_id: jmg9v9m57 + job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:31:35.238685Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-15T00:07:20.101134Z' + torchscript_onnx_qnn: + inference_time: 830.0 + throughput: 1204.8192771084337 + estimated_peak_memory_range: + min: 167936 + max: 53969312 + 
primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 144 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 144 + job_id: jo5mrv7gk + job_status: Passed diff --git a/qai_hub_models/models/resnet101_quantized/test.py b/qai_hub_models/models/resnet101_quantized/test.py index fb9b6b7c..876ebffe 100644 --- a/qai_hub_models/models/resnet101_quantized/test.py +++ b/qai_hub_models/models/resnet101_quantized/test.py @@ -4,7 +4,6 @@ # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( run_imagenet_classifier_test, - run_imagenet_classifier_trace_test, ) from qai_hub_models.models.resnet101_quantized.demo import main as demo_main from qai_hub_models.models.resnet101_quantized.model import ( @@ -26,16 +25,6 @@ def test_task(): ) -def test_trace(): - run_imagenet_classifier_trace_test( - ResNet101Quantizable.from_pretrained(), - is_quantized=True, - diff_tol=0.005, - rtol=0.02, - atol=0.2, - ) - - def test_demo(): # Verify demo does not crash demo_main(is_test=True) diff --git a/qai_hub_models/models/resnet18/README.md b/qai_hub_models/models/resnet18/README.md index d9466a14..0be3c986 100644 --- a/qai_hub_models/models/resnet18/README.md +++ b/qai_hub_models/models/resnet18/README.md @@ -10,7 +10,7 @@ This is based on the implementation of ResNet18 found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/resnet18). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.resnet18.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ResNet18 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) diff --git a/qai_hub_models/models/resnet18/conftest.py b/qai_hub_models/models/resnet18/conftest.py new file mode 100644 index 00000000..86a5865d --- /dev/null +++ b/qai_hub_models/models/resnet18/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.resnet18 import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.resnet18.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/resnet18/demo.py b/qai_hub_models/models/resnet18/demo.py index 8d7dcc1b..ccbabc44 100644 --- a/qai_hub_models/models/resnet18/demo.py +++ b/qai_hub_models/models/resnet18/demo.py @@ -3,11 +3,11 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo -from qai_hub_models.models.resnet18.model import ResNet18 +from qai_hub_models.models.resnet18.model import MODEL_ID, ResNet18 def main(is_test: bool = False): - imagenet_demo(ResNet18, is_test) + imagenet_demo(ResNet18, MODEL_ID, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/resnet18/export.py b/qai_hub_models/models/resnet18/export.py index b01f4ead..6dc6e2b2 100644 --- a/qai_hub_models/models/resnet18/export.py +++ b/qai_hub_models/models/resnet18/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,35 +109,43 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image_tensor" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." 
) @@ -146,33 +154,35 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/resnet18/info.yaml b/qai_hub_models/models/resnet18/info.yaml index 91be8429..8b06179f 100644 --- a/qai_hub_models/models/resnet18/info.yaml +++ b/qai_hub_models/models/resnet18/info.yaml @@ -13,6 +13,7 @@ tags: research_paper: https://arxiv.org/abs/1512.03385 research_paper_title: Deep Residual Learning for Image Recognition license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py technical_details: Model checkpoint: Imagenet @@ -35,6 +36,7 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k diff --git a/qai_hub_models/models/resnet18/perf.yaml b/qai_hub_models/models/resnet18/perf.yaml index f58a8d78..0bc59fb1 100644 --- a/qai_hub_models/models/resnet18/perf.yaml +++ b/qai_hub_models/models/resnet18/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: ResNet18 performance_metrics: - torchscript_onnx_tflite: - inference_time: 1054.0 - throughput: 948.7666034155598 + inference_time: 1053.0 + throughput: 949.667616334283 estimated_peak_memory_range: - min: 12288 - max: 1722456 + min: 32768 + max: 2028832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,14 
+43,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 36 - job_id: j1p3z1xx5 + job_id: j2p0y8egw job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:41:20.341762Z' torchscript_onnx_qnn: - inference_time: 980.0 - throughput: 1020.4081632653061 + inference_time: 989.0 + throughput: 1011.1223458038422 estimated_peak_memory_range: - min: 16384 - max: 84353688 + min: 12288 + max: 84688848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -55,13 +66,43 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 52 - job_id: jwgolno4g + job_id: jogkzwogd + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 771.0 + throughput: 1297.0168612191958 + estimated_peak_memory_range: + min: 12288 + max: 23627952 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 36 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 36 + job_id: j1p8od8g9 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:25:13.005640Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:45:48.504221Z' + torchscript_onnx_qnn: + inference_time: 717.0 + throughput: 1394.700139470014 + estimated_peak_memory_range: + min: 630784 + max: 25268288 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 52 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 52 + job_id: jn5q8xm57 + job_status: Passed diff --git a/qai_hub_models/models/resnet18/test.py b/qai_hub_models/models/resnet18/test.py index 591e93e3..eac1e51e 100644 --- a/qai_hub_models/models/resnet18/test.py +++ b/qai_hub_models/models/resnet18/test.py @@ -2,6 +2,8 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- +import pytest + from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( run_imagenet_classifier_test, run_imagenet_classifier_trace_test, @@ -21,6 +23,7 @@ def test_task(): ) +@pytest.mark.trace def test_trace(): run_imagenet_classifier_trace_test(ResNet18.from_pretrained()) diff --git a/qai_hub_models/models/resnet18_quantized/README.md b/qai_hub_models/models/resnet18_quantized/README.md index 4952a434..676945a5 100644 --- a/qai_hub_models/models/resnet18_quantized/README.md +++ b/qai_hub_models/models/resnet18_quantized/README.md @@ -10,7 +10,7 @@ This is based on the implementation of ResNet18Quantized found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/resnet18_quantized). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.resnet18_quantized.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. 
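The export flow updated throughout this patch now returns the submitted jobs with explicit types, and the profile and inference jobs are `None` when the corresponding steps are skipped. A minimal sketch of consuming that return value programmatically — the keyword name below is assumed from the surrounding `export.py` diffs rather than from a documented API:

```python
# Illustrative sketch; the return convention (compile_job, profile_job, inference_job)
# and the job methods used here are taken from the export.py changes in this patch.
from qai_hub_models.models.resnet18.export import export_model

compile_job, profile_job, inference_job = export_model(device="Samsung Galaxy S23")

# profile_job is None when profiling was skipped.
if profile_job is not None and profile_job.wait().success:
    profile_data = profile_job.download_profile()
    print(profile_data)
```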
## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ResNet18Quantized can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) diff --git a/qai_hub_models/models/resnet18_quantized/conftest.py b/qai_hub_models/models/resnet18_quantized/conftest.py new file mode 100644 index 00000000..7afa5897 --- /dev/null +++ b/qai_hub_models/models/resnet18_quantized/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.resnet18_quantized import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.resnet18_quantized.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/resnet18_quantized/demo.py b/qai_hub_models/models/resnet18_quantized/demo.py index bc848d5c..4d9909dc 100644 --- a/qai_hub_models/models/resnet18_quantized/demo.py +++ b/qai_hub_models/models/resnet18_quantized/demo.py @@ -3,11 +3,11 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo -from qai_hub_models.models.resnet18_quantized.model import ResNet18Quantizable +from qai_hub_models.models.resnet18_quantized.model import MODEL_ID, ResNet18Quantizable def main(is_test: bool = False): - imagenet_demo(ResNet18Quantizable, is_test) + imagenet_demo(ResNet18Quantizable, MODEL_ID, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/resnet18_quantized/export.py b/qai_hub_models/models/resnet18_quantized/export.py index 0d8d0e85..d8d39572 100644 --- a/qai_hub_models/models/resnet18_quantized/export.py +++ b/qai_hub_models/models/resnet18_quantized/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub @@ -120,8 +120,8 @@ def export_model( model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image_tensor" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), @@ -129,21 +129,29 @@ def export_model( calibration_data=quant_calibration_data, options=model_compile_options, ) + 
compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -155,35 +163,37 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics( inference_job, inference_result, torch_out, metrics="psnr,top1,top5" ) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/resnet18_quantized/info.yaml b/qai_hub_models/models/resnet18_quantized/info.yaml index 20fcdfc5..f132fa42 100644 --- a/qai_hub_models/models/resnet18_quantized/info.yaml +++ b/qai_hub_models/models/resnet18_quantized/info.yaml @@ -14,6 +14,7 @@ tags: research_paper: https://arxiv.org/abs/1512.03385 research_paper_title: Deep Residual Learning for Image Recognition license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py technical_details: Model checkpoint: 
Imagenet @@ -36,6 +37,7 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k diff --git a/qai_hub_models/models/resnet18_quantized/model.py b/qai_hub_models/models/resnet18_quantized/model.py index 7641d899..e000d973 100644 --- a/qai_hub_models/models/resnet18_quantized/model.py +++ b/qai_hub_models/models/resnet18_quantized/model.py @@ -8,26 +8,26 @@ # This verifies aimet is installed, and this must be included first. from qai_hub_models.utils.quantization_aimet import ( AIMETQuantizableMixin, - HubCompileOptionsInt8Mixin, ) # isort: on import torch from aimet_torch.cross_layer_equalization import equalize_model +from aimet_torch.model_preparer import prepare_model from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim from qai_hub_models.models.resnet18.model import ResNet18 -from qai_hub_models.utils.aimet.config_loader import get_aimet_config_path +from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config from qai_hub_models.utils.asset_loaders import CachedWebModelAsset +from qai_hub_models.utils.base_model import SourceModelFormat, TargetRuntime MODEL_ID = __name__.split(".")[-2] -MODEL_ASSET_VERSION = 6 +MODEL_ASSET_VERSION = 7 DEFAULT_ENCODINGS = "resnet18_quantized_encodings.json" -AIMET_CONFIG = "default_config_per_channel_qnn" -class ResNet18Quantizable(HubCompileOptionsInt8Mixin, AIMETQuantizableMixin, ResNet18): +class ResNet18Quantizable(AIMETQuantizableMixin, ResNet18): """ResNet with post train quantization support. Supports only 8 bit weights and activations, and only loads pre-quantized checkpoints. @@ -39,9 +39,15 @@ def __init__( ) -> None: ResNet18.__init__(self, resnet18_model.model) AIMETQuantizableMixin.__init__( - self, resnet18_model, needs_onnx_direct_aimet_export=False + self, + resnet18_model, ) + def preferred_hub_source_model_format( + self, target_runtime: TargetRuntime + ) -> SourceModelFormat: + return SourceModelFormat.ONNX + @classmethod def from_pretrained( cls, @@ -54,16 +60,17 @@ def from_pretrained( elif None: Doesn't load any encodings. Used when computing encodings. else: Interprets as a filepath and loads the encodings stored there. 
""" - resnet18 = ResNet18.from_pretrained() - input_shape = resnet18.get_input_spec()["image_tensor"][0] + model = ResNet18.from_pretrained() + input_shape = cls.get_input_spec()["image_tensor"][0] - equalize_model(resnet18, input_shape) + model = prepare_model(model) + equalize_model(model, input_shape) sim = QuantizationSimModel( - resnet18.net, + model, quant_scheme="tf_enhanced", default_param_bw=8, default_output_bw=8, - config_file=get_aimet_config_path(AIMET_CONFIG), + config_file=get_default_aimet_config(), dummy_input=torch.rand(input_shape), ) @@ -76,3 +83,11 @@ def from_pretrained( sim.model.eval() return cls(sim) + + def get_hub_compile_options( + self, target_runtime: TargetRuntime, other_compile_options: str = "" + ) -> str: + compile_options = super().get_hub_compile_options( + target_runtime, other_compile_options + ) + return compile_options + " --quantize_full_type int8 --quantize_io" diff --git a/qai_hub_models/models/resnet18_quantized/perf.yaml b/qai_hub_models/models/resnet18_quantized/perf.yaml index 38fc614e..df4b298b 100644 --- a/qai_hub_models/models/resnet18_quantized/perf.yaml +++ b/qai_hub_models/models/resnet18_quantized/perf.yaml @@ -17,51 +17,92 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: ResNet18Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 375.0 - throughput: 2666.6666666666665 + inference_time: 356.0 + throughput: 2808.9887640449438 estimated_peak_memory_range: min: 12288 - max: 14684784 + max: 1529808 primary_compute_unit: NPU - precision: fp16 + precision: int8 layer_info: layers_on_npu: 37 layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 37 - job_id: j2p0mj06g + job_id: j1p3k8m52 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:59:49.885782Z' torchscript_onnx_qnn: - inference_time: 359.0 - throughput: 2785.515320334262 + inference_time: 354.0 + throughput: 2824.858757062147 estimated_peak_memory_range: - min: 12288 - max: 71291800 + min: 20480 + max: 62738248 primary_compute_unit: NPU - precision: fp16 + precision: int8 layer_info: layers_on_npu: 35 layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 35 - job_id: j1glyxm85 + job_id: j1pv34z5x + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 301.0 + throughput: 3322.2591362126245 + estimated_peak_memory_range: + min: 12288 + max: 23414560 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 37 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 37 + job_id: jwgoym158 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-23T04:45:17.544674Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-15T00:03:01.789875Z' + torchscript_onnx_qnn: + inference_time: 282.0 + throughput: 3546.099290780142 + estimated_peak_memory_range: + min: 12288 + max: 21538672 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 35 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 35 + job_id: j7gjx11pd + job_status: Passed diff --git 
a/qai_hub_models/models/resnet18_quantized/test.py b/qai_hub_models/models/resnet18_quantized/test.py index 88db0ac9..4405e8d2 100644 --- a/qai_hub_models/models/resnet18_quantized/test.py +++ b/qai_hub_models/models/resnet18_quantized/test.py @@ -4,7 +4,6 @@ # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( run_imagenet_classifier_test, - run_imagenet_classifier_trace_test, ) from qai_hub_models.models.resnet18_quantized.demo import main as demo_main from qai_hub_models.models.resnet18_quantized.model import ( @@ -26,16 +25,6 @@ def test_task(): ) -def test_trace(): - run_imagenet_classifier_trace_test( - ResNet18Quantizable.from_pretrained(), - diff_tol=0.007, - rtol=0.02, - atol=0.2, - is_quantized=True, - ) - - def test_demo(): # Verify demo does not crash demo_main(is_test=True) diff --git a/qai_hub_models/models/resnet50/README.md b/qai_hub_models/models/resnet50/README.md index ec7ff6e2..210c9f96 100644 --- a/qai_hub_models/models/resnet50/README.md +++ b/qai_hub_models/models/resnet50/README.md @@ -10,7 +10,7 @@ This is based on the implementation of ResNet50 found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/resnet50). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.resnet50.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ResNet50 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) diff --git a/qai_hub_models/models/resnet50/conftest.py b/qai_hub_models/models/resnet50/conftest.py new file mode 100644 index 00000000..809b3143 --- /dev/null +++ b/qai_hub_models/models/resnet50/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.resnet50 import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. 
+ """ + mock = patch( + "qai_hub_models.models.resnet50.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/resnet50/demo.py b/qai_hub_models/models/resnet50/demo.py index 81d4a995..8b596d3d 100644 --- a/qai_hub_models/models/resnet50/demo.py +++ b/qai_hub_models/models/resnet50/demo.py @@ -3,11 +3,11 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo -from qai_hub_models.models.resnet50.model import ResNet50 +from qai_hub_models.models.resnet50.model import MODEL_ID, ResNet50 def main(is_test: bool = False): - imagenet_demo(ResNet50, is_test) + imagenet_demo(ResNet50, MODEL_ID, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/resnet50/export.py b/qai_hub_models/models/resnet50/export.py index ceacf649..e7835d92 100644 --- a/qai_hub_models/models/resnet50/export.py +++ b/qai_hub_models/models/resnet50/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,35 +109,43 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image_tensor" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -146,33 +154,35 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. 
Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/resnet50/info.yaml b/qai_hub_models/models/resnet50/info.yaml index 5f726eb5..7b52d418 100644 --- a/qai_hub_models/models/resnet50/info.yaml +++ b/qai_hub_models/models/resnet50/info.yaml @@ -13,6 +13,7 @@ tags: research_paper: https://arxiv.org/abs/1512.03385 research_paper_title: Deep Residual Learning for Image Recognition license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py technical_details: Model checkpoint: Imagenet @@ -34,6 +35,7 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k diff --git a/qai_hub_models/models/resnet50/perf.yaml b/qai_hub_models/models/resnet50/perf.yaml index f070af48..0037092b 100644 --- a/qai_hub_models/models/resnet50/perf.yaml +++ b/qai_hub_models/models/resnet50/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: ResNet50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 1904.0 - throughput: 525.2100840336135 + inference_time: 1898.0 + throughput: 526.8703898840885 estimated_peak_memory_range: - min: 20480 - max: 2314168 + min: 36864 + max: 2234848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,14 +43,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 77 - job_id: j1p8em6zp + job_id: j2p0yk0gw job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:27:02.086108Z' torchscript_onnx_qnn: - inference_time: 1768.0 - throughput: 565.6108597285067 + inference_time: 1790.0 + throughput: 558.659217877095 
estimated_peak_memory_range: - min: 634880 - max: 186280024 + min: 626688 + max: 186659664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -55,13 +66,43 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 125 - job_id: jogk2qoyg + job_id: jogkzdvgd + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 1392.0 + throughput: 718.3908045977012 + estimated_peak_memory_range: + min: 16384 + max: 68731008 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 77 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 77 + job_id: j1p8o8qg9 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:07:34.762219Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:28:58.275338Z' + torchscript_onnx_qnn: + inference_time: 1307.0 + throughput: 765.1109410864575 + estimated_peak_memory_range: + min: 0 + max: 45987408 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 125 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 125 + job_id: jn5q8we57 + job_status: Passed diff --git a/qai_hub_models/models/resnet50/test.py b/qai_hub_models/models/resnet50/test.py index ca60e960..911fc066 100644 --- a/qai_hub_models/models/resnet50/test.py +++ b/qai_hub_models/models/resnet50/test.py @@ -2,6 +2,8 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- +import pytest + from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( run_imagenet_classifier_test, run_imagenet_classifier_trace_test, @@ -21,6 +23,7 @@ def test_task(): ) +@pytest.mark.trace def test_trace(): run_imagenet_classifier_trace_test(ResNet50.from_pretrained()) diff --git a/qai_hub_models/models/resnext101/README.md b/qai_hub_models/models/resnext101/README.md index 17da6857..0b3904ee 100644 --- a/qai_hub_models/models/resnext101/README.md +++ b/qai_hub_models/models/resnext101/README.md @@ -10,7 +10,7 @@ This is based on the implementation of ResNeXt101 found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/resnext101). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.resnext101.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ResNeXt101 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). 
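The perf.yaml records above pair each `inference_time` with a `throughput` value. The two fields are consistent if `inference_time` is read in microseconds and `throughput` as inferences per second, i.e. throughput = 1e6 / inference_time; a quick check against the ResNet50 numbers in the perf.yaml hunk above:

```python
# Sanity-check the relationship between the recorded fields
# (values taken from the ResNet50 perf.yaml hunk above).
entries = {
    1898.0: 526.8703898840885,   # Galaxy S23, torchscript_onnx_tflite
    1790.0: 558.659217877095,    # Galaxy S23, torchscript_onnx_qnn
    1392.0: 718.3908045977012,   # Galaxy S24, torchscript_onnx_tflite
}
for inference_time_us, recorded_throughput in entries.items():
    assert abs(1e6 / inference_time_us - recorded_throughput) < 1e-6
```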
+- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Aggregated Residual Transformations for Deep Neural Networks](https://arxiv.org/abs/1611.05431) diff --git a/qai_hub_models/models/resnext101/conftest.py b/qai_hub_models/models/resnext101/conftest.py new file mode 100644 index 00000000..fbfcb9c4 --- /dev/null +++ b/qai_hub_models/models/resnext101/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.resnext101 import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.resnext101.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/resnext101/demo.py b/qai_hub_models/models/resnext101/demo.py index ffb7bc4d..c9f8003d 100644 --- a/qai_hub_models/models/resnext101/demo.py +++ b/qai_hub_models/models/resnext101/demo.py @@ -3,11 +3,11 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo -from qai_hub_models.models.resnext101.model import ResNeXt101 +from qai_hub_models.models.resnext101.model import MODEL_ID, ResNeXt101 def main(is_test: bool = False): - imagenet_demo(ResNeXt101, is_test) + imagenet_demo(ResNeXt101, MODEL_ID, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/resnext101/export.py b/qai_hub_models/models/resnext101/export.py index 5d591b8f..e1b1fcd6 100644 --- a/qai_hub_models/models/resnext101/export.py +++ b/qai_hub_models/models/resnext101/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,35 +109,43 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image_tensor" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. 
Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -146,33 +154,35 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/resnext101/info.yaml b/qai_hub_models/models/resnext101/info.yaml index e2662e67..40e7612f 100644 --- a/qai_hub_models/models/resnext101/info.yaml +++ b/qai_hub_models/models/resnext101/info.yaml @@ -13,6 +13,7 @@ tags: research_paper: https://arxiv.org/abs/1611.05431 research_paper_title: Aggregated Residual Transformations for Deep Neural Networks license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py technical_details: Model checkpoint: Imagenet @@ -34,6 +35,7 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: bsd-3-clause 
+deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k diff --git a/qai_hub_models/models/resnext101/perf.yaml b/qai_hub_models/models/resnext101/perf.yaml index 05f03000..d8d493ff 100644 --- a/qai_hub_models/models/resnext101/perf.yaml +++ b/qai_hub_models/models/resnext101/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: ResNeXt101 performance_metrics: - torchscript_onnx_tflite: - inference_time: 6434.0 - throughput: 155.4243083618278 + inference_time: 6315.0 + throughput: 158.3531274742676 estimated_peak_memory_range: min: 28672 - max: 2709368 + max: 2570472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,14 +43,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: j1pvlr475 + job_id: j2p0yrngw job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:40:16.043830Z' torchscript_onnx_qnn: - inference_time: 6146.0 - throughput: 162.70745200130165 + inference_time: 6079.0 + throughput: 164.50074025333114 estimated_peak_memory_range: min: 16384 - max: 38657672 + max: 34444952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -55,13 +66,43 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 244 - job_id: j7gjr217p + job_id: jogkzyngd + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 4552.0 + throughput: 219.6836555360281 + estimated_peak_memory_range: + min: 20480 + max: 357156576 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 145 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 145 + job_id: j1p8o7og9 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:21:26.759411Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:42:21.825443Z' + torchscript_onnx_qnn: + inference_time: 4377.0 + throughput: 228.4669865204478 + estimated_peak_memory_range: + min: 618496 + max: 123852368 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 244 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 244 + job_id: jn5q82o57 + job_status: Passed diff --git a/qai_hub_models/models/resnext101/test.py b/qai_hub_models/models/resnext101/test.py index 257a15b6..dab967e5 100644 --- a/qai_hub_models/models/resnext101/test.py +++ b/qai_hub_models/models/resnext101/test.py @@ -2,6 +2,8 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
# SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- +import pytest + from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( run_imagenet_classifier_test, run_imagenet_classifier_trace_test, @@ -16,6 +18,7 @@ def test_task(): ) +@pytest.mark.trace def test_trace(): run_imagenet_classifier_trace_test(ResNeXt101.from_pretrained()) diff --git a/qai_hub_models/models/resnext101_quantized/README.md b/qai_hub_models/models/resnext101_quantized/README.md index 3e071fe6..e5a91213 100644 --- a/qai_hub_models/models/resnext101_quantized/README.md +++ b/qai_hub_models/models/resnext101_quantized/README.md @@ -10,7 +10,7 @@ This is based on the implementation of ResNeXt101Quantized found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/resnext101_quantized). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.resnext101_quantized.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ResNeXt101Quantized can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Aggregated Residual Transformations for Deep Neural Networks](https://arxiv.org/abs/1611.05431) diff --git a/qai_hub_models/models/resnext101_quantized/conftest.py b/qai_hub_models/models/resnext101_quantized/conftest.py new file mode 100644 index 00000000..66e0502c --- /dev/null +++ b/qai_hub_models/models/resnext101_quantized/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.resnext101_quantized import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. 
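The auto-generated conftest.py files added throughout this patch all follow the same pattern: an autouse fixture patches `Model.from_pretrained` so every call made during that package's tests returns one cached instance. A hypothetical test illustrating the effect while the fixture above is active (the test name is illustrative, not part of the patch):

```python
from qai_hub_models.models.resnext101_quantized import Model


def test_from_pretrained_is_cached():
    # Both calls hit the patched classmethod installed by the autouse fixture,
    # so they return the very same cached model instance.
    assert Model.from_pretrained() is Model.from_pretrained()
```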
+ """ + mock = patch( + "qai_hub_models.models.resnext101_quantized.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/resnext101_quantized/demo.py b/qai_hub_models/models/resnext101_quantized/demo.py index 51d4cde1..140307a2 100644 --- a/qai_hub_models/models/resnext101_quantized/demo.py +++ b/qai_hub_models/models/resnext101_quantized/demo.py @@ -3,11 +3,14 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo -from qai_hub_models.models.resnext101_quantized.model import ResNeXt101Quantizable +from qai_hub_models.models.resnext101_quantized.model import ( + MODEL_ID, + ResNeXt101Quantizable, +) def main(is_test: bool = False): - imagenet_demo(ResNeXt101Quantizable, is_test) + imagenet_demo(ResNeXt101Quantizable, MODEL_ID, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/resnext101_quantized/export.py b/qai_hub_models/models/resnext101_quantized/export.py index ed09dd42..865e3205 100644 --- a/qai_hub_models/models/resnext101_quantized/export.py +++ b/qai_hub_models/models/resnext101_quantized/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub @@ -120,8 +120,8 @@ def export_model( model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image_tensor" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), @@ -129,21 +129,29 @@ def export_model( calibration_data=quant_calibration_data, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -155,35 +163,37 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. 
Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics( inference_job, inference_result, torch_out, metrics="psnr,top1,top5" ) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/resnext101_quantized/info.yaml b/qai_hub_models/models/resnext101_quantized/info.yaml index 8d169468..47a2496c 100644 --- a/qai_hub_models/models/resnext101_quantized/info.yaml +++ b/qai_hub_models/models/resnext101_quantized/info.yaml @@ -14,6 +14,7 @@ tags: research_paper: https://arxiv.org/abs/1611.05431 research_paper_title: Aggregated Residual Transformations for Deep Neural Networks license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py technical_details: Model checkpoint: Imagenet @@ -36,6 +37,7 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k diff --git a/qai_hub_models/models/resnext101_quantized/model.py b/qai_hub_models/models/resnext101_quantized/model.py index 9a6277e1..98ea0d76 100644 --- a/qai_hub_models/models/resnext101_quantized/model.py +++ b/qai_hub_models/models/resnext101_quantized/model.py @@ -8,27 +8,26 @@ # This verifies aimet is installed, and this must be included first. 
from qai_hub_models.utils.quantization_aimet import ( AIMETQuantizableMixin, - HubCompileOptionsInt8Mixin, ) # isort: on import torch from aimet_torch.cross_layer_equalization import equalize_model +from aimet_torch.model_preparer import prepare_model from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim from qai_hub_models.models.resnext101.model import ResNeXt101 -from qai_hub_models.utils.aimet.config_loader import get_per_channel_aimet_config +from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config from qai_hub_models.utils.asset_loaders import CachedWebModelAsset +from qai_hub_models.utils.base_model import SourceModelFormat, TargetRuntime MODEL_ID = __name__.split(".")[-2] -MODEL_ASSET_VERSION = 3 +MODEL_ASSET_VERSION = 4 DEFAULT_ENCODINGS = "resnext101_quantized_encodings.json" -class ResNeXt101Quantizable( - HubCompileOptionsInt8Mixin, AIMETQuantizableMixin, ResNeXt101 -): +class ResNeXt101Quantizable(AIMETQuantizableMixin, ResNeXt101): """ResNeXt101 with post train quantization support. Supports only 8 bit weights and activations, and only loads pre-quantized checkpoints. @@ -40,9 +39,15 @@ def __init__( ) -> None: ResNeXt101.__init__(self, sim_model.model) AIMETQuantizableMixin.__init__( - self, sim_model, needs_onnx_direct_aimet_export=False + self, + sim_model, ) + def preferred_hub_source_model_format( + self, target_runtime: TargetRuntime + ) -> SourceModelFormat: + return SourceModelFormat.ONNX + @classmethod def from_pretrained( cls, @@ -56,15 +61,16 @@ def from_pretrained( else: Interprets as a filepath and loads the encodings stored there. """ model = ResNeXt101.from_pretrained() - input_shape = model.get_input_spec()["image_tensor"][0] + input_shape = cls.get_input_spec()["image_tensor"][0] + model = prepare_model(model) equalize_model(model, input_shape) sim = QuantizationSimModel( - model.net, + model, quant_scheme="tf_enhanced", default_param_bw=8, default_output_bw=8, - config_file=get_per_channel_aimet_config(), + config_file=get_default_aimet_config(), dummy_input=torch.rand(input_shape), ) @@ -77,3 +83,11 @@ def from_pretrained( sim.model.eval() return cls(sim) + + def get_hub_compile_options( + self, target_runtime: TargetRuntime, other_compile_options: str = "" + ) -> str: + compile_options = super().get_hub_compile_options( + target_runtime, other_compile_options + ) + return compile_options + " --quantize_full_type int8 --quantize_io" diff --git a/qai_hub_models/models/resnext101_quantized/perf.yaml b/qai_hub_models/models/resnext101_quantized/perf.yaml index 0d14a5d4..ddc514fb 100644 --- a/qai_hub_models/models/resnext101_quantized/perf.yaml +++ b/qai_hub_models/models/resnext101_quantized/perf.yaml @@ -17,31 +17,42 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: ResNeXt101Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 76378.0 - throughput: 13.092775406530677 + inference_time: 2842.0 + throughput: 351.8648838845883 estimated_peak_memory_range: - min: 143360 - max: 3223784 + min: 16384 + max: 1739432 primary_compute_unit: NPU - precision: fp16 + precision: int8 layer_info: - layers_on_npu: 149 + layers_on_npu: 146 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 149 - job_id: jmg9zy8qp + total_layers: 146 + job_id: jygzekkg8 
job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:42:18.013006Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -57,11 +68,41 @@ models: total_layers: 0 job_id: '' job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 2088.0 + throughput: 478.9272030651341 + estimated_peak_memory_range: + min: 36864 + max: 251955536 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 146 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 146 + job_id: jnp1lz25q + job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:24:55.190881Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:42:18.013015Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped diff --git a/qai_hub_models/models/resnext101_quantized/test.py b/qai_hub_models/models/resnext101_quantized/test.py index 8beed1b8..1df1173a 100644 --- a/qai_hub_models/models/resnext101_quantized/test.py +++ b/qai_hub_models/models/resnext101_quantized/test.py @@ -4,7 +4,6 @@ # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( run_imagenet_classifier_test, - run_imagenet_classifier_trace_test, ) from qai_hub_models.models.resnext101_quantized.demo import main as demo_main from qai_hub_models.models.resnext101_quantized.model import ( @@ -26,16 +25,6 @@ def test_task(): ) -def test_trace(): - run_imagenet_classifier_trace_test( - ResNeXt101Quantizable.from_pretrained(), - is_quantized=True, - diff_tol=0.007, - rtol=0.02, - atol=0.2, - ) - - def test_demo(): # Verify demo does not crash demo_main(is_test=True) diff --git a/qai_hub_models/models/resnext50/README.md b/qai_hub_models/models/resnext50/README.md index 91f1ce68..a7426562 100644 --- a/qai_hub_models/models/resnext50/README.md +++ b/qai_hub_models/models/resnext50/README.md @@ -10,7 +10,7 @@ This is based on the implementation of ResNeXt50 found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/resnext50). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.resnext50.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ResNeXt50 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). 
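Across this patch the float models' `test_trace` functions gain a `@pytest.mark.trace` marker (see the resnet50, resnext101, and resnext50 test.py hunks), while the quantized models drop their trace tests entirely, as in the resnext101_quantized test.py hunk above. Assuming the `trace` marker is registered in the project's pytest configuration (that registration is not shown in this section), the slower trace tests can be selected or deselected with pytest's `-m` option; a sketch using `pytest.main`:

```python
import pytest

# Run only the fast tests for one model package, deselecting anything marked
# with @pytest.mark.trace. Equivalent to `pytest -m "not trace" <path>` on the
# command line; the path below is a test file touched in the hunks above.
pytest.main(["-m", "not trace", "qai_hub_models/models/resnext101/test.py"])
```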
-- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Aggregated Residual Transformations for Deep Neural Networks](https://arxiv.org/abs/1611.05431) diff --git a/qai_hub_models/models/resnext50/conftest.py b/qai_hub_models/models/resnext50/conftest.py new file mode 100644 index 00000000..abe7f903 --- /dev/null +++ b/qai_hub_models/models/resnext50/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.resnext50 import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.resnext50.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/resnext50/demo.py b/qai_hub_models/models/resnext50/demo.py index 46e8761a..770e8c07 100644 --- a/qai_hub_models/models/resnext50/demo.py +++ b/qai_hub_models/models/resnext50/demo.py @@ -3,11 +3,11 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo -from qai_hub_models.models.resnext50.model import ResNeXt50 +from qai_hub_models.models.resnext50.model import MODEL_ID, ResNeXt50 def main(is_test: bool = False): - imagenet_demo(ResNeXt50, is_test) + imagenet_demo(ResNeXt50, MODEL_ID, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/resnext50/export.py b/qai_hub_models/models/resnext50/export.py index ecdcf293..7baf5fa2 100644 --- a/qai_hub_models/models/resnext50/export.py +++ b/qai_hub_models/models/resnext50/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,35 +109,43 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image_tensor" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. 
Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -146,33 +154,35 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/resnext50/info.yaml b/qai_hub_models/models/resnext50/info.yaml index a2e0e3c4..9ba350da 100644 --- a/qai_hub_models/models/resnext50/info.yaml +++ b/qai_hub_models/models/resnext50/info.yaml @@ -13,6 +13,7 @@ tags: research_paper: https://arxiv.org/abs/1611.05431 research_paper_title: Aggregated Residual Transformations for Deep Neural Networks license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py technical_details: Model checkpoint: Imagenet @@ -34,6 +35,7 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: bsd-3-clause 
+deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k diff --git a/qai_hub_models/models/resnext50/model.py b/qai_hub_models/models/resnext50/model.py index 94a5239f..5b6cd9db 100644 --- a/qai_hub_models/models/resnext50/model.py +++ b/qai_hub_models/models/resnext50/model.py @@ -14,6 +14,6 @@ class ResNeXt50(ImagenetClassifier): @classmethod - def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> ImagenetClassifier: + def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> ResNeXt50: net = tv_models.resnext50_32x4d(weights=weights) return cls(net) diff --git a/qai_hub_models/models/resnext50/perf.yaml b/qai_hub_models/models/resnext50/perf.yaml index 7801c578..806199a4 100644 --- a/qai_hub_models/models/resnext50/perf.yaml +++ b/qai_hub_models/models/resnext50/perf.yaml @@ -17,12 +17,15 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: ResNeXt50 @@ -32,7 +35,7 @@ models: throughput: 472.14353163361665 estimated_peak_memory_range: min: 16384 - max: 2188056 + max: 2846256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,14 +43,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 77 - job_id: jep2r94xg + job_id: j7gjxq1pd job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:21:42.740361Z' torchscript_onnx_qnn: - inference_time: 2068.0 - throughput: 483.55899419729207 + inference_time: 2081.0 + throughput: 480.5382027871216 estimated_peak_memory_range: - min: 16384 - max: 67185584 + min: 12288 + max: 67945728 primary_compute_unit: NPU precision: fp16 layer_info: @@ -55,13 +66,43 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 125 - job_id: jqpyojqr5 + job_id: jygzen4g8 + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 1551.0 + throughput: 644.7453255963894 + estimated_peak_memory_range: + min: 16384 + max: 161276560 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 77 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 77 + job_id: jlpe9y8gr job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:07:32.076107Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:23:42.732818Z' + torchscript_onnx_qnn: + inference_time: 1518.0 + throughput: 658.7615283267457 + estimated_peak_memory_range: + min: 618496 + max: 57881488 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 125 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 125 + job_id: jz5wo44p1 + job_status: Passed diff --git a/qai_hub_models/models/resnext50/test.py b/qai_hub_models/models/resnext50/test.py index 923fbdeb..80736558 100644 --- a/qai_hub_models/models/resnext50/test.py +++ b/qai_hub_models/models/resnext50/test.py @@ -2,6 +2,8 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
# SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- +import pytest + from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( run_imagenet_classifier_test, run_imagenet_classifier_trace_test, @@ -14,6 +16,7 @@ def test_task(): run_imagenet_classifier_test(ResNeXt50.from_pretrained(), MODEL_ID) +@pytest.mark.trace def test_trace(): run_imagenet_classifier_trace_test(ResNeXt50.from_pretrained()) diff --git a/qai_hub_models/models/resnext50_quantized/README.md b/qai_hub_models/models/resnext50_quantized/README.md new file mode 100644 index 00000000..36fd8218 --- /dev/null +++ b/qai_hub_models/models/resnext50_quantized/README.md @@ -0,0 +1,54 @@ +[![Qualcomm® AI Hub Models](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/quic-logo.jpg)](../../README.md) + + +# [ResNeXt50Quantized: Imagenet classifier and general purpose backbone](https://aihub.qualcomm.com/models/resnext50_quantized) + +ResNeXt50 is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. + +This is based on the implementation of ResNeXt50Quantized found +[here](https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py). This repository contains scripts for optimized on-device +export suitable to run on Qualcomm® devices. More details on model performance +accross various devices, can be found [here](https://aihub.qualcomm.com/models/resnext50_quantized). + +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on +a hosted Qualcomm® device. + + +## Example & Usage + + +Once installed, run the following simple CLI demo: + +```bash +python -m qai_hub_models.models.resnext50_quantized.demo +``` +More details on the CLI tool can be found with the `--help` option. See +[demo.py](demo.py) for sample usage of the model including pre/post processing +scripts. Please refer to our [general instructions on using +models](../../../#getting-started) for more usage instructions. + +## Export for on-device deployment + +This repository contains export scripts that produce a model optimized for +on-device deployment. This can be run as follows: + +```bash +python -m qai_hub_models.models.resnext50_quantized.export +``` +Additional options are documented with the `--help` option. Note that the above +script requires access to Deployment instructions for Qualcomm® AI Hub. + +## License +- The license for the original implementation of ResNeXt50Quantized can be found + [here](https://github.com/pytorch/vision/blob/main/LICENSE). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) + +## References +* [Aggregated Residual Transformations for Deep Neural Networks](https://arxiv.org/abs/1611.05431) +* [Source Model Implementation](https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py) + +## Community +* Join [our AI Hub Slack community](https://join.slack.com/t/qualcomm-ai-hub/shared_invite/zt-2dgf95loi-CXHTDRR1rvPgQWPO~ZZZJg) to collaborate, post questions and learn more about on-device AI. +* For questions or feedback please [reach out to us](mailto:ai-hub-support@qti.qualcomm.com). 
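The README above covers the CLI entry points; the generated export script can also be driven from Python. A minimal sketch using the `export_model` signature added later in this patch (it requires `qai_hub_models` to be installed and, for the full flow, Qualcomm AI Hub access; without access the function falls back to `export_without_hub_access`). The `output_dir` value here is arbitrary:

```python
from qai_hub_models.models.resnext50_quantized.export import export_model

# Compile for the default TFLite runtime on a hosted Samsung Galaxy S23
# (the script's default device), but skip on-device inference to save time.
# With hub access this returns (compile_job, profile_job, inference_job),
# where skipped steps come back as None; the compiled .tflite asset is
# downloaded into output_dir unless skip_downloading is set.
jobs = export_model(
    device="Samsung Galaxy S23",
    skip_inferencing=True,
    output_dir="build/resnext50_quantized",
)
```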
+ + diff --git a/qai_hub_models/models/resnext50_quantized/__init__.py b/qai_hub_models/models/resnext50_quantized/__init__.py new file mode 100644 index 00000000..0e0b34c7 --- /dev/null +++ b/qai_hub_models/models/resnext50_quantized/__init__.py @@ -0,0 +1,10 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.imagenet_classifier.app import ( # noqa: F401 + ImagenetClassifierApp as App, +) + +from .model import MODEL_ID # noqa: F401 +from .model import ResNeXt50Quantizable as Model # noqa: F401 diff --git a/qai_hub_models/models/resnext50_quantized/conftest.py b/qai_hub_models/models/resnext50_quantized/conftest.py new file mode 100644 index 00000000..e26716d6 --- /dev/null +++ b/qai_hub_models/models/resnext50_quantized/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.resnext50_quantized import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.resnext50_quantized.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/resnext50_quantized/demo.py b/qai_hub_models/models/resnext50_quantized/demo.py new file mode 100644 index 00000000..58d9d2b2 --- /dev/null +++ b/qai_hub_models/models/resnext50_quantized/demo.py @@ -0,0 +1,17 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo +from qai_hub_models.models.resnext50_quantized.model import ( + MODEL_ID, + ResNeXt50Quantizable, +) + + +def main(is_test: bool = False): + imagenet_demo(ResNeXt50Quantizable, MODEL_ID, is_test) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/resnext50_quantized/export.py b/qai_hub_models/models/resnext50_quantized/export.py new file mode 100644 index 00000000..7f4a6bcc --- /dev/null +++ b/qai_hub_models/models/resnext50_quantized/export.py @@ -0,0 +1,209 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
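+# Usage sketch (illustrative): besides the CLI entry point
+# (`python -m qai_hub_models.models.resnext50_quantized.export`), the
+# export_model() function defined below can be called from Python, e.g.
+#
+#     from qai_hub_models.models.resnext50_quantized.export import export_model
+#
+#     compile_job, profile_job, inference_job = export_model(
+#         device="Samsung Galaxy S24",  # any device listed by hub.get_devices()
+#         skip_inferencing=True,
+#     )
+#
+# Without Qualcomm AI Hub access it falls back to export_without_hub_access()
+# and returns a list of strings rather than job handles.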
+ + +from __future__ import annotations + +import os +import warnings +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple, cast + +import qai_hub as hub + +from qai_hub_models.models.resnext50_quantized import Model +from qai_hub_models.utils.args import ( + export_parser, + get_input_spec_kwargs, + get_model_kwargs, +) +from qai_hub_models.utils.base_model import TargetRuntime +from qai_hub_models.utils.compare import torch_inference +from qai_hub_models.utils.printing import ( + print_inference_metrics, + print_on_target_demo_cmd, + print_profile_metrics_from_job, +) +from qai_hub_models.utils.qai_hub_helpers import ( + can_access_qualcomm_ai_hub, + export_without_hub_access, + transpose_channel_first_to_last, +) +from qai_hub_models.utils.qnn_helpers import get_qnn_inputs + + +def export_model( + device: str = "Samsung Galaxy S23", + skip_profiling: bool = False, + skip_inferencing: bool = False, + skip_downloading: bool = False, + skip_summary: bool = False, + output_dir: Optional[str] = None, + target_runtime: TargetRuntime = TargetRuntime.TFLITE, + compile_options: str = "", + profile_options: str = "", + **additional_model_kwargs, +) -> Tuple[hub.CompileJob, Optional[hub.ProfileJob], Optional[hub.InferenceJob]] | List[ + str +]: + """ + This function accomplishes 6 main tasks: + + 1. Instantiates a PyTorch model and converts it to a traced TorchScript format. + 2. Compiles the model to an asset that can be run on device. + 3. Profiles the model performance on real devices. + 4. Inferences the model on sample inputs. + 5. Downloads the model asset to the local directory. + 6. Summarizes the results from profiling and inference. + + Each of the last four steps can be optionally skipped using the input options. + + Parameters: + device: Device for which to export the model. + Full list of available devices can be found by running `hub.get_devices()`. + Defaults to DEFAULT_DEVICE if not specified. + skip_profiling: If set, skips profiling of compiled model on real devices. + skip_inferencing: If set, skips computing on-device outputs from sample data. + skip_downloading: If set, skips downloading of compiled model. + skip_summary: If set, skips waiting for and summarizing results + from profiling and inference. + output_dir: Directory to store generated assets (e.g. compiled model). + Defaults to `/build/`. + target_runtime: Which on-device runtime to target. Default is TFLite. + compile_options: Additional options to pass when submitting the compile job. + profile_options: Additional options to pass when submitting the profile job. + **additional_model_kwargs: Additional optional kwargs used to customize + `model_cls.from_pretrained` and `model.get_input_spec` + + Returns: + A 3-tuple of: + * A CompileJob object containing metadata about the compile job submitted to hub. + * A ProfileJob containing metadata about the profile job (None if profiling skipped). + * An InferenceJob containing metadata about the inference job (None if inferencing skipped). + """ + model_name = "resnext50_quantized" + output_path = Path(output_dir or Path.cwd() / "build" / model_name) + if not can_access_qualcomm_ai_hub(): + return export_without_hub_access( + "resnext50_quantized", + "ResNeXt50Quantized", + device, + skip_profiling, + skip_inferencing, + skip_downloading, + skip_summary, + output_path, + target_runtime, + compile_options, + profile_options, + ) + + # 1. 
Initialize PyTorch model + model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) + input_spec = model.get_input_spec( + **get_input_spec_kwargs(model, additional_model_kwargs) + ) + + # Trace the model + source_model = model.convert_to_hub_source_model( + target_runtime, output_path, input_spec + ) + if target_runtime == TargetRuntime.TFLITE: + quant_calibration_data = None + else: + quant_calibration_data = model.get_calibration_data(target_runtime, input_spec) + + # 2. Compile the model to an on-device asset + model_compile_options = model.get_hub_compile_options( + target_runtime, compile_options + " --force_channel_last_input image_tensor" + ) + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( + model=source_model, + input_specs=input_spec, + device=hub.Device(device), + name=model_name, + calibration_data=quant_calibration_data, + options=model_compile_options, + ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) + + # 3. Profile the model asset on real devices + profile_job: Optional[hub.client.ProfileJob] = None + if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print(f"Profiling model {model_name} on a hosted device.") + submitted_profile_job = hub.submit_profile_job( + model=compile_job.get_target_model(), + device=hub.Device(device), + name=model_name, + options=profile_options_all, + ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) + + # 4. Run inference on-device with sample inputs + inference_job: Optional[hub.client.InferenceJob] = None + if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print( + f"Running inference for {model_name} on a hosted device with example inputs." + ) + sample_inputs = model.sample_inputs(input_spec) + hub_inputs = sample_inputs + if target_runtime == TargetRuntime.QNN: + hub_inputs = get_qnn_inputs(compile_job, sample_inputs) + # Convert inputs from channel first to channel last + hub_inputs = transpose_channel_first_to_last( + "image_tensor", sample_inputs, target_runtime + ) + submitted_inference_job = hub.submit_inference_job( + model=compile_job.get_target_model(), + inputs=hub_inputs, + device=hub.Device(device), + name=model_name, + options=profile_options_all, + ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) + + # 5. Download the model asset to a local file + if not skip_downloading: + os.makedirs(output_path, exist_ok=True) + target_model: hub.Model = compile_job.get_target_model() # type: ignore + target_model.download(str(output_path / f"{model_name}.tflite")) + + # 6. 
Summarize the results from profiling and inference + if not skip_summary and not skip_profiling: + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore + print_profile_metrics_from_job(profile_job, profile_data) + + if not skip_summary and not skip_inferencing: + torch_out = torch_inference(model, sample_inputs) + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore + print_inference_metrics( + inference_job, inference_result, torch_out, metrics="psnr,top1,top5" + ) + + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + + return (compile_job, profile_job, inference_job) + + +def main(): + warnings.filterwarnings("ignore") + parser = export_parser(model_cls=Model, supports_qnn=False) + args = parser.parse_args() + export_model(**vars(args)) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/resnext50_quantized/info.yaml b/qai_hub_models/models/resnext50_quantized/info.yaml new file mode 100644 index 00000000..69be8e37 --- /dev/null +++ b/qai_hub_models/models/resnext50_quantized/info.yaml @@ -0,0 +1,43 @@ +name: ResNeXt50Quantized +# id must match with the model dir name in qai_hub_models +id: resnext50_quantized +status: public +headline: Imagenet classifier and general purpose backbone. +domain: Computer Vision +description: ResNeXt50 is a machine learning model that can classify images from the + Imagenet dataset. It can also be used as a backbone in building more complex models + for specific use cases. +use_case: Image Classification +tags: + - backbone + - quantized +research_paper: https://arxiv.org/abs/1611.05431 +research_paper_title: Aggregated Residual Transformations for Deep Neural Networks +license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py +technical_details: + Model checkpoint: Imagenet + Input resolution: 224x224 + Number of parameters: 88.7M + Model size: 87.3 MB +applicable_scenarios: + - Medical Imaging + - Anomaly Detection + - Inventory Management +related_models: + - mobilenet_v2 + - densenet121 + - googlenet +form_factors: + - Phone + - Tablet + - IoT + - XR +has_static_banner: yes +has_animated_banner: yes +license_type: bsd-3-clause +deploy_license_type: AI Model Hub License +dataset: + - imagenet-1k + - imagenet-22k diff --git a/qai_hub_models/models/resnext50_quantized/model.py b/qai_hub_models/models/resnext50_quantized/model.py new file mode 100644 index 00000000..dca50076 --- /dev/null +++ b/qai_hub_models/models/resnext50_quantized/model.py @@ -0,0 +1,93 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from __future__ import annotations + +# isort: off +# This verifies aimet is installed, and this must be included first. 
+from qai_hub_models.utils.quantization_aimet import ( + AIMETQuantizableMixin, +) + +# isort: on + +import torch +from aimet_torch.cross_layer_equalization import equalize_model +from aimet_torch.model_preparer import prepare_model +from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim + +from qai_hub_models.models.resnext50.model import ResNeXt50 +from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset +from qai_hub_models.utils.base_model import SourceModelFormat, TargetRuntime + +MODEL_ID = __name__.split(".")[-2] +MODEL_ASSET_VERSION = 1 +DEFAULT_ENCODINGS = "resnext50_quantized_encodings.json" + + +class ResNeXt50Quantizable(AIMETQuantizableMixin, ResNeXt50): + """ResNeXt50 with post train quantization support. + + Supports only 8 bit weights and activations, and only loads pre-quantized checkpoints. + Support for quantizing using your own weights & data will come at a later date.""" + + def __init__( + self, + sim_model: QuantizationSimModel, + ) -> None: + ResNeXt50.__init__(self, sim_model.model) + AIMETQuantizableMixin.__init__( + self, + sim_model, + ) + + def preferred_hub_source_model_format( + self, target_runtime: TargetRuntime + ) -> SourceModelFormat: + return SourceModelFormat.ONNX + + @classmethod + def from_pretrained( + cls, + aimet_encodings: str | None = "DEFAULT", + ) -> "ResNeXt50Quantizable": + """ + Parameters: + aimet_encodings: + if "DEFAULT": Loads the model with aimet encodings calibrated on imagenette. + elif None: Doesn't load any encodings. Used when computing encodings. + else: Interprets as a filepath and loads the encodings stored there. + """ + model = ResNeXt50.from_pretrained() + input_shape = cls.get_input_spec()["image_tensor"][0] + + model = prepare_model(model) + equalize_model(model, input_shape) + sim = QuantizationSimModel( + model, + quant_scheme="tf_enhanced", + default_param_bw=8, + default_output_bw=8, + config_file=get_default_aimet_config(), + dummy_input=torch.rand(input_shape), + ) + + if aimet_encodings: + if aimet_encodings == "DEFAULT": + aimet_encodings = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, DEFAULT_ENCODINGS + ).fetch() + load_encodings_to_sim(sim, aimet_encodings) + + sim.model.eval() + return cls(sim) + + def get_hub_compile_options( + self, target_runtime: TargetRuntime, other_compile_options: str = "" + ) -> str: + compile_options = super().get_hub_compile_options( + target_runtime, other_compile_options + ) + return compile_options + " --quantize_full_type int8 --quantize_io" diff --git a/qai_hub_models/models/whisper_asr/perf.yaml b/qai_hub_models/models/resnext50_quantized/perf.yaml similarity index 71% rename from qai_hub_models/models/whisper_asr/perf.yaml rename to qai_hub_models/models/resnext50_quantized/perf.yaml index f8e81783..67cbf162 100644 --- a/qai_hub_models/models/whisper_asr/perf.yaml +++ b/qai_hub_models/models/resnext50_quantized/perf.yaml @@ -17,31 +17,42 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: -- name: WhisperEncoder +- name: ResNeXt50Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 68918.0 - throughput: 14.50999738820047 + inference_time: 874.0 + throughput: 
1144.1647597254005 estimated_peak_memory_range: - min: 18612224 - max: 67240168 - primary_compute_unit: GPU - precision: fp16 + min: 12288 + max: 1920376 + primary_compute_unit: NPU + precision: int8 layer_info: - layers_on_npu: 0 - layers_on_gpu: 216 + layers_on_npu: 78 + layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 216 - job_id: j1p3z16z5 + total_layers: 78 + job_id: jegn27jgo job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:10:31.822073Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -57,31 +68,29 @@ models: total_layers: 0 job_id: '' job_status: Skipped - reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:31:20.996693Z' -- name: WhisperDecoder - performance_metrics: - torchscript_onnx_tflite: - inference_time: 7924.0 - throughput: 126.19888944977284 + inference_time: 656.0 + throughput: 1524.3902439024391 estimated_peak_memory_range: - min: 3014656 - max: 5380072 + min: 12288 + max: 96222112 primary_compute_unit: NPU - precision: fp16 + precision: int8 layer_info: - layers_on_npu: 293 + layers_on_npu: 78 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 293 - job_id: jwgoln8dg + total_layers: 78 + job_id: joprknk50 job_status: Passed + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:10:31.822087Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -97,11 +106,3 @@ models: total_layers: 0 job_id: '' job_status: Skipped - reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:35:36.297844Z' diff --git a/qai_hub_models/models/resnext50_quantized/test.py b/qai_hub_models/models/resnext50_quantized/test.py new file mode 100644 index 00000000..4cd1dbbd --- /dev/null +++ b/qai_hub_models/models/resnext50_quantized/test.py @@ -0,0 +1,30 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( + run_imagenet_classifier_test, +) +from qai_hub_models.models.resnext50_quantized.demo import main as demo_main +from qai_hub_models.models.resnext50_quantized.model import ( + MODEL_ASSET_VERSION, + MODEL_ID, + ResNeXt50Quantizable, +) + + +def test_task(): + run_imagenet_classifier_test( + ResNeXt50Quantizable.from_pretrained(), + MODEL_ID, + probability_threshold=0.46, + diff_tol=0.005, + rtol=0.02, + atol=0.2, + asset_version=MODEL_ASSET_VERSION, + ) + + +def test_demo(): + # Verify demo does not crash + demo_main(is_test=True) diff --git a/qai_hub_models/models/sam/README.md b/qai_hub_models/models/sam/README.md index d5b814a1..937df382 100644 --- a/qai_hub_models/models/sam/README.md +++ b/qai_hub_models/models/sam/README.md @@ -10,7 +10,7 @@ This is based on the implementation of Segment-Anything-Model found export suitable to run on Qualcomm® devices. 
More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/sam). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -30,7 +30,7 @@ python -m qai_hub_models.models.sam.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -46,7 +46,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Segment-Anything-Model can be found [here](https://github.com/facebookresearch/segment-anything/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Segment Anything](https://arxiv.org/abs/2304.02643) diff --git a/qai_hub_models/models/sam/conftest.py b/qai_hub_models/models/sam/conftest.py new file mode 100644 index 00000000..7f6e737d --- /dev/null +++ b/qai_hub_models/models/sam/conftest.py @@ -0,0 +1,28 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.sam import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. 
+ """ + mock = patch( + "qai_hub_models.models.sam.Model.from_pretrained", + return_value=Model.from_pretrained( + model_type="vit_b", + ), + ) + mock.start() diff --git a/qai_hub_models/models/sam/export.py b/qai_hub_models/models/sam/export.py index 0ebb9f97..a5ed59d6 100644 --- a/qai_hub_models/models/sam/export.py +++ b/qai_hub_models/models/sam/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Mapping, Optional, Tuple +from typing import Any, Dict, List, Mapping, Optional, Tuple, cast import qai_hub as hub import torch @@ -18,7 +18,7 @@ from qai_hub_models.models.sam import Model from qai_hub_models.utils.args import export_parser, get_model_kwargs -from qai_hub_models.utils.base_model import TargetRuntime +from qai_hub_models.utils.base_model import BaseModel, TargetRuntime from qai_hub_models.utils.compare import torch_inference from qai_hub_models.utils.input_spec import make_torch_inputs from qai_hub_models.utils.printing import ( @@ -91,9 +91,9 @@ def export_model( output_path = Path(output_dir or Path.cwd() / "build" / model_name) component_arg = components components = components or DEFAULT_COMPONENTS - for component in components: - if component not in ALL_COMPONENTS: - raise ValueError(f"Invalid component {component}.") + for component_name in components: + if component_name not in ALL_COMPONENTS: + raise ValueError(f"Invalid component {component_name}.") if not can_access_qualcomm_ai_hub(): return export_without_hub_access( "sam", @@ -112,75 +112,90 @@ def export_model( # 1. Initialize PyTorch model model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) - components_dict = {} + components_dict: Dict[str, BaseModel] = {} if "SAMDecoder" in components: - components_dict["SAMDecoder"] = model.get_sam_decoder() + components_dict["SAMDecoder"] = model.get_sam_decoder() # type: ignore if "SAMEncoder" in components: - components_dict["SAMEncoder"] = model.get_sam_encoder() + components_dict["SAMEncoder"] = model.get_sam_encoder() # type: ignore - compile_jobs = {} + compile_jobs: Dict[str, hub.client.CompileJob] = {} for component_name, component in components_dict.items(): # Trace the model input_spec = component.get_input_spec() - source_model = torch.jit.trace(component, make_torch_inputs(input_spec)) + source_model = torch.jit.trace( + component.to("cpu"), make_torch_inputs(input_spec) + ) source_model = optimize_for_mobile( source_model, optimization_blocklist={ - MobileOptimizerType.HOIST_CONV_PACKED_PARAMS, - MobileOptimizerType.INSERT_FOLD_PREPACK_OPS, - MobileOptimizerType.CONV_BN_FUSION, + MobileOptimizerType.HOIST_CONV_PACKED_PARAMS, # type: ignore + MobileOptimizerType.INSERT_FOLD_PREPACK_OPS, # type: ignore + MobileOptimizerType.CONV_BN_FUSION, # type: ignore }, ) # 2. 
Compile the models to an on-device asset model_compile_options = component.get_hub_compile_options( - target_runtime, - compile_options - + " --force_channel_last_input image --force_channel_last_output output_0", + target_runtime, compile_options ) - print(f"Optimizing model {component_name} to run on-device.") - compile_jobs[component_name] = hub.submit_compile_job( + print(f"Optimizing model {component_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), - name=f"{component_name}", + name=f"{model_name}_{component_name}", options=model_compile_options, ) + compile_jobs[component_name] = cast( + hub.client.CompileJob, submitted_compile_job + ) # 3. Profile the model assets on real devices - profile_jobs = {} + profile_jobs: Dict[str, hub.client.ProfileJob] = {} if not skip_profiling: for component_name in components: + profile_options_all = components_dict[ + component_name + ].get_hub_profile_options(target_runtime, profile_options) print(f"Profiling model {component_name} on a hosted device.") - profile_jobs[component_name] = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_jobs[component_name].get_target_model(), device=hub.Device(device), - name=f"{component_name}", - options=profile_options, + name=f"{model_name}_{component_name}", + options=profile_options_all, + ) + profile_jobs[component_name] = cast( + hub.client.ProfileJob, submitted_profile_job ) # 4. Run inference on-device with sample inputs - inference_jobs = {} + inference_jobs: Dict[str, hub.client.InferenceJob] = {} if not skip_inferencing: for component_name in components: print( f"Running inference for {component_name} on a hosted device with example inputs." ) + profile_options_all = components_dict[ + component_name + ].get_hub_profile_options(target_runtime, profile_options) sample_inputs = components_dict[component_name].sample_inputs() - inference_jobs[component_name] = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_jobs[component_name].get_target_model(), inputs=sample_inputs, device=hub.Device(device), - name=f"{component_name}", - options=profile_options, + name=f"{model_name}_{component_name}", + options=profile_options_all, + ) + inference_jobs[component_name] = cast( + hub.client.InferenceJob, submitted_inference_job ) # 5. 
Download the model assets to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) for component_name, compile_job in compile_jobs.items(): - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download( str(output_path / f"{model_name}_{component_name}.tflite") ) @@ -189,8 +204,8 @@ def export_model( if not skip_summary and not skip_profiling: for component_name in components: profile_job = profile_jobs[component_name] - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: @@ -198,8 +213,8 @@ def export_model( inference_job = inference_jobs[component_name] sample_inputs = components_dict[component_name].sample_inputs() torch_out = torch_inference(components_dict[component_name], sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) return { diff --git a/qai_hub_models/models/sam/info.yaml b/qai_hub_models/models/sam/info.yaml index 59b6b5c5..2be861b5 100644 --- a/qai_hub_models/models/sam/info.yaml +++ b/qai_hub_models/models/sam/info.yaml @@ -15,6 +15,7 @@ tags: research_paper: https://arxiv.org/abs/2304.02643 research_paper_title: Segment Anything license: https://github.com/facebookresearch/segment-anything/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/facebookresearch/segment-anything technical_details: Model checkpoint: vit_l @@ -32,4 +33,5 @@ related_models: [] has_static_banner: yes has_animated_banner: yes license_type: apache-2.0 +deploy_license_type: AI Model Hub License dataset: [] diff --git a/qai_hub_models/models/sam/model.py b/qai_hub_models/models/sam/model.py index 1e9561c8..e6fbd483 100644 --- a/qai_hub_models/models/sam/model.py +++ b/qai_hub_models/models/sam/model.py @@ -117,20 +117,41 @@ def forward(self, image: torch.Tensor) -> torch.Tensor: """ return self.sam.image_encoder(image) - def get_input_spec( + def _get_input_spec_for_model_instance( self, - height: int = 720, - width: int = 1280, + batch_size: int = 1, + num_channels: int = 3, + ) -> InputSpec: + """ + Override for model.get_input_spec() when called on instances of this class. + + The initializer for BaseModel will automatically override get_input_spec + with this function when the class is instantiated. + """ + return self.__class__.get_input_spec( + batch_size, + num_channels, + self.sam.image_encoder.img_size, + self.sam.image_encoder.img_size, + ) + + @staticmethod + def get_input_spec( + batch_size: int = 1, + num_channels: int = 3, + encoder_img_height: int = 1024, # self.sam.image_encoder.img_size[0] + encoder_img_width: int = 1024, # self.sam.image_encoder.img_size[1] ) -> InputSpec: # Get the input specification ordered (name -> (shape, type)) pairs for this model. # # This can be used with the qai_hub python API to declare # the model input specification upon submitting a profile job. 
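+        # With the default arguments this resolves to, e.g.:
+        #   {"image": ((1, 3, 1024, 1024), "float32")}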
- - preprocessed_image = self.preprocess_input_image( - np.ones((height, width, 3), dtype=np.uint8) - ) - return {"image": (preprocessed_image.shape, "float32")} + return { + "image": ( + (batch_size, num_channels, encoder_img_height, encoder_img_width), + "float32", + ) + } def preprocess_input_image(self, input_image: np.ndarray): """Transform input image to work with SAM encoder""" @@ -206,16 +227,35 @@ def forward( image_embeddings, point_coords, point_labels, mask_input, has_mask_input ) - def get_input_spec( + def _get_input_spec_for_model_instance( self, - num_of_points=1, + num_of_points: int = 1, + ) -> InputSpec: + """ + Override for model.get_input_spec() when called on instances of this class. + + The initializer for BaseModel will automatically override get_input_spec + with this function when the class is instantiated. + """ + return self.__class__.get_input_spec( + num_of_points, + self.sam.prompt_encoder.embed_dim, + self.sam.prompt_encoder.image_embedding_size[0], + self.sam.prompt_encoder.image_embedding_size[1], + ) + + @staticmethod + def get_input_spec( + num_of_points: int = 1, + embed_dim: int = 256, + image_embedding_height: int = 64, + image_embedding_width: int = 64, ) -> InputSpec: # Get the input specification ordered (name -> (shape, type)) pairs for this model. # # This can be used with the qai_hub python API to declare # the model input specification upon submitting a profile job. - embed_dim = self.sam.prompt_encoder.embed_dim - embed_size = self.sam.prompt_encoder.image_embedding_size + embed_size = (image_embedding_height, image_embedding_width) mask_input_size = [4 * x for x in embed_size] input_spec = { diff --git a/qai_hub_models/models/sam/perf.yaml b/qai_hub_models/models/sam/perf.yaml index 7aa44891..6ea06c1e 100644 --- a/qai_hub_models/models/sam/perf.yaml +++ b/qai_hub_models/models/sam/perf.yaml @@ -2,6 +2,12 @@ aggregated: supported_oses: - Android supported_devices: + - Google Pixel 3 + - Google Pixel 3a + - Google Pixel 3a XL + - Google Pixel 4 + - Google Pixel 4a + - Google Pixel 5a 5G - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -11,31 +17,42 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: SAMDecoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 16696.0 - throughput: 59.89458552946814 + inference_time: 16761.0 + throughput: 59.66231131794046 estimated_peak_memory_range: - min: 71995392 - max: 131856168 + min: 42115072 + max: 92806968 primary_compute_unit: GPU precision: fp16 layer_info: layers_on_npu: 0 layers_on_gpu: 356 - layers_on_cpu: 8 - total_layers: 364 - job_id: j1pvlewr5 + layers_on_cpu: 9 + total_layers: 365 + job_id: jmg9vkm57 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:10:36.748428Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -51,11 +68,41 @@ models: total_layers: 0 job_id: '' job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 13794.0 + throughput: 72.4952878062926 + estimated_peak_memory_range: + min: 41951232 + max: 94062064 + primary_compute_unit: GPU + precision: fp16 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 356 + 
layers_on_cpu: 9 + total_layers: 365 + job_id: jnp107n5q + job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-22T17:43:03.980523Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:10:36.748439Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped diff --git a/qai_hub_models/models/sam/requirements.txt b/qai_hub_models/models/sam/requirements.txt index 116e68bc..c0faedcb 100644 --- a/qai_hub_models/models/sam/requirements.txt +++ b/qai_hub_models/models/sam/requirements.txt @@ -1,4 +1,2 @@ -matplotlib -opencv_python -pycocotools -requests +matplotlib==3.7.4 +pycocotools==2.0.7 diff --git a/qai_hub_models/models/sam/test.py b/qai_hub_models/models/sam/test.py index 640a4286..fd862267 100644 --- a/qai_hub_models/models/sam/test.py +++ b/qai_hub_models/models/sam/test.py @@ -6,7 +6,7 @@ import pytest import torch -from qai_hub_models.models.sam import App +from qai_hub_models.models.sam.app import SAMApp from qai_hub_models.models.sam.demo import IMAGE_ADDRESS from qai_hub_models.models.sam.demo import main as demo_main from qai_hub_models.models.sam.model import SMALL_MODEL_TYPE, SAMQAIHMWrapper @@ -36,7 +36,7 @@ def test_e2e_numerical( sam_predictor.set_image(input_image_data) # QAIHM SAMApp for segmentation - sam_app = App(sam_wrapper) + sam_app = SAMApp(sam_wrapper) # Prepare image for segmentation sam_app.prepare(input_image_data) diff --git a/qai_hub_models/models/sesr_m5/README.md b/qai_hub_models/models/sesr_m5/README.md index cc0a70a9..37483cd4 100644 --- a/qai_hub_models/models/sesr_m5/README.md +++ b/qai_hub_models/models/sesr_m5/README.md @@ -10,7 +10,7 @@ This is based on the implementation of SESR-M5 found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/sesr_m5). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.sesr_m5.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of SESR-M5 can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). 
+- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Collapsible Linear Blocks for Super-Efficient Super Resolution](https://arxiv.org/abs/2103.09404) diff --git a/qai_hub_models/models/sesr_m5/conftest.py b/qai_hub_models/models/sesr_m5/conftest.py new file mode 100644 index 00000000..238f114e --- /dev/null +++ b/qai_hub_models/models/sesr_m5/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.sesr_m5 import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.sesr_m5.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/sesr_m5/demo.py b/qai_hub_models/models/sesr_m5/demo.py index d8b0c0f9..312bbab4 100644 --- a/qai_hub_models/models/sesr_m5/demo.py +++ b/qai_hub_models/models/sesr_m5/demo.py @@ -16,6 +16,7 @@ def main(is_test: bool = False): super_resolution_demo( model_cls=SESR_M5, + model_id=MODEL_ID, default_image=IMAGE_ADDRESS, is_test=is_test, ) diff --git a/qai_hub_models/models/sesr_m5/export.py b/qai_hub_models/models/sesr_m5/export.py index c857a7c6..51517850 100644 --- a/qai_hub_models/models/sesr_m5/export.py +++ b/qai_hub_models/models/sesr_m5/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,7 +109,7 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( @@ -118,29 +118,37 @@ def export_model( + " --force_channel_last_input image" + " --force_channel_last_output output_0", ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. 
Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -149,30 +157,31 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore # Convert outputs from channel last to channel first inference_result = transpose_channel_last_to_first( "output_0", inference_result, target_runtime diff --git a/qai_hub_models/models/sesr_m5/info.yaml b/qai_hub_models/models/sesr_m5/info.yaml index 081f7b0b..06f7dae2 100644 --- a/qai_hub_models/models/sesr_m5/info.yaml +++ b/qai_hub_models/models/sesr_m5/info.yaml @@ -10,6 +10,7 @@ tags: [] research_paper: https://arxiv.org/abs/2103.09404 research_paper_title: Collapsible Linear Blocks for Super-Efficient Super Resolution license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/sesr technical_details: Model checkpoint: sesr_m5_4x_checkpoint_float32 @@ -29,4 +30,5 @@ related_models: has_static_banner: yes has_animated_banner: yes license_type: other +deploy_license_type: AI Model Hub License dataset: [] diff --git a/qai_hub_models/models/sesr_m5/model.py b/qai_hub_models/models/sesr_m5/model.py index 2b0b6ec8..c7283ab9 100644 --- a/qai_hub_models/models/sesr_m5/model.py +++ b/qai_hub_models/models/sesr_m5/model.py @@ -37,8 +37,6 @@ def __init__( @classmethod def from_pretrained(cls) -> SESR_M5: model = _load_sesr_source_model( - MODEL_ID, - MODEL_ASSET_VERSION, SCALING_FACTOR, NUM_CHANNELS, NUM_LBLOCKS, diff --git a/qai_hub_models/models/sesr_m5/perf.yaml b/qai_hub_models/models/sesr_m5/perf.yaml index 95e47b7b..dc0d7cac 100644 --- a/qai_hub_models/models/sesr_m5/perf.yaml +++ b/qai_hub_models/models/sesr_m5/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 
- Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: SESR-M5 performance_metrics: - torchscript_onnx_tflite: - inference_time: 2214.0 - throughput: 451.6711833785005 + inference_time: 2245.0 + throughput: 445.43429844097994 estimated_peak_memory_range: - min: 49152 - max: 8233656 + min: 28672 + max: 9857128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,14 +43,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: jz5wl394p + job_id: jwgoyjd58 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-15T00:02:55.130462Z' torchscript_onnx_qnn: - inference_time: 2149.0 - throughput: 465.33271288971616 + inference_time: 2136.0 + throughput: 468.1647940074906 estimated_peak_memory_range: - min: 212992 - max: 77434640 + min: 221184 + max: 3873216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -55,13 +66,43 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 32 - job_id: jmg9zy4mp + job_id: j7gjxj8pd + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 1533.0 + throughput: 652.3157208088714 + estimated_peak_memory_range: + min: 16384 + max: 23601872 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 22 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 25 + job_id: j1pv3jm5x job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:36:38.760826Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-15T00:04:43.299283Z' + torchscript_onnx_qnn: + inference_time: 1462.0 + throughput: 683.9945280437756 + estimated_peak_memory_range: + min: 208896 + max: 20706112 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 32 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 32 + job_id: jlpe9j0gr + job_status: Passed diff --git a/qai_hub_models/models/sesr_m5/test.py b/qai_hub_models/models/sesr_m5/test.py index 8412b7dc..e59f48d4 100644 --- a/qai_hub_models/models/sesr_m5/test.py +++ b/qai_hub_models/models/sesr_m5/test.py @@ -34,5 +34,6 @@ def test_task(): ) +@skip_clone_repo_check def test_demo(): demo_main(is_test=True) diff --git a/qai_hub_models/models/sesr_m5_quantized/README.md b/qai_hub_models/models/sesr_m5_quantized/README.md index 8e042ba9..dadc3fc0 100644 --- a/qai_hub_models/models/sesr_m5_quantized/README.md +++ b/qai_hub_models/models/sesr_m5_quantized/README.md @@ -10,7 +10,7 @@ This is based on the implementation of SESR-M5-Quantized found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/sesr_m5_quantized). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.sesr_m5_quantized.demo More details on the CLI tool can be found with the `--help` option. 
See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of SESR-M5-Quantized can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Collapsible Linear Blocks for Super-Efficient Super Resolution](https://arxiv.org/abs/2103.09404) diff --git a/qai_hub_models/models/sesr_m5_quantized/conftest.py b/qai_hub_models/models/sesr_m5_quantized/conftest.py new file mode 100644 index 00000000..36e64be6 --- /dev/null +++ b/qai_hub_models/models/sesr_m5_quantized/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.sesr_m5_quantized import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. 
+ """ + mock = patch( + "qai_hub_models.models.sesr_m5_quantized.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/sesr_m5_quantized/demo.py b/qai_hub_models/models/sesr_m5_quantized/demo.py index cb08ed1f..4d063bdd 100644 --- a/qai_hub_models/models/sesr_m5_quantized/demo.py +++ b/qai_hub_models/models/sesr_m5_quantized/demo.py @@ -19,6 +19,7 @@ def main(is_test: bool = False): super_resolution_demo( SESR_M5Quantizable, + MODEL_ID, default_image=IMAGE_ADDRESS, is_test=is_test, available_target_runtimes=[TargetRuntime.TFLITE], diff --git a/qai_hub_models/models/sesr_m5_quantized/export.py b/qai_hub_models/models/sesr_m5_quantized/export.py index 9084e32a..180d06e3 100644 --- a/qai_hub_models/models/sesr_m5_quantized/export.py +++ b/qai_hub_models/models/sesr_m5_quantized/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub @@ -119,8 +119,8 @@ def export_model( model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), @@ -128,21 +128,29 @@ def export_model( calibration_data=quant_calibration_data, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -154,30 +162,31 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. 
Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/sesr_m5_quantized/info.yaml b/qai_hub_models/models/sesr_m5_quantized/info.yaml index b4ac2e2e..f62e37c6 100644 --- a/qai_hub_models/models/sesr_m5_quantized/info.yaml +++ b/qai_hub_models/models/sesr_m5_quantized/info.yaml @@ -10,6 +10,7 @@ tags: [quantized] research_paper: https://arxiv.org/abs/2103.09404 research_paper_title: Collapsible Linear Blocks for Super-Efficient Super Resolution license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/sesr technical_details: Model checkpoint: sesr_m5_4x_checkpoint_int8 @@ -27,4 +28,5 @@ related_models: [xlsr, xlsr_quantized, quicksrnetlarge] has_static_banner: yes has_animated_banner: yes license_type: other +deploy_license_type: AI Model Hub License dataset: [] diff --git a/qai_hub_models/models/sesr_m5_quantized/model.py b/qai_hub_models/models/sesr_m5_quantized/model.py index 4e85a042..7e2bac70 100644 --- a/qai_hub_models/models/sesr_m5_quantized/model.py +++ b/qai_hub_models/models/sesr_m5_quantized/model.py @@ -60,9 +60,7 @@ def from_pretrained( aimet_encodings: str | None = "DEFAULT", ) -> SESR_M5Quantizable: # Load Model - sesr = _load_sesr_source_model( - MODEL_ID, MODEL_ASSET_VERSION, SCALING_FACTOR, NUM_CHANNELS, NUM_LBLOCKS - ) + sesr = _load_sesr_source_model(SCALING_FACTOR, NUM_CHANNELS, NUM_LBLOCKS) input_shape = SESR_M5.get_input_spec()["image"][0] equalize_model(sesr, input_shape) diff --git a/qai_hub_models/models/sesr_m5_quantized/perf.yaml b/qai_hub_models/models/sesr_m5_quantized/perf.yaml index 14a85ccd..ba9de102 100644 --- a/qai_hub_models/models/sesr_m5_quantized/perf.yaml +++ b/qai_hub_models/models/sesr_m5_quantized/perf.yaml @@ -17,31 +17,42 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: SESR-M5-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1743.0 - throughput: 573.7234652897304 + inference_time: 1749.0 + throughput: 571.7552887364208 estimated_peak_memory_range: - min: 24576 - max: 2845656 + min: 28672 + max: 6325016 primary_compute_unit: NPU - precision: fp16 + precision: int8 layer_info: layers_on_npu: 13 layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 16 - job_id: jz5wl31jp + job_id: joprk1k50 job_status: Passed + 
reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:59:28.460705Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -57,11 +68,41 @@ models: total_layers: 0 job_id: '' job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 1403.0 + throughput: 712.7583749109052 + estimated_peak_memory_range: + min: 20480 + max: 21054176 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 13 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 16 + job_id: jep2836p6 + job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:34:35.502394Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:59:28.460714Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped diff --git a/qai_hub_models/models/sesr_m5_quantized/test.py b/qai_hub_models/models/sesr_m5_quantized/test.py index 46c55138..86bb6543 100644 --- a/qai_hub_models/models/sesr_m5_quantized/test.py +++ b/qai_hub_models/models/sesr_m5_quantized/test.py @@ -7,6 +7,7 @@ import zipfile import numpy as np +import pytest import torch from qai_hub_models.models._shared.super_resolution.app import SuperResolutionApp @@ -45,12 +46,13 @@ def test_task(): ) +@pytest.mark.trace @skip_clone_repo_check def test_trace(): image = load_image(IMAGE_ADDRESS) output_image = load_image(OUTPUT_IMAGE_ADDRESS) app = SuperResolutionApp( - SESR_M5Quantizable.from_pretrained().convert_to_quantized_torchscript() + SESR_M5Quantizable.from_pretrained().convert_to_torchscript() ) app_output_image = app.predict(image)[0] diff --git a/qai_hub_models/models/shufflenet_v2/README.md b/qai_hub_models/models/shufflenet_v2/README.md index d4763144..dbf1bb82 100644 --- a/qai_hub_models/models/shufflenet_v2/README.md +++ b/qai_hub_models/models/shufflenet_v2/README.md @@ -10,7 +10,7 @@ This is based on the implementation of Shufflenet-v2 found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/shufflenet_v2). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.shufflenet_v2.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Shufflenet-v2 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design](https://arxiv.org/abs/1807.11164) diff --git a/qai_hub_models/models/shufflenet_v2/conftest.py b/qai_hub_models/models/shufflenet_v2/conftest.py new file mode 100644 index 00000000..ce602a0e --- /dev/null +++ b/qai_hub_models/models/shufflenet_v2/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.shufflenet_v2 import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.shufflenet_v2.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/shufflenet_v2/demo.py b/qai_hub_models/models/shufflenet_v2/demo.py index e1c488de..834dc0a1 100644 --- a/qai_hub_models/models/shufflenet_v2/demo.py +++ b/qai_hub_models/models/shufflenet_v2/demo.py @@ -3,11 +3,11 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo -from qai_hub_models.models.shufflenet_v2.model import ShufflenetV2 +from qai_hub_models.models.shufflenet_v2.model import MODEL_ID, ShufflenetV2 def main(is_test: bool = False): - imagenet_demo(ShufflenetV2, is_test) + imagenet_demo(ShufflenetV2, MODEL_ID, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/shufflenet_v2/export.py b/qai_hub_models/models/shufflenet_v2/export.py index 74fd2aae..cdb14643 100644 --- a/qai_hub_models/models/shufflenet_v2/export.py +++ b/qai_hub_models/models/shufflenet_v2/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,35 +109,43 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image_tensor" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. 
Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -146,33 +154,35 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/shufflenet_v2/info.yaml b/qai_hub_models/models/shufflenet_v2/info.yaml index b33359f4..9663eb73 100644 --- a/qai_hub_models/models/shufflenet_v2/info.yaml +++ b/qai_hub_models/models/shufflenet_v2/info.yaml @@ -13,6 +13,7 @@ research_paper: https://arxiv.org/abs/1807.11164 research_paper_title: 'ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design' license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/shufflenetv2.py technical_details: Model checkpoint: Imagenet @@ -35,6 +36,7 @@ form_factors: has_static_banner: yes has_animated_banner: yes 
license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k diff --git a/qai_hub_models/models/shufflenet_v2/model.py b/qai_hub_models/models/shufflenet_v2/model.py index 9a3b8e77..105ed1c2 100644 --- a/qai_hub_models/models/shufflenet_v2/model.py +++ b/qai_hub_models/models/shufflenet_v2/model.py @@ -14,6 +14,6 @@ class ShufflenetV2(ImagenetClassifier): @classmethod - def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> ImagenetClassifier: + def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> ShufflenetV2: net = tv_models.shufflenet_v2_x0_5(weights=weights) return cls(net) diff --git a/qai_hub_models/models/shufflenet_v2/perf.yaml b/qai_hub_models/models/shufflenet_v2/perf.yaml index 8c57e692..525e8689 100644 --- a/qai_hub_models/models/shufflenet_v2/perf.yaml +++ b/qai_hub_models/models/shufflenet_v2/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: Shufflenet-v2 performance_metrics: - torchscript_onnx_tflite: - inference_time: 921.0 - throughput: 1085.7763300760043 + inference_time: 919.0 + throughput: 1088.139281828074 estimated_peak_memory_range: - min: 16384 - max: 2322736 + min: 12288 + max: 2065312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,14 +43,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 202 - job_id: j1gly27e5 + job_id: j2p0y1ngw job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-15T00:00:07.928895Z' torchscript_onnx_qnn: - inference_time: 321.0 - throughput: 3115.264797507788 + inference_time: 322.0 + throughput: 3105.590062111801 estimated_peak_memory_range: - min: 622592 - max: 4181728 + min: 626688 + max: 3731328 primary_compute_unit: NPU precision: fp16 layer_info: @@ -55,13 +66,43 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 157 - job_id: jw568zvvg + job_id: jogkzlngd + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 586.0 + throughput: 1706.4846416382252 + estimated_peak_memory_range: + min: 16384 + max: 32832960 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 202 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 202 + job_id: j1p8o3og9 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:27:51.522582Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-15T00:03:17.860163Z' + torchscript_onnx_qnn: + inference_time: 225.0 + throughput: 4444.444444444444 + estimated_peak_memory_range: + min: 12288 + max: 48449136 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 157 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 157 + job_id: jn5q87o57 + job_status: Passed diff --git a/qai_hub_models/models/shufflenet_v2/test.py b/qai_hub_models/models/shufflenet_v2/test.py index 1198a8ad..81498767 100644 --- a/qai_hub_models/models/shufflenet_v2/test.py +++ b/qai_hub_models/models/shufflenet_v2/test.py @@ -2,6 +2,8 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. 
All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- +import pytest + from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( run_imagenet_classifier_test, run_imagenet_classifier_trace_test, @@ -14,6 +16,7 @@ def test_task(): run_imagenet_classifier_test(ShufflenetV2.from_pretrained(), MODEL_ID) +@pytest.mark.trace def test_trace(): run_imagenet_classifier_trace_test(ShufflenetV2.from_pretrained()) diff --git a/qai_hub_models/models/shufflenet_v2_quantized/README.md b/qai_hub_models/models/shufflenet_v2_quantized/README.md index 2eaa69b3..6d034ee8 100644 --- a/qai_hub_models/models/shufflenet_v2_quantized/README.md +++ b/qai_hub_models/models/shufflenet_v2_quantized/README.md @@ -10,7 +10,7 @@ This is based on the implementation of Shufflenet-v2Quantized found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/shufflenet_v2_quantized). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.shufflenet_v2_quantized.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Shufflenet-v2Quantized can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design](https://arxiv.org/abs/1807.11164) diff --git a/qai_hub_models/models/shufflenet_v2_quantized/conftest.py b/qai_hub_models/models/shufflenet_v2_quantized/conftest.py new file mode 100644 index 00000000..4ad51ad0 --- /dev/null +++ b/qai_hub_models/models/shufflenet_v2_quantized/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.shufflenet_v2_quantized import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. 
+ """ + mock = patch( + "qai_hub_models.models.shufflenet_v2_quantized.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/shufflenet_v2_quantized/demo.py b/qai_hub_models/models/shufflenet_v2_quantized/demo.py index bf864ee1..bbecc0a4 100644 --- a/qai_hub_models/models/shufflenet_v2_quantized/demo.py +++ b/qai_hub_models/models/shufflenet_v2_quantized/demo.py @@ -3,11 +3,14 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo -from qai_hub_models.models.shufflenet_v2_quantized.model import ShufflenetV2Quantizable +from qai_hub_models.models.shufflenet_v2_quantized.model import ( + MODEL_ID, + ShufflenetV2Quantizable, +) def main(is_test: bool = False): - imagenet_demo(ShufflenetV2Quantizable, is_test) + imagenet_demo(ShufflenetV2Quantizable, MODEL_ID, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/shufflenet_v2_quantized/export.py b/qai_hub_models/models/shufflenet_v2_quantized/export.py index 27e330aa..d4cd288c 100644 --- a/qai_hub_models/models/shufflenet_v2_quantized/export.py +++ b/qai_hub_models/models/shufflenet_v2_quantized/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub @@ -120,8 +120,8 @@ def export_model( model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image_tensor" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), @@ -129,21 +129,29 @@ def export_model( calibration_data=quant_calibration_data, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -155,42 +163,44 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. 
Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics( inference_job, inference_result, torch_out, metrics="psnr,top1,top5" ) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/shufflenet_v2_quantized/info.yaml b/qai_hub_models/models/shufflenet_v2_quantized/info.yaml index 2f7e3b8a..afd1d7e6 100644 --- a/qai_hub_models/models/shufflenet_v2_quantized/info.yaml +++ b/qai_hub_models/models/shufflenet_v2_quantized/info.yaml @@ -14,6 +14,7 @@ research_paper: https://arxiv.org/abs/1807.11164 research_paper_title: 'ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design' license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/shufflenetv2.py technical_details: Model checkpoint: Imagenet @@ -36,6 +37,7 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k diff --git a/qai_hub_models/models/shufflenet_v2_quantized/model.py b/qai_hub_models/models/shufflenet_v2_quantized/model.py index b6121793..ba13c2c4 100644 --- a/qai_hub_models/models/shufflenet_v2_quantized/model.py +++ b/qai_hub_models/models/shufflenet_v2_quantized/model.py @@ -8,26 +8,35 @@ # This verifies aimet is installed, and this must be included first. 
from qai_hub_models.utils.quantization_aimet import ( AIMETQuantizableMixin, - HubCompileOptionsInt8Mixin, ) # isort: on import torch -from aimet_torch.cross_layer_equalization import equalize_model +from aimet_torch.cross_layer_equalization import ( + equalize_bn_folded_model, + fold_all_batch_norms, +) +from aimet_torch.model_preparer import prepare_model from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim from qai_hub_models.models.shufflenet_v2.model import ShufflenetV2 -from qai_hub_models.utils.aimet.config_loader import get_per_channel_aimet_config +from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config from qai_hub_models.utils.asset_loaders import CachedWebModelAsset +from qai_hub_models.utils.base_model import SourceModelFormat, TargetRuntime +from qai_hub_models.utils.quantization_aimet import ( + convert_all_depthwise_to_per_tensor, + tie_aimet_observer_groups, +) MODEL_ID = __name__.split(".")[-2] -MODEL_ASSET_VERSION = 1 +MODEL_ASSET_VERSION = 2 DEFAULT_ENCODINGS = "shufflenet_v2_quantized_encodings.json" class ShufflenetV2Quantizable( - HubCompileOptionsInt8Mixin, AIMETQuantizableMixin, ShufflenetV2 + AIMETQuantizableMixin, + ShufflenetV2, ): """ShufflenetV2 with post train quantization support. @@ -40,9 +49,15 @@ def __init__( ) -> None: ShufflenetV2.__init__(self, sim_model.model) AIMETQuantizableMixin.__init__( - self, sim_model, needs_onnx_direct_aimet_export=True + self, + sim_model, ) + def preferred_hub_source_model_format( + self, target_runtime: TargetRuntime + ) -> SourceModelFormat: + return SourceModelFormat.ONNX + @classmethod def from_pretrained( cls, @@ -56,17 +71,22 @@ def from_pretrained( else: Interprets as a filepath and loads the encodings stored there. """ model = ShufflenetV2.from_pretrained() - input_shape = model.get_input_spec()["image_tensor"][0] + input_shape = cls.get_input_spec()["image_tensor"][0] + model = prepare_model(model) + dummy_input = torch.rand(input_shape) - equalize_model(model, input_shape) + pairs = fold_all_batch_norms(model, input_shape, dummy_input) + equalize_bn_folded_model(model, input_shape, pairs, dummy_input) sim = QuantizationSimModel( - model.net, + model, quant_scheme="tf_enhanced", default_param_bw=8, default_output_bw=8, - config_file=get_per_channel_aimet_config(), - dummy_input=torch.rand(input_shape), + config_file=get_default_aimet_config(), + dummy_input=dummy_input, ) + convert_all_depthwise_to_per_tensor(sim.model) + cls._tie_pre_concat_quantizers(sim) if aimet_encodings: if aimet_encodings == "DEFAULT": @@ -77,3 +97,65 @@ def from_pretrained( sim.model.eval() return cls(sim) + + def get_hub_compile_options( + self, target_runtime: TargetRuntime, other_compile_options: str = "" + ) -> str: + compile_options = super().get_hub_compile_options( + target_runtime, other_compile_options + ) + return compile_options + " --quantize_full_type int8 --quantize_io" + + @classmethod + def _tie_pre_concat_quantizers(cls, sim: QuantizationSimModel): + """ + This ties together the output quantizers prior to concatenations. This + prevents unnecessary re-quantization during the concatenation. 
+ """ + n = sim.model.net + # Because of skip connections, the groups are large + groups = [ + [ + getattr(getattr(n.stage2, "0").branch1, "4"), + getattr(getattr(n.stage2, "0").branch2, "7"), + getattr(n.stage2, "0").module_cat, + getattr(getattr(n.stage2, "1").branch2, "7"), + getattr(n.stage2, "1").module_cat_1, + getattr(getattr(n.stage2, "2").branch2, "7"), + getattr(n.stage2, "2").module_cat_2, + getattr(getattr(n.stage2, "3").branch2, "7"), + getattr(n.stage2, "3").module_cat_3, + ], + [ + getattr(getattr(n.stage3, "0").branch1, "4"), + getattr(getattr(n.stage3, "0").branch2, "7"), + getattr(n.stage3, "0").module_cat_4, + getattr(getattr(n.stage3, "1").branch2, "7"), + getattr(n.stage3, "1").module_cat_5, + getattr(getattr(n.stage3, "2").branch2, "7"), + getattr(n.stage3, "2").module_cat_6, + getattr(getattr(n.stage3, "3").branch2, "7"), + getattr(n.stage3, "3").module_cat_7, + getattr(getattr(n.stage3, "4").branch2, "7"), + getattr(n.stage3, "4").module_cat_8, + getattr(getattr(n.stage3, "5").branch2, "7"), + getattr(n.stage3, "5").module_cat_9, + getattr(getattr(n.stage3, "6").branch2, "7"), + getattr(n.stage3, "6").module_cat_10, + getattr(getattr(n.stage3, "7").branch2, "7"), + getattr(n.stage3, "7").module_cat_11, + ], + [ + getattr(getattr(n.stage4, "0").branch1, "4"), + getattr(getattr(n.stage4, "0").branch2, "7"), + getattr(n.stage4, "0").module_cat_12, + getattr(getattr(n.stage4, "1").branch2, "7"), + getattr(n.stage4, "1").module_cat_13, + getattr(getattr(n.stage4, "2").branch2, "7"), + getattr(n.stage4, "2").module_cat_14, + getattr(getattr(n.stage4, "3").branch2, "7"), + getattr(n.stage4, "3").module_cat_15, + ], + ] + + tie_aimet_observer_groups(groups) diff --git a/qai_hub_models/models/shufflenet_v2_quantized/perf.yaml b/qai_hub_models/models/shufflenet_v2_quantized/perf.yaml index 877fce75..6459f84c 100644 --- a/qai_hub_models/models/shufflenet_v2_quantized/perf.yaml +++ b/qai_hub_models/models/shufflenet_v2_quantized/perf.yaml @@ -17,51 +17,92 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: Shufflenet-v2Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 30460.0 - throughput: 32.829940906106366 + inference_time: 579.0 + throughput: 1727.1157167530225 estimated_peak_memory_range: - min: 294912 - max: 4752264 + min: 16384 + max: 4558296 primary_compute_unit: NPU - precision: fp16 + precision: int8 layer_info: - layers_on_npu: 221 + layers_on_npu: 203 layers_on_gpu: 0 - layers_on_cpu: 17 - total_layers: 238 - job_id: jnp1nw8kg + layers_on_cpu: 0 + total_layers: 203 + job_id: j1p89yxg9 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:32:39.995361Z' torchscript_onnx_qnn: - inference_time: 355.0 - throughput: 2816.9014084507044 + inference_time: 279.0 + throughput: 3584.2293906810037 estimated_peak_memory_range: min: 0 - max: 3208840 + max: 75494608 primary_compute_unit: NPU - precision: fp16 + precision: int8 layer_info: - layers_on_npu: 122 + layers_on_npu: 120 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 122 - job_id: jvgddqvkg + total_layers: 120 + job_id: j1glzm8pv + job_status: Passed + - torchscript_onnx_tflite: + 
inference_time: 385.0 + throughput: 2597.4025974025976 + estimated_peak_memory_range: + min: 12288 + max: 21664192 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 203 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 203 + job_id: jn5qkq457 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:21:57.529965Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:34:41.782968Z' + torchscript_onnx_qnn: + inference_time: 204.0 + throughput: 4901.9607843137255 + estimated_peak_memory_range: + min: 163840 + max: 41738848 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 120 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 120 + job_id: jw56j40po + job_status: Passed diff --git a/qai_hub_models/models/shufflenet_v2_quantized/test.py b/qai_hub_models/models/shufflenet_v2_quantized/test.py index 339d2f16..995731eb 100644 --- a/qai_hub_models/models/shufflenet_v2_quantized/test.py +++ b/qai_hub_models/models/shufflenet_v2_quantized/test.py @@ -4,7 +4,6 @@ # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( run_imagenet_classifier_test, - run_imagenet_classifier_trace_test, ) from qai_hub_models.models.shufflenet_v2_quantized.demo import main as demo_main from qai_hub_models.models.shufflenet_v2_quantized.model import ( @@ -25,16 +24,6 @@ def test_task(): ) -def test_trace(): - run_imagenet_classifier_trace_test( - ShufflenetV2Quantizable.from_pretrained(), - diff_tol=0.01, - rtol=0.02, - atol=0.2, - is_quantized=True, - ) - - def test_demo(): # Verify demo does not crash demo_main(is_test=True) diff --git a/qai_hub_models/models/sinet/README.md b/qai_hub_models/models/sinet/README.md index a7d97cf1..5ddf002c 100644 --- a/qai_hub_models/models/sinet/README.md +++ b/qai_hub_models/models/sinet/README.md @@ -10,7 +10,7 @@ This is based on the implementation of SINet found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/sinet). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.sinet.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of SINet can be found [here](https://github.com/clovaai/ext_portrait_segmentation/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). 
+- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [SINet: Extreme Lightweight Portrait Segmentation Networks with Spatial Squeeze Modules and Information Blocking Decoder](https://arxiv.org/abs/1911.09099) diff --git a/qai_hub_models/models/sinet/conftest.py b/qai_hub_models/models/sinet/conftest.py new file mode 100644 index 00000000..b5b3c585 --- /dev/null +++ b/qai_hub_models/models/sinet/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.sinet import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.sinet.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/sinet/demo.py b/qai_hub_models/models/sinet/demo.py index 51972075..b4a56d63 100644 --- a/qai_hub_models/models/sinet/demo.py +++ b/qai_hub_models/models/sinet/demo.py @@ -30,8 +30,8 @@ def main(is_test: bool = False): help="image file path or URL.", ) args = parser.parse_args([] if is_test else None) - model = demo_model_from_cli_args(SINet, args) - validate_on_device_demo_args(args, SINet.get_model_id()) + model = demo_model_from_cli_args(SINet, MODEL_ID, args) + validate_on_device_demo_args(args, MODEL_ID) # load image and model image = load_image(args.image) diff --git a/qai_hub_models/models/sinet/export.py b/qai_hub_models/models/sinet/export.py index 3e9d21c7..780ac793 100644 --- a/qai_hub_models/models/sinet/export.py +++ b/qai_hub_models/models/sinet/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -110,7 +110,7 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( @@ -119,29 +119,37 @@ def export_model( + " --force_channel_last_input image" + " --force_channel_last_output output_0", ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. 
Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -150,37 +158,39 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore # Convert outputs from channel last to channel first inference_result = transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/sinet/info.yaml b/qai_hub_models/models/sinet/info.yaml index 94bcbfc6..e7a6719f 100644 --- a/qai_hub_models/models/sinet/info.yaml +++ b/qai_hub_models/models/sinet/info.yaml @@ -12,6 +12,7 @@ research_paper: https://arxiv.org/abs/1911.09099 research_paper_title: 'SINet: Extreme Lightweight Portrait Segmentation Networks with Spatial Squeeze Modules and Information Blocking Decoder' license: https://github.com/clovaai/ext_portrait_segmentation/blob/master/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: 
https://github.com/clovaai/ext_portrait_segmentation technical_details: Model checkpoint: SINet.pth @@ -32,4 +33,5 @@ form_factors: has_static_banner: yes has_animated_banner: no license_type: other +deploy_license_type: AI Model Hub License dataset: [] diff --git a/qai_hub_models/models/sinet/model.py b/qai_hub_models/models/sinet/model.py index ea82116f..76625bd3 100644 --- a/qai_hub_models/models/sinet/model.py +++ b/qai_hub_models/models/sinet/model.py @@ -5,12 +5,14 @@ from __future__ import annotations import os +from importlib import reload import torch from qai_hub_models.utils.asset_loaders import ( CachedWebModelAsset, SourceAsRoot, + find_replace_in_repo, load_torch, ) from qai_hub_models.utils.base_model import BaseModel @@ -54,8 +56,8 @@ def forward(self, image: torch.Tensor) -> torch.Tensor: """ return self.model(image) + @staticmethod def get_input_spec( - self, batch_size: int = 1, num_channels: int = 3, height: int = 224, @@ -86,7 +88,14 @@ def _load_sinet_source_model_from_weights( ) -> torch.nn.Module: with SourceAsRoot( SINET_SOURCE_REPOSITORY, SINET_SOURCE_REPO_COMMIT, MODEL_ID, MODEL_ASSET_VERSION - ): + ) as repo_root: + # This repository has a top-level "models", which is common. We + # explicitly reload it in case it has been loaded and cached by another + # package (or our models when executing from qai_hub_models/) + import models + + reload(models) + if os.path.exists(os.path.expanduser(weights_name_or_path)): weights_path = os.path.expanduser(weights_name_or_path) else: @@ -99,11 +108,7 @@ def _load_sinet_source_model_from_weights( # Perform a find and replace for .data.size() in SINet's shuffle implementation # as tracing treats this as a constant, but does not treat .shape as a constant - with open("models/SINet.py", "r") as file: - file_content = file.read() - new_content = file_content.replace(".data.size()", ".shape") - with open("models/SINet.py", "w") as file: - file.write(new_content) + find_replace_in_repo(repo_root, "models/SINet.py", ".data.size()", ".shape") # import the model arch from models.SINet import SINet diff --git a/qai_hub_models/models/sinet/perf.yaml b/qai_hub_models/models/sinet/perf.yaml index 936c588b..914dbe7c 100644 --- a/qai_hub_models/models/sinet/perf.yaml +++ b/qai_hub_models/models/sinet/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: SINet performance_metrics: - torchscript_onnx_tflite: - inference_time: 1817.0 - throughput: 550.357732526142 + inference_time: 1809.0 + throughput: 552.791597567717 estimated_peak_memory_range: - min: 434176 - max: 2872792 + min: 20480 + max: 2244048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,14 +43,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 240 - job_id: jegnzmkmg + job_id: jw566wn5o job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:17:54.436410Z' torchscript_onnx_qnn: - inference_time: 1192.0 - throughput: 838.9261744966443 + inference_time: 1193.0 + throughput: 838.2229673093043 estimated_peak_memory_range: - min: 622592 - max: 51366312 + min: 20480 + max: 25094232 primary_compute_unit: NPU precision: fp16 layer_info: @@ 
-55,13 +66,43 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 187 - job_id: joprl2wep + job_id: jwgoy8158 + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 1170.0 + throughput: 854.7008547008547 + estimated_peak_memory_range: + min: 12288 + max: 24922736 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 240 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 240 + job_id: j1p3k6m52 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:11:37.141843Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:22:20.419307Z' + torchscript_onnx_qnn: + inference_time: 802.0 + throughput: 1246.8827930174564 + estimated_peak_memory_range: + min: 12288 + max: 65545232 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 187 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 187 + job_id: j1pv37z5x + job_status: Passed diff --git a/qai_hub_models/models/sinet/test.py b/qai_hub_models/models/sinet/test.py index 365aad84..4780256f 100644 --- a/qai_hub_models/models/sinet/test.py +++ b/qai_hub_models/models/sinet/test.py @@ -32,5 +32,6 @@ def test_task(): ) +@skip_clone_repo_check def test_demo(): demo_main(is_test=True) diff --git a/qai_hub_models/models/squeezenet1_1/README.md b/qai_hub_models/models/squeezenet1_1/README.md index 37e48f82..ed782b23 100644 --- a/qai_hub_models/models/squeezenet1_1/README.md +++ b/qai_hub_models/models/squeezenet1_1/README.md @@ -10,7 +10,7 @@ This is based on the implementation of SqueezeNet-1_1 found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/squeezenet1_1). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.squeezenet1_1.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of SqueezeNet-1_1 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). 
+- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size](https://arxiv.org/abs/1602.07360) diff --git a/qai_hub_models/models/squeezenet1_1/conftest.py b/qai_hub_models/models/squeezenet1_1/conftest.py new file mode 100644 index 00000000..6f693de1 --- /dev/null +++ b/qai_hub_models/models/squeezenet1_1/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.squeezenet1_1 import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.squeezenet1_1.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/squeezenet1_1/demo.py b/qai_hub_models/models/squeezenet1_1/demo.py index 75640bd4..2eed38c2 100644 --- a/qai_hub_models/models/squeezenet1_1/demo.py +++ b/qai_hub_models/models/squeezenet1_1/demo.py @@ -3,11 +3,11 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo -from qai_hub_models.models.squeezenet1_1.model import SqueezeNet +from qai_hub_models.models.squeezenet1_1.model import MODEL_ID, SqueezeNet def main(is_test: bool = False): - imagenet_demo(SqueezeNet, is_test) + imagenet_demo(SqueezeNet, MODEL_ID, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/squeezenet1_1/export.py b/qai_hub_models/models/squeezenet1_1/export.py index 9c1da861..1586bb36 100644 --- a/qai_hub_models/models/squeezenet1_1/export.py +++ b/qai_hub_models/models/squeezenet1_1/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,35 +109,43 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image_tensor" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. 
Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -146,33 +154,35 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/squeezenet1_1/info.yaml b/qai_hub_models/models/squeezenet1_1/info.yaml index 3763db61..dba5e172 100644 --- a/qai_hub_models/models/squeezenet1_1/info.yaml +++ b/qai_hub_models/models/squeezenet1_1/info.yaml @@ -14,6 +14,7 @@ research_paper: https://arxiv.org/abs/1602.07360 research_paper_title: 'SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size' license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/squeezenet.py technical_details: Model checkpoint: Imagenet @@ -35,6 +36,7 @@ form_factors: has_static_banner: yes has_animated_banner: no 
license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k diff --git a/qai_hub_models/models/squeezenet1_1/model.py b/qai_hub_models/models/squeezenet1_1/model.py index 6a08155f..9d97e4c0 100644 --- a/qai_hub_models/models/squeezenet1_1/model.py +++ b/qai_hub_models/models/squeezenet1_1/model.py @@ -14,6 +14,6 @@ class SqueezeNet(ImagenetClassifier): @classmethod - def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> ImagenetClassifier: + def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> SqueezeNet: net = tv_models.squeezenet1_1(weights=weights) return cls(net) diff --git a/qai_hub_models/models/squeezenet1_1/perf.yaml b/qai_hub_models/models/squeezenet1_1/perf.yaml index 6fe797b1..217d17b8 100644 --- a/qai_hub_models/models/squeezenet1_1/perf.yaml +++ b/qai_hub_models/models/squeezenet1_1/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: SqueezeNet-1_1 performance_metrics: - torchscript_onnx_tflite: - inference_time: 212.0 - throughput: 4716.981132075472 + inference_time: 225.0 + throughput: 4444.444444444444 estimated_peak_memory_range: - min: 20480 - max: 1439360 + min: 24576 + max: 1431872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,14 +43,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 39 - job_id: j1pvlre75 + job_id: j1p8ol8g9 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:32:29.373813Z' torchscript_onnx_qnn: - inference_time: 280.0 - throughput: 3571.4285714285716 + inference_time: 278.0 + throughput: 3597.122302158273 estimated_peak_memory_range: min: 20480 - max: 12471928 + max: 53223728 primary_compute_unit: NPU precision: fp16 layer_info: @@ -55,13 +66,43 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: j7gjr2o7p + job_id: jn5q8jm57 + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 181.0 + throughput: 5524.861878453039 + estimated_peak_memory_range: + min: 12288 + max: 21672448 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 39 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 39 + job_id: jogkzjogd job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:17:05.340427Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:34:27.758337Z' + torchscript_onnx_qnn: + inference_time: 199.0 + throughput: 5025.125628140703 + estimated_peak_memory_range: + min: 618496 + max: 28404384 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 69 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 69 + job_id: j1glnjlpv + job_status: Passed diff --git a/qai_hub_models/models/squeezenet1_1/test.py b/qai_hub_models/models/squeezenet1_1/test.py index 0b6f2e19..8d745b7f 100644 --- a/qai_hub_models/models/squeezenet1_1/test.py +++ b/qai_hub_models/models/squeezenet1_1/test.py @@ -2,6 +2,8 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
# SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- +import pytest + from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( run_imagenet_classifier_test, run_imagenet_classifier_trace_test, @@ -14,6 +16,7 @@ def test_task(): run_imagenet_classifier_test(SqueezeNet.from_pretrained(), MODEL_ID) +@pytest.mark.trace def test_trace(): run_imagenet_classifier_trace_test(SqueezeNet.from_pretrained()) diff --git a/qai_hub_models/models/squeezenet1_1_quantized/README.md b/qai_hub_models/models/squeezenet1_1_quantized/README.md index 89b9b080..ebb01c7c 100644 --- a/qai_hub_models/models/squeezenet1_1_quantized/README.md +++ b/qai_hub_models/models/squeezenet1_1_quantized/README.md @@ -10,7 +10,7 @@ This is based on the implementation of SqueezeNet-1_1Quantized found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/squeezenet1_1_quantized). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.squeezenet1_1_quantized.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of SqueezeNet-1_1Quantized can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size](https://arxiv.org/abs/1602.07360) diff --git a/qai_hub_models/models/squeezenet1_1_quantized/conftest.py b/qai_hub_models/models/squeezenet1_1_quantized/conftest.py new file mode 100644 index 00000000..d216d03d --- /dev/null +++ b/qai_hub_models/models/squeezenet1_1_quantized/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.squeezenet1_1_quantized import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. 
+ """ + mock = patch( + "qai_hub_models.models.squeezenet1_1_quantized.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/squeezenet1_1_quantized/demo.py b/qai_hub_models/models/squeezenet1_1_quantized/demo.py index fdd8fc5d..f6b6145b 100644 --- a/qai_hub_models/models/squeezenet1_1_quantized/demo.py +++ b/qai_hub_models/models/squeezenet1_1_quantized/demo.py @@ -3,11 +3,14 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo -from qai_hub_models.models.squeezenet1_1_quantized.model import SqueezeNetQuantizable +from qai_hub_models.models.squeezenet1_1_quantized.model import ( + MODEL_ID, + SqueezeNetQuantizable, +) def main(is_test: bool = False): - imagenet_demo(SqueezeNetQuantizable, is_test) + imagenet_demo(SqueezeNetQuantizable, MODEL_ID, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/squeezenet1_1_quantized/export.py b/qai_hub_models/models/squeezenet1_1_quantized/export.py index 8e42fb41..e4256985 100644 --- a/qai_hub_models/models/squeezenet1_1_quantized/export.py +++ b/qai_hub_models/models/squeezenet1_1_quantized/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub @@ -120,8 +120,8 @@ def export_model( model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image_tensor" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), @@ -129,21 +129,29 @@ def export_model( calibration_data=quant_calibration_data, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -155,33 +163,35 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. 
Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/squeezenet1_1_quantized/info.yaml b/qai_hub_models/models/squeezenet1_1_quantized/info.yaml index 6284192b..8daf5c4d 100644 --- a/qai_hub_models/models/squeezenet1_1_quantized/info.yaml +++ b/qai_hub_models/models/squeezenet1_1_quantized/info.yaml @@ -15,6 +15,7 @@ research_paper: https://arxiv.org/abs/1602.07360 research_paper_title: 'SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size' license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/squeezenet.py technical_details: Model checkpoint: Imagenet @@ -36,6 +37,7 @@ form_factors: has_static_banner: yes has_animated_banner: no license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k diff --git a/qai_hub_models/models/squeezenet1_1_quantized/model.py b/qai_hub_models/models/squeezenet1_1_quantized/model.py index 554e4a15..67a3f532 100644 --- a/qai_hub_models/models/squeezenet1_1_quantized/model.py +++ b/qai_hub_models/models/squeezenet1_1_quantized/model.py @@ -14,14 +14,16 @@ import torch from aimet_torch.cross_layer_equalization import equalize_model +from aimet_torch.model_preparer import prepare_model from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim from qai_hub_models.models.squeezenet1_1.model import SqueezeNet -from qai_hub_models.utils.aimet.config_loader import get_per_channel_aimet_config +from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config from qai_hub_models.utils.asset_loaders import CachedWebModelAsset +from qai_hub_models.utils.base_model import SourceModelFormat, TargetRuntime MODEL_ID = __name__.split(".")[-2] -MODEL_ASSET_VERSION = 1 +MODEL_ASSET_VERSION = 2 DEFAULT_ENCODINGS = "squeezenet1_1_quantized_encodings.json" @@ -37,9 +39,15 @@ def __init__( ) -> None: SqueezeNet.__init__(self, sim_model.model) AIMETQuantizableMixin.__init__( - self, sim_model, needs_onnx_direct_aimet_export=True + self, + sim_model, ) + def 
preferred_hub_source_model_format( + self, target_runtime: TargetRuntime + ) -> SourceModelFormat: + return SourceModelFormat.ONNX + @classmethod def from_pretrained( cls, @@ -53,15 +61,16 @@ def from_pretrained( else: Interprets as a filepath and loads the encodings stored there. """ model = SqueezeNet.from_pretrained() - input_shape = model.get_input_spec()["image_tensor"][0] + input_shape = cls.get_input_spec()["image_tensor"][0] + model = prepare_model(model) equalize_model(model, input_shape) sim = QuantizationSimModel( - model.net, + model, quant_scheme="tf_enhanced", default_param_bw=8, default_output_bw=8, - config_file=get_per_channel_aimet_config(), + config_file=get_default_aimet_config(), dummy_input=torch.rand(input_shape), ) @@ -74,3 +83,11 @@ def from_pretrained( sim.model.eval() return cls(sim) + + def get_hub_compile_options( + self, target_runtime: TargetRuntime, other_compile_options: str = "" + ) -> str: + compile_options = super().get_hub_compile_options( + target_runtime, other_compile_options + ) + return compile_options + " --quantize_full_type int8 --quantize_io" diff --git a/qai_hub_models/models/squeezenet1_1_quantized/perf.yaml b/qai_hub_models/models/squeezenet1_1_quantized/perf.yaml index 00894c2f..fcf427fe 100644 --- a/qai_hub_models/models/squeezenet1_1_quantized/perf.yaml +++ b/qai_hub_models/models/squeezenet1_1_quantized/perf.yaml @@ -17,51 +17,92 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: SqueezeNet-1_1Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 215.0 - throughput: 4651.162790697675 + inference_time: 176.0 + throughput: 5681.818181818182 estimated_peak_memory_range: - min: 20480 - max: 1657648 + min: 12288 + max: 2498992 primary_compute_unit: NPU - precision: fp16 + precision: int8 + layer_info: + layers_on_npu: 39 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 39 + job_id: jnp10jk5q + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:48:25.505884Z' + torchscript_onnx_qnn: + inference_time: 185.0 + throughput: 5405.405405405405 + estimated_peak_memory_range: + min: 172032 + max: 55116856 + primary_compute_unit: NPU + precision: int8 layer_info: layers_on_npu: 43 layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 43 - job_id: jegnzmovg + job_id: jqp4q1qgo job_status: Passed - torchscript_onnx_qnn: - inference_time: 227.0 - throughput: 4405.286343612334 + - torchscript_onnx_tflite: + inference_time: 135.0 + throughput: 7407.407407407408 estimated_peak_memory_range: - min: 622592 - max: 62441592 + min: 12288 + max: 21511824 primary_compute_unit: NPU - precision: fp16 + precision: int8 layer_info: - layers_on_npu: 45 + layers_on_npu: 39 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 45 - job_id: joprl2ovp + total_layers: 39 + job_id: jz57z4qp3 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:29:43.800896Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:51:35.601938Z' + torchscript_onnx_qnn: + 
inference_time: 146.0 + throughput: 6849.315068493151 + estimated_peak_memory_range: + min: 159744 + max: 18650384 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 43 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 43 + job_id: jo5mrmygk + job_status: Passed diff --git a/qai_hub_models/models/squeezenet1_1_quantized/test.py b/qai_hub_models/models/squeezenet1_1_quantized/test.py index bf4f2ec9..9c927cf5 100644 --- a/qai_hub_models/models/squeezenet1_1_quantized/test.py +++ b/qai_hub_models/models/squeezenet1_1_quantized/test.py @@ -4,7 +4,6 @@ # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( run_imagenet_classifier_test, - run_imagenet_classifier_trace_test, ) from qai_hub_models.models.squeezenet1_1_quantized.demo import main as demo_main from qai_hub_models.models.squeezenet1_1_quantized.model import ( @@ -25,16 +24,6 @@ def test_task(): ) -def test_trace(): - run_imagenet_classifier_trace_test( - SqueezeNetQuantizable.from_pretrained(), - diff_tol=0.01, - rtol=0.02, - atol=0.2, - is_quantized=True, - ) - - def test_demo(): # Verify demo does not crash demo_main(is_test=True) diff --git a/qai_hub_models/models/stable_diffusion_quantized/README.md b/qai_hub_models/models/stable_diffusion_quantized/README.md index 04fbbce0..2cb891b9 100644 --- a/qai_hub_models/models/stable_diffusion_quantized/README.md +++ b/qai_hub_models/models/stable_diffusion_quantized/README.md @@ -10,7 +10,7 @@ This is based on the implementation of Stable-Diffusion found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/stable_diffusion_quantized). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -30,7 +30,7 @@ python -m qai_hub_models.models.stable_diffusion_quantized.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -46,7 +46,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Stable-Diffusion can be found [here](https://github.com/CompVis/stable-diffusion/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). 
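The refreshed perf.yaml figures in this patch are internally consistent: `throughput` is the reciprocal of `inference_time`, with the latter reported in microseconds. A quick arithmetic check against two of the entries above (values copied from this hunk):

```python
# throughput (inferences/sec) = 1e6 / inference_time (microseconds)
for inference_time_us, reported_throughput in [
    (176.0, 5681.818181818182),   # SqueezeNet-1_1Quantized, TFLite, Galaxy S23
    (146.0, 6849.315068493151),   # SqueezeNet-1_1Quantized, QNN, Galaxy S24
]:
    assert abs(1e6 / inference_time_us - reported_throughput) < 1e-6
```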
+- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [High-Resolution Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) diff --git a/qai_hub_models/models/stable_diffusion_quantized/export.py b/qai_hub_models/models/stable_diffusion_quantized/export.py index 5f9280bd..eb3af6b4 100644 --- a/qai_hub_models/models/stable_diffusion_quantized/export.py +++ b/qai_hub_models/models/stable_diffusion_quantized/export.py @@ -9,25 +9,21 @@ import warnings from pathlib import Path -from typing import List, Mapping, Optional, Tuple +from typing import Any, Dict, List, Mapping, Optional, Tuple, cast import qai_hub as hub from qai_hub_models.models.stable_diffusion_quantized import Model from qai_hub_models.utils.args import export_parser -from qai_hub_models.utils.base_model import TargetRuntime +from qai_hub_models.utils.base_model import BasePrecompiledModel, TargetRuntime from qai_hub_models.utils.printing import print_profile_metrics_from_job from qai_hub_models.utils.qai_hub_helpers import ( can_access_qualcomm_ai_hub, export_without_hub_access, ) -ALL_COMPONENTS = ["Text-Encoder-Quantized", "UNet-Quantized", "VAE-Decoder-Quantized"] -DEFAULT_COMPONENTS = [ - "Text-Encoder-Quantized", - "VAE-Decoder-Quantized", - "UNet-Quantized", -] +ALL_COMPONENTS = ["TextEncoder_Quantized", "UNet_Quantized", "VAEDecoder_Quantized"] +DEFAULT_COMPONENTS = ["TextEncoder_Quantized", "VAEDecoder_Quantized", "UNet_Quantized"] def export_model( @@ -79,9 +75,9 @@ def export_model( output_path = Path(output_dir or Path.cwd() / "build" / model_name) component_arg = components components = components or DEFAULT_COMPONENTS - for component in components: - if component not in ALL_COMPONENTS: - raise ValueError(f"Invalid component {component}.") + for component_name in components: + if component_name not in ALL_COMPONENTS: + raise ValueError(f"Invalid component {component_name}.") if not can_access_qualcomm_ai_hub(): return export_without_hub_access( "stable_diffusion_quantized", @@ -98,16 +94,17 @@ def export_model( component_arg, ) + target_runtime = TargetRuntime.TFLITE # 1. Initialize model print("Initializing model class") model = Model.from_precompiled() - components_dict = {} - if "Text-Encoder-Quantized" in components: - components_dict["Text-Encoder-Quantized"] = model.text_encoder - if "UNet-Quantized" in components: - components_dict["UNet-Quantized"] = model.unet - if "VAE-Decoder-Quantized" in components: - components_dict["VAE-Decoder-Quantized"] = model.vae_decoder + components_dict: Dict[str, BasePrecompiledModel] = {} + if "TextEncoder_Quantized" in components: + components_dict["TextEncoder_Quantized"] = model.text_encoder # type: ignore + if "UNet_Quantized" in components: + components_dict["UNet_Quantized"] = model.unet # type: ignore + if "VAEDecoder_Quantized" in components: + components_dict["VAEDecoder_Quantized"] = model.vae_decoder # type: ignore # 2. Upload model assets to hub print("Uploading model assets on hub") @@ -118,39 +115,51 @@ def export_model( ) # 3. 
Profile the model assets on real devices - profile_jobs = {} + profile_jobs: Dict[str, hub.client.ProfileJob] = {} if not skip_profiling: for component_name in components: + profile_options_all = components_dict[ + component_name + ].get_hub_profile_options(target_runtime, profile_options) print(f"Profiling model {component_name} on a hosted device.") - profile_jobs[component_name] = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=uploaded_models[component_name], device=hub.Device(device), - name=f"{component_name}", - options=profile_options, + name=f"{model_name}_{component_name}", + options=profile_options_all, + ) + profile_jobs[component_name] = cast( + hub.client.ProfileJob, submitted_profile_job ) # 4. Run inference on-device with sample inputs - inference_jobs = {} + inference_jobs: Dict[str, hub.client.InferenceJob] = {} if not skip_inferencing: for component_name in components: print( f"Running inference for {component_name} on a hosted device with example inputs." ) + profile_options_all = components_dict[ + component_name + ].get_hub_profile_options(target_runtime, profile_options) sample_inputs = components_dict[component_name].sample_inputs() - inference_jobs[component_name] = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=uploaded_models[component_name], inputs=sample_inputs, device=hub.Device(device), - name=f"{component_name}", - options=profile_options, + name=f"{model_name}_{component_name}", + options=profile_options_all, + ) + inference_jobs[component_name] = cast( + hub.client.InferenceJob, submitted_inference_job ) # 5. Summarize the results from profiling if not skip_summary and not skip_profiling: for component_name in components: profile_job = profile_jobs[component_name] - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) return { diff --git a/qai_hub_models/models/stable_diffusion_quantized/info.yaml b/qai_hub_models/models/stable_diffusion_quantized/info.yaml index 86efe1b8..ceac7d79 100644 --- a/qai_hub_models/models/stable_diffusion_quantized/info.yaml +++ b/qai_hub_models/models/stable_diffusion_quantized/info.yaml @@ -14,6 +14,7 @@ tags: research_paper: https://arxiv.org/abs/2112.10752 research_paper_title: High-Resolution Image Synthesis with Latent Diffusion Models license: https://github.com/CompVis/stable-diffusion/blob/main/LICENSE +deploy_license: https://github.com/CompVis/stable-diffusion/blob/main/LICENSE source_repo: https://github.com/CompVis/stable-diffusion/tree/main technical_details: Input: Text prompt to generate image @@ -34,4 +35,5 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: creativeml-openrail-m +deploy_license_type: creativeml-openrail-m dataset: [] diff --git a/qai_hub_models/models/stable_diffusion_quantized/model.py b/qai_hub_models/models/stable_diffusion_quantized/model.py index 54d0144a..f9da4488 100644 --- a/qai_hub_models/models/stable_diffusion_quantized/model.py +++ b/qai_hub_models/models/stable_diffusion_quantized/model.py @@ -6,8 +6,9 @@ import os +from qai_hub_models.models.protocols import FromPrecompiledProtocol from qai_hub_models.utils.asset_loaders import CachedWebModelAsset -from qai_hub_models.utils.base_model import BasePrecompiledModel +from qai_hub_models.utils.base_model import 
BasePrecompiledModel, CollectionModel from qai_hub_models.utils.input_spec import InputSpec MODEL_ID = __name__.split(".")[-2] @@ -18,7 +19,7 @@ VAE_DECODER = os.path.join(QNN_SDK_PREFIX, "vae_decoder.serialized.bin") -class StableDiffusionQuantized: +class StableDiffusionQuantized(FromPrecompiledProtocol, CollectionModel): """ Stable Diffusion wrapper class consists of - Text Encoder @@ -51,9 +52,6 @@ class ClipVITTextEncoder(BasePrecompiledModel): and compiled into serialized binary for Qualcomm Snapdragon Gen2+. """ - def __init__(self, target_model_path) -> None: - self.target_model_path = target_model_path - @classmethod def from_precompiled(cls) -> "ClipVITTextEncoder": text_encoder_path = CachedWebModelAsset.from_asset_store( @@ -61,10 +59,8 @@ def from_precompiled(cls) -> "ClipVITTextEncoder": ).fetch() return ClipVITTextEncoder(text_encoder_path) - def get_target_model_path(self) -> str: - return self.target_model_path - - def get_input_spec(self) -> InputSpec: + @staticmethod + def get_input_spec() -> InputSpec: return {"input_1": ((1, 77), "int32")} @@ -76,9 +72,6 @@ class Unet(BasePrecompiledModel): and compiled into serialized binary for Qualcomm Snapdragon Gen2+. """ - def __init__(self, target_model_path) -> None: - self.target_model_path = target_model_path - @classmethod def from_precompiled(cls) -> "Unet": model_path = CachedWebModelAsset.from_asset_store( @@ -86,10 +79,8 @@ def from_precompiled(cls) -> "Unet": ).fetch() return Unet(model_path) - def get_target_model_path(self) -> str: - return self.target_model_path - - def get_input_spec(self) -> InputSpec: + @staticmethod + def get_input_spec() -> InputSpec: return { "input_1": ((1, 64, 64, 4), "float32"), "input_2": ((1, 1280), "float32"), @@ -105,9 +96,6 @@ class VAEDecoder(BasePrecompiledModel): and compiled into serialized binary for Qualcomm Snapdragon Gen2+. 
""" - def __init__(self, target_model_path) -> None: - self.target_model_path = target_model_path - @classmethod def from_precompiled(cls) -> "VAEDecoder": model_path = CachedWebModelAsset.from_asset_store( @@ -115,8 +103,6 @@ def from_precompiled(cls) -> "VAEDecoder": ).fetch() return VAEDecoder(model_path) - def get_target_model_path(self) -> str: - return self.target_model_path - - def get_input_spec(self) -> InputSpec: + @staticmethod + def get_input_spec() -> InputSpec: return {"input_1": ((1, 64, 64, 4), "float32")} diff --git a/qai_hub_models/models/stable_diffusion_quantized/requirements.txt b/qai_hub_models/models/stable_diffusion_quantized/requirements.txt index e21d8196..83aa3d48 100644 --- a/qai_hub_models/models/stable_diffusion_quantized/requirements.txt +++ b/qai_hub_models/models/stable_diffusion_quantized/requirements.txt @@ -1,2 +1,2 @@ -transformers==4.31.0 +transformers==4.27.4 diffusers[torch]==0.21.4 diff --git a/qai_hub_models/models/stable_diffusion_quantized/test.py b/qai_hub_models/models/stable_diffusion_quantized/test.py index b1c0b2b0..b0cc4bf5 100644 --- a/qai_hub_models/models/stable_diffusion_quantized/test.py +++ b/qai_hub_models/models/stable_diffusion_quantized/test.py @@ -8,6 +8,13 @@ from qai_hub_models.models.stable_diffusion_quantized.demo import main as demo_main from qai_hub_models.models.stable_diffusion_quantized.export import export_model +from qai_hub_models.models.stable_diffusion_quantized.model import ( + StableDiffusionQuantized, +) + + +def test_from_precompiled(): + StableDiffusionQuantized.from_precompiled() @pytest.mark.skip("#105 move slow_cloud and slow tests to nightly.") diff --git a/qai_hub_models/models/stylegan2/README.md b/qai_hub_models/models/stylegan2/README.md index 2068b45a..be4cac59 100644 --- a/qai_hub_models/models/stylegan2/README.md +++ b/qai_hub_models/models/stylegan2/README.md @@ -10,7 +10,7 @@ This is based on the implementation of StyleGAN2 found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/stylegan2). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -30,7 +30,7 @@ python -m qai_hub_models.models.stylegan2.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -46,7 +46,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of StyleGAN2 can be found [here](https://github.com/NVlabs/stylegan3/blob/main/LICENSE.txt). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). 
+- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Analyzing and Improving the Image Quality of StyleGAN](http://arxiv.org/abs/1912.04958) diff --git a/qai_hub_models/models/stylegan2/conftest.py b/qai_hub_models/models/stylegan2/conftest.py new file mode 100644 index 00000000..d5441390 --- /dev/null +++ b/qai_hub_models/models/stylegan2/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.stylegan2 import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.stylegan2.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/stylegan2/demo.py b/qai_hub_models/models/stylegan2/demo.py index e3d0f99e..dc28952f 100644 --- a/qai_hub_models/models/stylegan2/demo.py +++ b/qai_hub_models/models/stylegan2/demo.py @@ -5,7 +5,7 @@ import torch from qai_hub_models.models.stylegan2.app import StyleGAN2App -from qai_hub_models.models.stylegan2.model import StyleGAN2 +from qai_hub_models.models.stylegan2.model import MODEL_ID, StyleGAN2 from qai_hub_models.utils.args import ( demo_model_from_cli_args, get_model_cli_parser, @@ -46,7 +46,7 @@ def main(is_test: bool = False): # Create model and app model = model_from_cli_args(StyleGAN2, args) - inference_model = demo_model_from_cli_args(StyleGAN2, args) + inference_model = demo_model_from_cli_args(StyleGAN2, MODEL_ID, args) app = StyleGAN2App(inference_model, model.output_size, model.num_classes) # Verify model input args diff --git a/qai_hub_models/models/stylegan2/export.py b/qai_hub_models/models/stylegan2/export.py index 0ac5ef8c..520ed574 100644 --- a/qai_hub_models/models/stylegan2/export.py +++ b/qai_hub_models/models/stylegan2/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -108,63 +108,72 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_output output_0" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. 
Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options + " --compute_unit gpu", + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) sample_inputs = model.sample_inputs(input_spec) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=sample_inputs, device=hub.Device(device), name=model_name, - options=profile_options + " --compute_unit gpu", + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore # Convert outputs from channel last to channel first inference_result = transpose_channel_last_to_first( "output_0", inference_result, target_runtime diff --git a/qai_hub_models/models/stylegan2/info.yaml b/qai_hub_models/models/stylegan2/info.yaml index 4e624753..d063ff28 100644 --- a/qai_hub_models/models/stylegan2/info.yaml +++ b/qai_hub_models/models/stylegan2/info.yaml @@ -13,6 +13,7 @@ tags: research_paper: http://arxiv.org/abs/1912.04958 research_paper_title: Analyzing and Improving the Image Quality of StyleGAN license: https://github.com/NVlabs/stylegan3/blob/main/LICENSE.txt +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/NVlabs/stylegan3 technical_details: Model checkpoint: StyleGAN2 (afhqcat dataset) @@ -29,4 +30,5 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: other +deploy_license_type: AI Model Hub License dataset: [] diff --git a/qai_hub_models/models/stylegan2/model.py b/qai_hub_models/models/stylegan2/model.py index eddb21cd..45071a48 100644 --- 
a/qai_hub_models/models/stylegan2/model.py +++ b/qai_hub_models/models/stylegan2/model.py @@ -10,7 +10,7 @@ import torch from qai_hub_models.utils.asset_loaders import SourceAsRoot -from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.base_model import BaseModel, TargetRuntime from qai_hub_models.utils.input_spec import InputSpec STYLEGAN2_SOURCE_REPOSITORY = "https://github.com/NVlabs/stylegan3" @@ -82,21 +82,29 @@ def forward(self, image_noise: torch.Tensor, classes: torch.Tensor | None = None force_fp32=True, ) - def get_input_spec(self, batch_size: int = 1) -> InputSpec: + @staticmethod + def get_input_spec( + output_size: int, num_classes: int, batch_size: int = 1 + ) -> InputSpec: """ Returns the input specification (name -> (shape, type). This can be used to submit a profiling job on Qualcomm AI Hub. """ - inputs = {"image_noise": ((batch_size, self.output_size), "float32")} - if self.num_classes != 0: - inputs["classes"] = ((batch_size, self.num_classes), "float32") - return inputs # type: ignore + inputs = {"image_noise": ((batch_size, output_size), "float32")} + if num_classes != 0: + inputs["classes"] = ((batch_size, num_classes), "float32") + return inputs + + def _get_input_spec_for_model_instance(self, batch_size: int = 1) -> InputSpec: + return self.__class__.get_input_spec( + self.output_size, self.num_classes, batch_size + ) def sample_inputs( self, input_spec: InputSpec | None = None, seed=None ) -> Dict[str, List[np.ndarray]]: if not input_spec: - input_spec = self.get_input_spec() + input_spec = self._get_input_spec_for_model_instance() inputs = { "image_noise": [ @@ -113,6 +121,22 @@ def sample_inputs( return inputs + def get_hub_compile_options( + self, target_runtime: TargetRuntime, other_compile_options: str = "" + ) -> str: + compile_options = super().get_hub_compile_options( + target_runtime, other_compile_options + ) + return compile_options + " --compute_unit gpu" + + def get_hub_profile_options( + self, target_runtime: TargetRuntime, other_profile_options: str = "" + ) -> str: + profile_options = super().get_hub_profile_options( + target_runtime, other_profile_options + ) + return profile_options + " --compute_unit gpu" + def _get_qaihm_upfirdn2d_ref(misc: Any, conv2d_gradfix: Callable, upfirdn2d: Any): """ diff --git a/qai_hub_models/models/stylegan2/perf.yaml b/qai_hub_models/models/stylegan2/perf.yaml index c762bf13..856842a6 100644 --- a/qai_hub_models/models/stylegan2/perf.yaml +++ b/qai_hub_models/models/stylegan2/perf.yaml @@ -17,31 +17,42 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: StyleGAN2 performance_metrics: - torchscript_onnx_tflite: - inference_time: 1280066.0 - throughput: 0.7812097188738706 + inference_time: 1218362.0 + throughput: 0.8207741213202644 estimated_peak_memory_range: - min: 1790029824 - max: 2607953504 + min: 1358295040 + max: 1361471248 primary_compute_unit: CPU precision: fp32 layer_info: layers_on_npu: 0 layers_on_gpu: 89 - layers_on_cpu: 462 - total_layers: 551 - job_id: jz57elvqp + layers_on_cpu: 492 + total_layers: 581 + job_id: jlpe988gr job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: 
'2024-03-15T00:03:19.171321Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -57,11 +68,41 @@ models: total_layers: 0 job_id: '' job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 980347.0 + throughput: 1.0200469833640537 + estimated_peak_memory_range: + min: 1110478848 + max: 1142166720 + primary_compute_unit: CPU + precision: fp32 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 89 + layers_on_cpu: 492 + total_layers: 581 + job_id: jz5wo84p1 + job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:31:56.125164Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-15T00:03:19.171331Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped diff --git a/qai_hub_models/models/stylegan2/requirements.txt b/qai_hub_models/models/stylegan2/requirements.txt index b8261cd7..7317e178 100644 --- a/qai_hub_models/models/stylegan2/requirements.txt +++ b/qai_hub_models/models/stylegan2/requirements.txt @@ -1 +1 @@ -click>=8.0 +click==8.0 diff --git a/qai_hub_models/models/stylegan2/test.py b/qai_hub_models/models/stylegan2/test.py index df1f75c6..a55dbe26 100644 --- a/qai_hub_models/models/stylegan2/test.py +++ b/qai_hub_models/models/stylegan2/test.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- import numpy as np +import pytest import torch from qai_hub_models.models.stylegan2.app import StyleGAN2App @@ -52,6 +53,7 @@ def test_stylegan2_app(): assert len(output_images) == 2 +@pytest.mark.trace @skip_clone_repo_check def test_stylegan2_trace(): app = StyleGAN2App(StyleGAN2.from_pretrained().convert_to_torchscript()) diff --git a/qai_hub_models/models/swin_base/README.md b/qai_hub_models/models/swin_base/README.md index d5663088..cf886066 100644 --- a/qai_hub_models/models/swin_base/README.md +++ b/qai_hub_models/models/swin_base/README.md @@ -10,7 +10,7 @@ This is based on the implementation of Swin-Base found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/swin_base). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.swin_base.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Swin-Base can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). 
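The `@pytest.mark.trace` decorators added to the trace tests in this patch make the slow TorchScript-trace tests filterable with pytest's marker expressions. A small sketch of how that is used follows; the marker registration is an assumption about the repository's pytest configuration (e.g. `pytest.ini` or `pyproject.toml`) and is not shown in this patch.

```python
import pytest


@pytest.mark.trace  # marks a slow tracing test so it can be selected or skipped
def test_trace_example():
    assert True  # placeholder body; real trace tests run convert_to_torchscript checks


# Typical invocations, assuming the marker is registered in the pytest config:
#   pytest -m trace          # run only the trace-marked tests
#   pytest -m "not trace"    # skip them for a quick test pass
```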
-- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Swin Transformer: Hierarchical Vision Transformer using Shifted Windows](https://arxiv.org/abs/2103.14030) diff --git a/qai_hub_models/models/swin_base/conftest.py b/qai_hub_models/models/swin_base/conftest.py new file mode 100644 index 00000000..d866a771 --- /dev/null +++ b/qai_hub_models/models/swin_base/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.swin_base import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.swin_base.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/swin_base/demo.py b/qai_hub_models/models/swin_base/demo.py index bf03b593..f9d17b55 100644 --- a/qai_hub_models/models/swin_base/demo.py +++ b/qai_hub_models/models/swin_base/demo.py @@ -3,11 +3,11 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo -from qai_hub_models.models.swin_base.model import SwinBase +from qai_hub_models.models.swin_base.model import MODEL_ID, SwinBase def main(is_test: bool = False): - imagenet_demo(SwinBase, is_test) + imagenet_demo(SwinBase, MODEL_ID, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/swin_base/export.py b/qai_hub_models/models/swin_base/export.py index 50aa82df..b9e657bd 100644 --- a/qai_hub_models/models/swin_base/export.py +++ b/qai_hub_models/models/swin_base/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,35 +109,43 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image_tensor" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. 
Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options + " --compute_unit gpu", + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -146,33 +154,35 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options + " --compute_unit gpu", + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/swin_base/info.yaml b/qai_hub_models/models/swin_base/info.yaml index 15d83d1c..00a55170 100644 --- a/qai_hub_models/models/swin_base/info.yaml +++ b/qai_hub_models/models/swin_base/info.yaml @@ -14,6 +14,7 @@ research_paper: https://arxiv.org/abs/2103.14030 research_paper_title: 'Swin Transformer: Hierarchical Vision Transformer using Shifted Windows' license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/swin_transformer.py technical_details: @@ -39,6 +40,7 @@ form_factors: has_static_banner: yes has_animated_banner: 
yes license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k diff --git a/qai_hub_models/models/swin_base/perf.yaml b/qai_hub_models/models/swin_base/perf.yaml index c27cc0d3..d28fc1c7 100644 --- a/qai_hub_models/models/swin_base/perf.yaml +++ b/qai_hub_models/models/swin_base/perf.yaml @@ -17,31 +17,42 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: Swin-Base performance_metrics: - torchscript_onnx_tflite: - inference_time: 76852.0 - throughput: 13.012023109353041 + inference_time: 66948.0 + throughput: 14.936966003465376 estimated_peak_memory_range: - min: 12288 - max: 367871696 - primary_compute_unit: GPU + min: 28672 + max: 6112608 + primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 2006 + layers_on_npu: 1614 + layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 2006 - job_id: jw568zrvg + total_layers: 1614 + job_id: jogkzm2gd job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:23:06.160602Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -57,11 +68,41 @@ models: total_layers: 0 job_id: '' job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 43458.0 + throughput: 23.010722996916563 + estimated_peak_memory_range: + min: 69632 + max: 472671520 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 1614 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 1614 + job_id: jn5q8o457 + job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:09:41.513292Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:23:06.160610Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped diff --git a/qai_hub_models/models/swin_base/test.py b/qai_hub_models/models/swin_base/test.py index 3f302ff1..dce247a4 100644 --- a/qai_hub_models/models/swin_base/test.py +++ b/qai_hub_models/models/swin_base/test.py @@ -13,7 +13,7 @@ from qai_hub_models.models.swin_base.model import MODEL_ID, SwinBase -def test_task(imagenet_sample_torch): +def test_numerical(imagenet_sample_torch): # Ensure that the optimized SwinBase matches the original one numerically x = imagenet_sample_torch model_opt = SwinBase.from_pretrained().eval() diff --git a/qai_hub_models/models/swin_small/README.md b/qai_hub_models/models/swin_small/README.md index 856bedb6..f323fa2a 100644 --- a/qai_hub_models/models/swin_small/README.md +++ b/qai_hub_models/models/swin_small/README.md @@ -10,7 +10,7 @@ This is based on the implementation of Swin-Small found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/swin_small). 
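The renamed `test_numerical` above compares the optimized SwinBase against the original implementation on the same input. Only the start of that test is visible in the hunk, so the following is a generic sketch of such an equivalence check rather than the actual test body; the helper name and tolerance are illustrative.

```python
import torch


def assert_models_numerically_close(model_opt, model_ref, x, atol=1e-4):
    """Run both models on the same input and require element-wise agreement."""
    with torch.no_grad():
        y_opt = model_opt(x)
        y_ref = model_ref(x)
    assert torch.allclose(y_opt, y_ref, atol=atol), "optimized output drifted from reference"
```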
-[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.swin_small.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Swin-Small can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Swin Transformer: Hierarchical Vision Transformer using Shifted Windows](https://arxiv.org/abs/2103.14030) diff --git a/qai_hub_models/models/swin_small/conftest.py b/qai_hub_models/models/swin_small/conftest.py new file mode 100644 index 00000000..73bf1779 --- /dev/null +++ b/qai_hub_models/models/swin_small/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.swin_small import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. 
+ """ + mock = patch( + "qai_hub_models.models.swin_small.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/swin_small/demo.py b/qai_hub_models/models/swin_small/demo.py index dd78cca1..ec65fdc7 100644 --- a/qai_hub_models/models/swin_small/demo.py +++ b/qai_hub_models/models/swin_small/demo.py @@ -3,11 +3,11 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo -from qai_hub_models.models.swin_small.model import SwinSmall +from qai_hub_models.models.swin_small.model import MODEL_ID, SwinSmall def main(is_test: bool = False): - imagenet_demo(SwinSmall, is_test) + imagenet_demo(SwinSmall, MODEL_ID, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/swin_small/export.py b/qai_hub_models/models/swin_small/export.py index 9e7505bb..ed29fb83 100644 --- a/qai_hub_models/models/swin_small/export.py +++ b/qai_hub_models/models/swin_small/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,35 +109,43 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image_tensor" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options + " --compute_unit gpu", + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -146,33 +154,35 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options + " --compute_unit gpu", + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. 
Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/swin_small/info.yaml b/qai_hub_models/models/swin_small/info.yaml index b783fb0c..ac042fe2 100644 --- a/qai_hub_models/models/swin_small/info.yaml +++ b/qai_hub_models/models/swin_small/info.yaml @@ -14,6 +14,7 @@ research_paper: https://arxiv.org/abs/2103.14030 research_paper_title: 'Swin Transformer: Hierarchical Vision Transformer using Shifted Windows' license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/swin_transformer.py technical_details: @@ -38,6 +39,7 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k diff --git a/qai_hub_models/models/swin_small/perf.yaml b/qai_hub_models/models/swin_small/perf.yaml index d481ee20..2e615a93 100644 --- a/qai_hub_models/models/swin_small/perf.yaml +++ b/qai_hub_models/models/swin_small/perf.yaml @@ -17,31 +17,42 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: Swin-Small performance_metrics: - torchscript_onnx_tflite: - inference_time: 52492.0 - throughput: 19.05052198430237 + inference_time: 50143.0 + throughput: 19.94296312546118 estimated_peak_memory_range: - min: 12288 - max: 222000632 - primary_compute_unit: GPU + min: 90112 + max: 3612056 + primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 1965 + layers_on_npu: 1609 + layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 1965 - job_id: jlpe7wl05 + total_layers: 1609 + job_id: jo5mr9ygk job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:53:07.887698Z' torchscript_onnx_qnn: 
inference_time: 'null' throughput: 'null' @@ -57,11 +68,41 @@ models: total_layers: 0 job_id: '' job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 33054.0 + throughput: 30.2535245356084 + estimated_peak_memory_range: + min: 45056 + max: 454274336 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 1609 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 1609 + job_id: jegn2qvgo + job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:30:42.368348Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:53:07.887705Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped diff --git a/qai_hub_models/models/swin_small/test.py b/qai_hub_models/models/swin_small/test.py index 3ce1e0ea..8bcacc9e 100644 --- a/qai_hub_models/models/swin_small/test.py +++ b/qai_hub_models/models/swin_small/test.py @@ -13,7 +13,7 @@ from qai_hub_models.models.swin_small.model import MODEL_ID, SwinSmall -def test_task(imagenet_sample_torch): +def test_numerical(imagenet_sample_torch): # Ensure that the optimized SwinSmall matches the original one numerically x = imagenet_sample_torch model_opt = SwinSmall.from_pretrained().eval() diff --git a/qai_hub_models/models/swin_tiny/README.md b/qai_hub_models/models/swin_tiny/README.md index 19c7a416..0c8a8564 100644 --- a/qai_hub_models/models/swin_tiny/README.md +++ b/qai_hub_models/models/swin_tiny/README.md @@ -10,7 +10,7 @@ This is based on the implementation of Swin-Tiny found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/swin_tiny). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.swin_tiny.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Swin-Tiny can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). 
+- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Swin Transformer: Hierarchical Vision Transformer using Shifted Windows](https://arxiv.org/abs/2103.14030) diff --git a/qai_hub_models/models/swin_tiny/conftest.py b/qai_hub_models/models/swin_tiny/conftest.py new file mode 100644 index 00000000..08d176fe --- /dev/null +++ b/qai_hub_models/models/swin_tiny/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.swin_tiny import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.swin_tiny.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/swin_tiny/demo.py b/qai_hub_models/models/swin_tiny/demo.py index cd8aac95..782e73d6 100644 --- a/qai_hub_models/models/swin_tiny/demo.py +++ b/qai_hub_models/models/swin_tiny/demo.py @@ -3,11 +3,11 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo -from qai_hub_models.models.swin_tiny.model import SwinTiny +from qai_hub_models.models.swin_tiny.model import MODEL_ID, SwinTiny def main(is_test: bool = False): - imagenet_demo(SwinTiny, is_test) + imagenet_demo(SwinTiny, MODEL_ID, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/swin_tiny/export.py b/qai_hub_models/models/swin_tiny/export.py index 28760804..3f43d39b 100644 --- a/qai_hub_models/models/swin_tiny/export.py +++ b/qai_hub_models/models/swin_tiny/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,35 +109,43 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image_tensor" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. 
Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options + " --compute_unit gpu", + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -146,33 +154,35 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options + " --compute_unit gpu", + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/swin_tiny/info.yaml b/qai_hub_models/models/swin_tiny/info.yaml index ee0345a8..aee47f6a 100644 --- a/qai_hub_models/models/swin_tiny/info.yaml +++ b/qai_hub_models/models/swin_tiny/info.yaml @@ -14,6 +14,7 @@ research_paper: https://arxiv.org/abs/2103.14030 research_paper_title: 'Swin Transformer: Hierarchical Vision Transformer using Shifted Windows' license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/swin_transformer.py technical_details: @@ -38,6 +39,7 @@ form_factors: has_static_banner: yes has_animated_banner: 
yes license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k diff --git a/qai_hub_models/models/swin_tiny/perf.yaml b/qai_hub_models/models/swin_tiny/perf.yaml index 7603ecf3..6e71c1a1 100644 --- a/qai_hub_models/models/swin_tiny/perf.yaml +++ b/qai_hub_models/models/swin_tiny/perf.yaml @@ -17,31 +17,42 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: Swin-Tiny performance_metrics: - torchscript_onnx_tflite: - inference_time: 29469.0 - throughput: 33.93396450507313 + inference_time: 31313.0 + throughput: 31.935617794526234 estimated_peak_memory_range: - min: 0 - max: 193113472 - primary_compute_unit: GPU + min: 81920 + max: 3482152 + primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 1059 + layers_on_npu: 859 + layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 1059 - job_id: jqpyojx45 + total_layers: 859 + job_id: j0pxvv1g7 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:34:33.080588Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -57,11 +68,41 @@ models: total_layers: 0 job_id: '' job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 20716.0 + throughput: 48.27186715582159 + estimated_peak_memory_range: + min: 49152 + max: 274521296 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 859 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 859 + job_id: jo5mrrwgk + job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:18:27.047126Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:34:33.080597Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped diff --git a/qai_hub_models/models/swin_tiny/test.py b/qai_hub_models/models/swin_tiny/test.py index 574e7c1e..1b20542f 100644 --- a/qai_hub_models/models/swin_tiny/test.py +++ b/qai_hub_models/models/swin_tiny/test.py @@ -14,7 +14,7 @@ from qai_hub_models.models.swin_tiny.model import MODEL_ID, SwinTiny -def test_task(imagenet_sample_torch): +def test_numerical(imagenet_sample_torch): # Ensure that the optimized SwinTiny matches the original one numerically x = imagenet_sample_torch model_opt = SwinTiny.from_pretrained().eval() diff --git a/qai_hub_models/models/trocr/README.md b/qai_hub_models/models/trocr/README.md index 27f1a1a4..27fde033 100644 --- a/qai_hub_models/models/trocr/README.md +++ b/qai_hub_models/models/trocr/README.md @@ -10,7 +10,7 @@ This is based on the implementation of TrOCR found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/trocr). 
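A note on the perf.yaml metrics in the hunks above: the YAML does not state units, but the numbers are consistent with `inference_time` being reported in microseconds and `throughput` in inferences per second, i.e. throughput ≈ 1e6 / inference_time. A quick sanity check against the Swin-Tiny Samsung Galaxy S24 entry (values copied from the hunk; the unit interpretation is an assumption, not something the YAML declares):

```python
# Values copied from the Swin-Tiny / Samsung Galaxy S24 perf.yaml entry above.
# Assumption (not stated in the YAML): inference_time is in microseconds.
inference_time_us = 20716.0
reported_throughput = 48.27186715582159  # inferences per second

derived_throughput = 1_000_000 / inference_time_us
assert abs(derived_throughput - reported_throughput) < 1e-6
print(f"~{derived_throughput:.2f} inferences/s")
```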
-[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -30,7 +30,7 @@ python -m qai_hub_models.models.trocr.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -46,7 +46,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of TrOCR can be found [here](https://github.com/microsoft/unilm/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [TrOCR: Transformer-based Optical Character Recognition with Pre-trained Models](https://arxiv.org/abs/2109.10282) diff --git a/qai_hub_models/models/trocr/conftest.py b/qai_hub_models/models/trocr/conftest.py new file mode 100644 index 00000000..574e667a --- /dev/null +++ b/qai_hub_models/models/trocr/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.trocr import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. 
+ """ + mock = patch( + "qai_hub_models.models.trocr.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/trocr/export.py b/qai_hub_models/models/trocr/export.py index 8ed66055..37af35b9 100644 --- a/qai_hub_models/models/trocr/export.py +++ b/qai_hub_models/models/trocr/export.py @@ -10,14 +10,14 @@ import os import warnings from pathlib import Path -from typing import List, Mapping, Optional, Tuple +from typing import Any, Dict, List, Mapping, Optional, Tuple, cast import qai_hub as hub import torch from qai_hub_models.models.trocr import Model from qai_hub_models.utils.args import export_parser, get_model_kwargs -from qai_hub_models.utils.base_model import TargetRuntime +from qai_hub_models.utils.base_model import BaseModel, TargetRuntime from qai_hub_models.utils.compare import torch_inference from qai_hub_models.utils.input_spec import make_torch_inputs from qai_hub_models.utils.printing import ( @@ -90,9 +90,9 @@ def export_model( output_path = Path(output_dir or Path.cwd() / "build" / model_name) component_arg = components components = components or ALL_COMPONENTS - for component in components: - if component not in ALL_COMPONENTS: - raise ValueError(f"Invalid component {component}.") + for component_name in components: + if component_name not in ALL_COMPONENTS: + raise ValueError(f"Invalid component {component_name}.") if not can_access_qualcomm_ai_hub(): return export_without_hub_access( "trocr", @@ -111,68 +111,85 @@ def export_model( # 1. Initialize PyTorch model model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) - components_dict = {} + components_dict: Dict[str, BaseModel] = {} if "TrOCREncoder" in components: - components_dict["TrOCREncoder"] = model.encoder + components_dict["TrOCREncoder"] = model.encoder # type: ignore if "TrOCRDecoder" in components: - components_dict["TrOCRDecoder"] = model.decoder + components_dict["TrOCRDecoder"] = model.decoder # type: ignore - compile_jobs = {} + compile_jobs: Dict[str, hub.client.CompileJob] = {} for component_name, component in components_dict.items(): # Trace the model input_spec = component.get_input_spec() - source_model = torch.jit.trace(component, make_torch_inputs(input_spec)) + source_model = torch.jit.trace( + component.to("cpu"), make_torch_inputs(input_spec) + ) # 2. Compile the models to an on-device asset model_compile_options = component.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input pixel_values" ) - print(f"Optimizing model {component_name} to run on-device.") - compile_jobs[component_name] = hub.submit_compile_job( + print(f"Optimizing model {component_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), - name=f"{component_name}", + name=f"{model_name}_{component_name}", options=model_compile_options, ) + compile_jobs[component_name] = cast( + hub.client.CompileJob, submitted_compile_job + ) # 3. 
Profile the model assets on real devices - profile_jobs = {} + profile_jobs: Dict[str, hub.client.ProfileJob] = {} if not skip_profiling: for component_name in components: + profile_options_all = components_dict[ + component_name + ].get_hub_profile_options(target_runtime, profile_options) print(f"Profiling model {component_name} on a hosted device.") - profile_jobs[component_name] = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_jobs[component_name].get_target_model(), device=hub.Device(device), - name=f"{component_name}", - options=profile_options, + name=f"{model_name}_{component_name}", + options=profile_options_all, + ) + profile_jobs[component_name] = cast( + hub.client.ProfileJob, submitted_profile_job ) # 4. Run inference on-device with sample inputs - inference_jobs = {} + inference_jobs: Dict[str, hub.client.InferenceJob] = {} if not skip_inferencing: for component_name in components: print( f"Running inference for {component_name} on a hosted device with example inputs." ) + profile_options_all = components_dict[ + component_name + ].get_hub_profile_options(target_runtime, profile_options) sample_inputs = components_dict[component_name].sample_inputs() # Convert inputs from channel first to channel last hub_inputs = transpose_channel_first_to_last( "pixel_values", sample_inputs, target_runtime ) - inference_jobs[component_name] = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_jobs[component_name].get_target_model(), inputs=hub_inputs, device=hub.Device(device), - name=f"{component_name}", - options=profile_options, + name=f"{model_name}_{component_name}", + options=profile_options_all, + ) + inference_jobs[component_name] = cast( + hub.client.InferenceJob, submitted_inference_job ) # 5. 
Download the model assets to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) for component_name, compile_job in compile_jobs.items(): - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download( str(output_path / f"{model_name}_{component_name}.tflite") ) @@ -181,8 +198,8 @@ def export_model( if not skip_summary and not skip_profiling: for component_name in components: profile_job = profile_jobs[component_name] - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: @@ -190,8 +207,8 @@ def export_model( inference_job = inference_jobs[component_name] sample_inputs = components_dict[component_name].sample_inputs() torch_out = torch_inference(components_dict[component_name], sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) return { diff --git a/qai_hub_models/models/trocr/info.yaml b/qai_hub_models/models/trocr/info.yaml index e0755c8f..cd37a5e5 100644 --- a/qai_hub_models/models/trocr/info.yaml +++ b/qai_hub_models/models/trocr/info.yaml @@ -14,6 +14,7 @@ research_paper: https://arxiv.org/abs/2109.10282 research_paper_title: 'TrOCR: Transformer-based Optical Character Recognition with Pre-trained Models' license: https://github.com/microsoft/unilm/blob/master/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://huggingface.co/microsoft/trocr-small-stage1 technical_details: Model checkpoint: trocr-small-stage1 @@ -33,4 +34,5 @@ related_models: [] has_static_banner: yes has_animated_banner: yes license_type: mit +deploy_license_type: AI Model Hub License dataset: [] diff --git a/qai_hub_models/models/trocr/model.py b/qai_hub_models/models/trocr/model.py index 6fc61625..ac627212 100644 --- a/qai_hub_models/models/trocr/model.py +++ b/qai_hub_models/models/trocr/model.py @@ -116,7 +116,8 @@ def forward( return (*kv_cache,) # convert list to tuple for export - def get_input_spec(self) -> InputSpec: + @staticmethod + def get_input_spec() -> InputSpec: # Get the input specification ordered (name -> (shape, type)) pairs for this model. # # This can be used with the qai_hub python API to declared @@ -216,7 +217,10 @@ def forward( *out_kv_cache, ) - def get_input_spec(self) -> InputSpec: + @staticmethod + def get_input_spec( + decoder_attention_heads: int, embeddings_per_head: int, num_decoder_layers: int + ) -> InputSpec: """ Returns the input specification (name -> (shape, type). This can be used to submit profiling job on Qualcomm AI Hub. 
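The model.py hunks above and below convert `get_input_spec` from an instance method into a `@staticmethod` that takes the decoder configuration explicitly, with a small `_get_input_spec_for_model_instance` helper forwarding an instance's own attributes. A minimal sketch of the pattern, using a hypothetical class with illustrative names and shapes rather than the patch's actual TrOCR code:

```python
from typing import Dict, Tuple

# InputSpec maps input name -> (shape, dtype), mirroring the convention used in the patch.
InputSpec = Dict[str, Tuple[Tuple[int, ...], str]]


class ExampleDecoder:
    """Illustrative stand-in for a decoder whose input spec depends on its configuration."""

    def __init__(self, attention_heads: int, embeddings_per_head: int, num_layers: int):
        self.attention_heads = attention_heads
        self.embeddings_per_head = embeddings_per_head
        self.num_layers = num_layers

    @staticmethod
    def get_input_spec(
        attention_heads: int, embeddings_per_head: int, num_layers: int
    ) -> InputSpec:
        # Static: callable from plain configuration values, without instantiating
        # the (potentially large) model, e.g. when only a spec is needed for a job.
        spec: InputSpec = {"input_ids": ((1, 1), "int32")}
        for i in range(num_layers):
            spec[f"kv_{i}_attn_key"] = (
                (1, attention_heads, 200, embeddings_per_head),  # 200: placeholder seq len
                "float32",
            )
        return spec

    def _get_input_spec_for_model_instance(self) -> InputSpec:
        # Instance convenience wrapper: forward this object's own configuration.
        return self.__class__.get_input_spec(
            self.attention_heads, self.embeddings_per_head, self.num_layers
        )


# Usage: a spec can be built from configuration alone, no weights required.
spec = ExampleDecoder.get_input_spec(attention_heads=8, embeddings_per_head=64, num_layers=6)
```

Keeping the spec construction static means callers such as profiling-job submission can build input specifications without loading model weights, while existing instances still get a spec via the wrapper.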
@@ -226,9 +230,9 @@ def get_input_spec(self) -> InputSpec: attn_cache_spec = ( ( TROCR_BATCH_SIZE, - self.decoder_attention_heads, + decoder_attention_heads, TROCR_EXPORT_SEQ_LEN, - self.embeddings_per_head, + embeddings_per_head, ), "float32", ) @@ -236,15 +240,15 @@ def get_input_spec(self) -> InputSpec: cross_attn_cache_spec = ( ( TROCR_BATCH_SIZE, - self.decoder_attention_heads, + decoder_attention_heads, 578, # TODO: Can we get this programatically? - self.embeddings_per_head, + embeddings_per_head, ), "float32", ) decoder_input_specs: InputSpec = {"input_ids": input_ids_spec} - for i in range(0, self.num_decoder_layers): + for i in range(0, num_decoder_layers): decoder_input_specs[f"kv_{i}_attn_key"] = attn_cache_spec decoder_input_specs[f"kv_{i}_attn_val"] = attn_cache_spec decoder_input_specs[f"kv_{i}_cross_attn_key"] = cross_attn_cache_spec @@ -252,6 +256,13 @@ def get_input_spec(self) -> InputSpec: return decoder_input_specs + def _get_input_spec_for_model_instance(self) -> InputSpec: + return self.__class__.get_input_spec( + self.decoder_attention_heads, + self.embeddings_per_head, + self.num_decoder_layers, + ) + @classmethod def from_pretrained(cls): return TrOCR.from_pretrained().decoder diff --git a/qai_hub_models/models/trocr/perf.yaml b/qai_hub_models/models/trocr/perf.yaml index b9cea027..df5315f1 100644 --- a/qai_hub_models/models/trocr/perf.yaml +++ b/qai_hub_models/models/trocr/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: TrOCREncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 244369.0 - throughput: 4.092172084020477 + inference_time: 243976.0 + throughput: 4.098763812834049 estimated_peak_memory_range: - min: 7294976 - max: 10455296 + min: 7221248 + max: 10173368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,8 +43,16 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 627 - job_id: j2p0m26eg + job_id: j7gjxxxpd job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:46:04.016709Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -57,22 +68,52 @@ models: total_layers: 0 job_id: '' job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 182193.0 + throughput: 5.48868507571641 + estimated_peak_memory_range: + min: 20480 + max: 305620528 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 627 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 627 + job_id: jygzeekg8 + job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:31:39.426796Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:46:04.016721Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped - name: TrOCRDecoder performance_metrics: - 
torchscript_onnx_tflite: - inference_time: 2820.0 - throughput: 354.6099290780142 + inference_time: 2810.0 + throughput: 355.87188612099646 estimated_peak_memory_range: - min: 20480 - max: 2212720 + min: 12288 + max: 2353880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -80,8 +121,16 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 394 - job_id: j1p8em18p + job_id: jlpe991gr job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:51:23.352323Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -97,11 +146,41 @@ models: total_layers: 0 job_id: '' job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 2018.0 + throughput: 495.5401387512388 + estimated_peak_memory_range: + min: 12288 + max: 193404384 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 394 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 394 + job_id: jz5wov6p1 + job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:34:45.126605Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:51:23.352351Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped diff --git a/qai_hub_models/models/trocr/requirements.txt b/qai_hub_models/models/trocr/requirements.txt index 3a308074..b000abd3 100644 --- a/qai_hub_models/models/trocr/requirements.txt +++ b/qai_hub_models/models/trocr/requirements.txt @@ -1,2 +1,2 @@ -transformers==4.33.2 -sentencepiece +transformers==4.27.4 +sentencepiece==0.2.0 diff --git a/qai_hub_models/models/unet_segmentation/README.md b/qai_hub_models/models/unet_segmentation/README.md index 1980acde..f47eb91c 100644 --- a/qai_hub_models/models/unet_segmentation/README.md +++ b/qai_hub_models/models/unet_segmentation/README.md @@ -10,7 +10,7 @@ This is based on the implementation of Unet-Segmentation found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/unet_segmentation). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.unet_segmentation.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Unet-Segmentation can be found [here](https://github.com/milesial/Pytorch-UNet/blob/master/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [U-Net: Convolutional Networks for Biomedical Image Segmentation](https://arxiv.org/abs/1505.04597) diff --git a/qai_hub_models/models/unet_segmentation/conftest.py b/qai_hub_models/models/unet_segmentation/conftest.py new file mode 100644 index 00000000..45c18443 --- /dev/null +++ b/qai_hub_models/models/unet_segmentation/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.unet_segmentation import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.unet_segmentation.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/unet_segmentation/demo.py b/qai_hub_models/models/unet_segmentation/demo.py index b1714696..b2e6666d 100644 --- a/qai_hub_models/models/unet_segmentation/demo.py +++ b/qai_hub_models/models/unet_segmentation/demo.py @@ -32,6 +32,7 @@ # The demo will display the predicted mask in a window. def unet_demo( model: Callable[..., Callable[[torch.Tensor, torch.Tensor], torch.Tensor]], + MODEL_ID, default_image: PathType, is_test: bool = False, ): @@ -45,10 +46,10 @@ def unet_demo( help="File path or URL to an input image to use for the demo.", ) args = parser.parse_args([] if is_test else None) - validate_on_device_demo_args(args, model.get_model_id()) + validate_on_device_demo_args(args, MODEL_ID) # Load image & model - model = demo_model_from_cli_args(UNet, args) + model = demo_model_from_cli_args(UNet, MODEL_ID, args) print("Model loaded from pre-trained weights.") (_, _, height, width) = UNet.get_input_spec()["image"][0] orig_image = load_image( @@ -67,6 +68,7 @@ def unet_demo( def main(is_test: bool = False): unet_demo( UNet, + MODEL_ID, IMAGE_ADDRESS, is_test, ) diff --git a/qai_hub_models/models/unet_segmentation/export.py b/qai_hub_models/models/unet_segmentation/export.py index 94d2b2c4..11489e1b 100644 --- a/qai_hub_models/models/unet_segmentation/export.py +++ b/qai_hub_models/models/unet_segmentation/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -110,7 +110,7 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. 
Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( @@ -119,29 +119,37 @@ def export_model( + " --force_channel_last_input image" + " --force_channel_last_output output_0", ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -150,37 +158,39 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. 
Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore # Convert outputs from channel last to channel first inference_result = transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/unet_segmentation/info.yaml b/qai_hub_models/models/unet_segmentation/info.yaml index b835940a..f53c724e 100644 --- a/qai_hub_models/models/unet_segmentation/info.yaml +++ b/qai_hub_models/models/unet_segmentation/info.yaml @@ -15,6 +15,7 @@ tags: research_paper: https://arxiv.org/abs/1505.04597 research_paper_title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' license: https://github.com/milesial/Pytorch-UNet/blob/master/LICENSE +deploy_license: https://github.com/milesial/Pytorch-UNet/blob/master/LICENSE source_repo: https://github.com/milesial/Pytorch-UNet technical_details: Model checkpoint: unet_carvana_scale1.0_epoch2 @@ -35,4 +36,5 @@ form_factors: has_static_banner: yes has_animated_banner: no license_type: gpl-3.0 +deploy_license_type: gpl-3.0 dataset: [] diff --git a/qai_hub_models/models/unet_segmentation/perf.yaml b/qai_hub_models/models/unet_segmentation/perf.yaml index 42a0fc85..8ff61006 100644 --- a/qai_hub_models/models/unet_segmentation/perf.yaml +++ b/qai_hub_models/models/unet_segmentation/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: Unet-Segmentation performance_metrics: - torchscript_onnx_tflite: - inference_time: 7708.0 - throughput: 129.73533990659055 + inference_time: 160694.0 + throughput: 6.223007704083538 estimated_peak_memory_range: - min: 442368 - max: 29540072 + min: 6688768 + max: 229291048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,14 +43,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: j7gjr207p + job_id: jlpe9rvgr job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-15T00:14:31.434457Z' torchscript_onnx_qnn: - inference_time: 7735.0 - throughput: 129.2824822236587 + inference_time: 146509.0 + throughput: 6.825519251377049 estimated_peak_memory_range: - min: 421888 - max: 282981312 + min: 10952704 + max: 44981480 primary_compute_unit: NPU precision: fp16 
layer_info: @@ -55,13 +66,43 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 52 - job_id: jlpe7wr75 + job_id: jmg9v3857 + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 118830.0 + throughput: 8.415383320710259 + estimated_peak_memory_range: + min: 6234112 + max: 344093584 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 31 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 31 + job_id: jz5wodmp1 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:43:41.073611Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-15T00:16:26.988161Z' + torchscript_onnx_qnn: + inference_time: 110459.0 + throughput: 9.053132836618111 + estimated_peak_memory_range: + min: 328994816 + max: 420473984 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 52 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 52 + job_id: jnp10d75q + job_status: Passed diff --git a/qai_hub_models/models/vit/README.md b/qai_hub_models/models/vit/README.md index 3dc2bac9..8d5812f7 100644 --- a/qai_hub_models/models/vit/README.md +++ b/qai_hub_models/models/vit/README.md @@ -10,7 +10,7 @@ This is based on the implementation of VIT found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/vit). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.vit.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of VIT can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale](https://arxiv.org/abs/2010.11929) diff --git a/qai_hub_models/models/vit/conftest.py b/qai_hub_models/models/vit/conftest.py new file mode 100644 index 00000000..de0fda92 --- /dev/null +++ b/qai_hub_models/models/vit/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.vit import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.vit.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/vit/demo.py b/qai_hub_models/models/vit/demo.py index 53e6806c..69d7d74b 100644 --- a/qai_hub_models/models/vit/demo.py +++ b/qai_hub_models/models/vit/demo.py @@ -3,11 +3,11 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo -from qai_hub_models.models.vit.model import VIT +from qai_hub_models.models.vit.model import MODEL_ID, VIT def main(is_test: bool = False): - imagenet_demo(VIT, is_test) + imagenet_demo(VIT, MODEL_ID, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/vit/export.py b/qai_hub_models/models/vit/export.py index 31a1ccfc..bc43f4ae 100644 --- a/qai_hub_models/models/vit/export.py +++ b/qai_hub_models/models/vit/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -110,36 +110,44 @@ def export_model( # Trace the model source_model = torch.jit.trace( - model, make_torch_inputs(input_spec), check_trace=False + model.to("cpu"), make_torch_inputs(input_spec), check_trace=False ) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image_tensor" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." 
) @@ -148,33 +156,35 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/vit/info.yaml b/qai_hub_models/models/vit/info.yaml index ec8bbafc..6667f41f 100644 --- a/qai_hub_models/models/vit/info.yaml +++ b/qai_hub_models/models/vit/info.yaml @@ -14,6 +14,7 @@ research_paper: https://arxiv.org/abs/2010.11929 research_paper_title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale' license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/vision_transformer.py technical_details: @@ -37,6 +38,7 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k diff --git a/qai_hub_models/models/vit/model.py b/qai_hub_models/models/vit/model.py index b25fba95..aa608719 100644 --- a/qai_hub_models/models/vit/model.py +++ b/qai_hub_models/models/vit/model.py @@ -14,6 +14,6 @@ class VIT(ImagenetClassifier): @classmethod - def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> ImagenetClassifier: + def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> VIT: net = tv_models.vit_b_16(weights=weights) return cls(net) diff --git a/qai_hub_models/models/vit/perf.yaml b/qai_hub_models/models/vit/perf.yaml index 3eebfd79..6a9f6f8d 100644 --- a/qai_hub_models/models/vit/perf.yaml +++ b/qai_hub_models/models/vit/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro 
supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: VIT performance_metrics: - torchscript_onnx_tflite: - inference_time: 135762.0 - throughput: 7.365831381388017 + inference_time: 136110.0 + throughput: 7.346998751010212 estimated_peak_memory_range: - min: 147456 - max: 3331880 + min: 86016 + max: 3893632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,8 +43,16 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 557 - job_id: j1gly2ll5 + job_id: j1gln9lpv job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:40:01.517909Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -57,11 +68,41 @@ models: total_layers: 0 job_id: '' job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 100287.0 + throughput: 9.971382133277494 + estimated_peak_memory_range: + min: 163840 + max: 401162112 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 557 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 557 + job_id: jw566975o + job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:21:41.057280Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:40:01.517918Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped diff --git a/qai_hub_models/models/whisper_asr/info.yaml b/qai_hub_models/models/whisper_asr/info.yaml deleted file mode 100644 index cf001a33..00000000 --- a/qai_hub_models/models/whisper_asr/info.yaml +++ /dev/null @@ -1,38 +0,0 @@ -name: Whisper-Base -# id must match with the model dir name in qai_hub_models -id: whisper_asr -status: public -headline: Automatic speech recognition (ASR) model for multilingual transcription - as well as translation. -domain: Audio -description: State-of-art model encoder-decoder transformer. The encoder takes an - audio chunk (around 30 second) converted to a log-Mel spectrogram. The decoder - predicts the corresponding text caption intermixed with special tokens that can - be used to direct the single model to perform various speech tasks. 
-use_case: Speech Recognition -tags: - - foundation -research_paper: https://cdn.openai.com/papers/whisper.pdf -research_paper_title: Robust Speech Recognition via Large-Scale Weak Supervision -license: https://github.com/openai/whisper/blob/main/LICENSE -source_repo: https://github.com/openai/whisper/tree/main -technical_details: - Model checkpoint: Tiny En - Input resolution: 80x3000 - Number of parameters (WhisperEncoder): 9.39M - Model size (WhisperEncoder): 35.9 MB - Number of parameters (WhisperDecoder): 28.2M - Model size (WhisperDecoder): 108 MB -applicable_scenarios: - - Smart Home - - Accessibility -related_models: - - huggingface_wavlm_base_plus -form_factors: - - Phone - - Tablet - - IoT -has_static_banner: yes -has_animated_banner: yes -license_type: mit -dataset: [] diff --git a/qai_hub_models/models/whisper_asr/README.md b/qai_hub_models/models/whisper_base_en/README.md similarity index 58% rename from qai_hub_models/models/whisper_asr/README.md rename to qai_hub_models/models/whisper_base_en/README.md index 88403a8d..f5a7658f 100644 --- a/qai_hub_models/models/whisper_asr/README.md +++ b/qai_hub_models/models/whisper_base_en/README.md @@ -1,16 +1,16 @@ [![Qualcomm® AI Hub Models](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/quic-logo.jpg)](../../README.md) -# [Whisper-Base: Automatic speech recognition (ASR) model for multilingual transcription as well as translation](https://aihub.qualcomm.com/models/whisper_asr) +# [Whisper-Base-En: Automatic speech recognition (ASR) model for English transcription as well as translation](https://aihub.qualcomm.com/models/whisper_base_en) -State-of-art model encoder-decoder transformer. The encoder takes an audio chunk (around 30 second) converted to a log-Mel spectrogram. The decoder predicts the corresponding text caption intermixed with special tokens that can be used to direct the single model to perform various speech tasks. +OpenAI’s Whisper ASR (Automatic Speech Recognition) model is a state-of-the-art system designed for transcribing spoken language into written text. It exhibits robust performance in realistic, noisy environments, making it highly reliable for real-world applications. Specifically, it excels in long-form transcription, capable of accurately transcribing audio clips up to 30 seconds long. Time to the first token is the encoder's latency, while time to each additional token is decoder's latency, where we assume a mean decoded length specified below. -This is based on the implementation of Whisper-Base found +This is based on the implementation of Whisper-Base-En found [here](https://github.com/openai/whisper/tree/main). This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance -accross various devices, can be found [here](https://aihub.qualcomm.com/models/whisper_asr). +accross various devices, can be found [here](https://aihub.qualcomm.com/models/whisper_base_en). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -18,19 +18,19 @@ a hosted Qualcomm® device. 
Install the package via pip: ```bash -pip install "qai_hub_models[whisper_asr]" +pip install "qai_hub_models[whisper_base_en]" ``` Once installed, run the following simple CLI demo: ```bash -python -m qai_hub_models.models.whisper_asr.demo +python -m qai_hub_models.models.whisper_base_en.demo ``` More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -38,15 +38,15 @@ This repository contains export scripts that produce a model optimized for on-device deployment. This can be run as follows: ```bash -python -m qai_hub_models.models.whisper_asr.export +python -m qai_hub_models.models.whisper_base_en.export ``` Additional options are documented with the `--help` option. Note that the above script requires access to Deployment instructions for Qualcomm® AI Hub. ## License -- The license for the original implementation of Whisper-Base can be found +- The license for the original implementation of Whisper-Base-En can be found [here](https://github.com/openai/whisper/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Robust Speech Recognition via Large-Scale Weak Supervision](https://cdn.openai.com/papers/whisper.pdf) diff --git a/qai_hub_models/models/whisper_asr/__init__.py b/qai_hub_models/models/whisper_base_en/__init__.py similarity index 67% rename from qai_hub_models/models/whisper_asr/__init__.py rename to qai_hub_models/models/whisper_base_en/__init__.py index 3f49ff9d..bac04dc1 100644 --- a/qai_hub_models/models/whisper_asr/__init__.py +++ b/qai_hub_models/models/whisper_base_en/__init__.py @@ -2,6 +2,7 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- -from .app import WhisperApp as App # noqa: F401 +from qai_hub_models.models._shared.whisper.app import WhisperApp as App # noqa: F401 + from .model import MODEL_ID # noqa: F401 -from .model import Whisper as Model # noqa: F401 +from .model import WhisperBaseEn as Model # noqa: F401 diff --git a/qai_hub_models/models/whisper_base_en/conftest.py b/qai_hub_models/models/whisper_base_en/conftest.py new file mode 100644 index 00000000..b0406dce --- /dev/null +++ b/qai_hub_models/models/whisper_base_en/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.whisper_base_en import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. 
+ """ + mock = patch( + "qai_hub_models.models.whisper_base_en.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/whisper_base_en/demo.py b/qai_hub_models/models/whisper_base_en/demo.py new file mode 100644 index 00000000..9bbf714a --- /dev/null +++ b/qai_hub_models/models/whisper_base_en/demo.py @@ -0,0 +1,14 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.whisper.demo import whisper_demo +from qai_hub_models.models.whisper_base_en.model import WhisperBaseEn + + +def main(): + whisper_demo(WhisperBaseEn) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/whisper_asr/export.py b/qai_hub_models/models/whisper_base_en/export.py similarity index 74% rename from qai_hub_models/models/whisper_asr/export.py rename to qai_hub_models/models/whisper_base_en/export.py index ece839b1..5eaddcb4 100644 --- a/qai_hub_models/models/whisper_asr/export.py +++ b/qai_hub_models/models/whisper_base_en/export.py @@ -10,14 +10,14 @@ import os import warnings from pathlib import Path -from typing import List, Mapping, Optional, Tuple +from typing import Any, Dict, List, Mapping, Optional, Tuple, cast import qai_hub as hub import torch -from qai_hub_models.models.whisper_asr import Model +from qai_hub_models.models.whisper_base_en import Model from qai_hub_models.utils.args import export_parser, get_model_kwargs -from qai_hub_models.utils.base_model import TargetRuntime +from qai_hub_models.utils.base_model import BaseModel, TargetRuntime from qai_hub_models.utils.compare import torch_inference from qai_hub_models.utils.input_spec import make_torch_inputs from qai_hub_models.utils.printing import ( @@ -85,17 +85,17 @@ def export_model( * A ProfileJob containing metadata about the profile job (None if profiling skipped). * An InferenceJob containing metadata about the inference job (None if inferencing skipped). """ - model_name = "whisper_asr" + model_name = "whisper_base_en" output_path = Path(output_dir or Path.cwd() / "build" / model_name) component_arg = components components = components or ALL_COMPONENTS - for component in components: - if component not in ALL_COMPONENTS: - raise ValueError(f"Invalid component {component}.") + for component_name in components: + if component_name not in ALL_COMPONENTS: + raise ValueError(f"Invalid component {component_name}.") if not can_access_qualcomm_ai_hub(): return export_without_hub_access( - "whisper_asr", - "Whisper-Base", + "whisper_base_en", + "Whisper-Base-En", device, skip_profiling, skip_inferencing, @@ -110,64 +110,81 @@ def export_model( # 1. 
Initialize PyTorch model model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) - components_dict = {} + components_dict: Dict[str, BaseModel] = {} if "WhisperEncoder" in components: - components_dict["WhisperEncoder"] = model.encoder + components_dict["WhisperEncoder"] = model.encoder # type: ignore if "WhisperDecoder" in components: - components_dict["WhisperDecoder"] = model.decoder + components_dict["WhisperDecoder"] = model.decoder # type: ignore - compile_jobs = {} + compile_jobs: Dict[str, hub.client.CompileJob] = {} for component_name, component in components_dict.items(): # Trace the model input_spec = component.get_input_spec() - source_model = torch.jit.trace(component, make_torch_inputs(input_spec)) + source_model = torch.jit.trace( + component.to("cpu"), make_torch_inputs(input_spec) + ) # 2. Compile the models to an on-device asset model_compile_options = component.get_hub_compile_options( target_runtime, compile_options ) - print(f"Optimizing model {component_name} to run on-device.") - compile_jobs[component_name] = hub.submit_compile_job( + print(f"Optimizing model {component_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), - name=f"{component_name}", + name=f"{model_name}_{component_name}", options=model_compile_options, ) + compile_jobs[component_name] = cast( + hub.client.CompileJob, submitted_compile_job + ) # 3. Profile the model assets on real devices - profile_jobs = {} + profile_jobs: Dict[str, hub.client.ProfileJob] = {} if not skip_profiling: for component_name in components: + profile_options_all = components_dict[ + component_name + ].get_hub_profile_options(target_runtime, profile_options) print(f"Profiling model {component_name} on a hosted device.") - profile_jobs[component_name] = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_jobs[component_name].get_target_model(), device=hub.Device(device), - name=f"{component_name}", - options=profile_options, + name=f"{model_name}_{component_name}", + options=profile_options_all, + ) + profile_jobs[component_name] = cast( + hub.client.ProfileJob, submitted_profile_job ) # 4. Run inference on-device with sample inputs - inference_jobs = {} + inference_jobs: Dict[str, hub.client.InferenceJob] = {} if not skip_inferencing: for component_name in components: print( f"Running inference for {component_name} on a hosted device with example inputs." ) + profile_options_all = components_dict[ + component_name + ].get_hub_profile_options(target_runtime, profile_options) sample_inputs = components_dict[component_name].sample_inputs() - inference_jobs[component_name] = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_jobs[component_name].get_target_model(), inputs=sample_inputs, device=hub.Device(device), - name=f"{component_name}", - options=profile_options, + name=f"{model_name}_{component_name}", + options=profile_options_all, + ) + inference_jobs[component_name] = cast( + hub.client.InferenceJob, submitted_inference_job ) # 5. 
Download the model assets to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) for component_name, compile_job in compile_jobs.items(): - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download( str(output_path / f"{model_name}_{component_name}.tflite") ) @@ -176,8 +193,8 @@ def export_model( if not skip_summary and not skip_profiling: for component_name in components: profile_job = profile_jobs[component_name] - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: @@ -185,8 +202,8 @@ def export_model( inference_job = inference_jobs[component_name] sample_inputs = components_dict[component_name].sample_inputs() torch_out = torch_inference(components_dict[component_name], sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) return { diff --git a/qai_hub_models/models/whisper_base_en/info.yaml b/qai_hub_models/models/whisper_base_en/info.yaml new file mode 100644 index 00000000..320d726c --- /dev/null +++ b/qai_hub_models/models/whisper_base_en/info.yaml @@ -0,0 +1,40 @@ +name: Whisper-Base-En +# id must match with the model dir name in qai_hub_models +id: whisper_base_en +status: public +headline: Automatic speech recognition (ASR) model for English transcription as well + as translation. +domain: Audio +description: OpenAI’s Whisper ASR (Automatic Speech Recognition) model is a state-of-the-art system designed for transcribing spoken language into written text. It exhibits robust performance in realistic, noisy environments, making it highly reliable for real-world applications. Specifically, it excels in long-form transcription, capable of accurately transcribing audio clips up to 30 seconds long. Time to the first token is the encoder's latency, while time to each additional token is decoder's latency, where we assume a mean decoded length specified below. 
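A rough consequence of the latency model described above: end-to-end time for one 30-second chunk is approximately the encoder time plus the mean decoded length times the per-token decoder time. A back-of-the-envelope sketch using the Samsung Galaxy S23 TFLite figures reported in the technical_details and perf.yaml below (these numbers are assumed from this patch; real results vary by device and clip):

```python
# Rough latency estimate for whisper_base_en on Samsung Galaxy S23 (TFLite),
# using the figures from info.yaml / perf.yaml below. Illustrative only.
encoder_time_us = 154_406          # time to first token (WhisperEncoder)
decoder_time_us = 14_139           # time per additional token (WhisperDecoder)
mean_decoded_tokens = 112          # mean decoded sequence length (info.yaml)

total_us = encoder_time_us + mean_decoded_tokens * decoder_time_us
print(f"~{total_us / 1e6:.2f} s to transcribe a 30 s chunk")  # ~1.74 s
```

On that device the decoder loop dominates, accounting for roughly 1.6 s of the ~1.7 s total.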
+use_case: Speech Recognition +tags: + - foundation +research_paper: https://cdn.openai.com/papers/whisper.pdf +research_paper_title: Robust Speech Recognition via Large-Scale Weak Supervision +license: https://github.com/openai/whisper/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +source_repo: https://github.com/openai/whisper/tree/main +technical_details: + Model checkpoint: base.en + Input resolution: 80x3000 (30 seconds audio) + Mean decoded sequence length: 112 tokens + Number of parameters (WhisperEncoder): 23.7M + Model size (WhisperEncoder): 90.6 MB + Number of parameters (WhisperDecoder): 48.6M + Model size (WhisperDecoder): 186 MB +applicable_scenarios: + - Smart Home + - Accessibility +related_models: + - whisper_tiny_en + - whisper_small_en + - huggingface_wavlm_base_plus +form_factors: + - Phone + - Tablet + - IoT +has_static_banner: yes +has_animated_banner: yes +license_type: mit +deploy_license_type: AI Model Hub License +dataset: [] diff --git a/qai_hub_models/models/whisper_base_en/model.py b/qai_hub_models/models/whisper_base_en/model.py new file mode 100644 index 00000000..fca5be00 --- /dev/null +++ b/qai_hub_models/models/whisper_base_en/model.py @@ -0,0 +1,16 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from __future__ import annotations + +from qai_hub_models.models._shared.whisper.model import Whisper + +MODEL_ID = __name__.split(".")[-2] +WHISPER_VERSION = "base.en" + + +class WhisperBaseEn(Whisper): + @classmethod + def from_pretrained(cls): + return Whisper.from_pretrained(WHISPER_VERSION) diff --git a/qai_hub_models/models/whisper_base_en/perf.yaml b/qai_hub_models/models/whisper_base_en/perf.yaml new file mode 100644 index 00000000..5831fb37 --- /dev/null +++ b/qai_hub_models/models/whisper_base_en/perf.yaml @@ -0,0 +1,186 @@ +aggregated: + supported_oses: + - Android + supported_devices: + - Google Pixel 3 + - Google Pixel 3a + - Google Pixel 3a XL + - Google Pixel 4 + - Google Pixel 4a + - Google Pixel 5a 5G + - Samsung Galaxy S21 + - Samsung Galaxy S21 Ultra + - Samsung Galaxy S21+ + - Samsung Galaxy S22 5G + - Samsung Galaxy S22 Ultra 5G + - Samsung Galaxy S22+ 5G + - Samsung Galaxy S23 + - Samsung Galaxy S23 Ultra + - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra + - Samsung Galaxy Tab S8 + - Xiaomi 12 + - Xiaomi 12 Pro + supported_chipsets: + - Snapdragon® 8 Gen 1 + - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 + - Snapdragon® 888 +models: +- name: WhisperEncoder + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 154406.0 + throughput: 6.476432262994962 + estimated_peak_memory_range: + min: 36892672 + max: 232224176 + primary_compute_unit: GPU + precision: fp16 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 315 + layers_on_cpu: 0 + total_layers: 315 + job_id: jqp4q0vgo + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:10:43.748935Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + 
layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 120437.0 + throughput: 8.303096224582147 + estimated_peak_memory_range: + min: 36777984 + max: 66087104 + primary_compute_unit: GPU + precision: fp16 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 315 + layers_on_cpu: 0 + total_layers: 315 + job_id: jo5mrywgk + job_status: Passed + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:10:43.748943Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped +- name: WhisperDecoder + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 14139.0 + throughput: 70.72635971426551 + estimated_peak_memory_range: + min: 3051520 + max: 5712920 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 433 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 433 + job_id: j0pxv21g7 + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:16:05.499826Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 10614.0 + throughput: 94.21518748822311 + estimated_peak_memory_range: + min: 2019328 + max: 96045024 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 433 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 433 + job_id: jegn28rgo + job_status: Passed + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:16:05.499836Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped diff --git a/qai_hub_models/models/whisper_asr/requirements.txt b/qai_hub_models/models/whisper_base_en/requirements.txt similarity index 100% rename from qai_hub_models/models/whisper_asr/requirements.txt rename to qai_hub_models/models/whisper_base_en/requirements.txt diff --git a/qai_hub_models/models/whisper_base_en/test.py b/qai_hub_models/models/whisper_base_en/test.py new file mode 100644 index 00000000..aeb74e53 --- /dev/null +++ b/qai_hub_models/models/whisper_base_en/test.py @@ -0,0 +1,22 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.whisper.test_utils import ( + run_test_transcribe, + run_test_wrapper_numerics, +) +from qai_hub_models.models.whisper_tiny_en.demo import main as demo_main +from qai_hub_models.models.whisper_tiny_en.model import WHISPER_VERSION + + +def test_numerics(): + run_test_wrapper_numerics(WHISPER_VERSION) + + +def test_transcribe(): + run_test_transcribe(WHISPER_VERSION) + + +def test_demo(): + demo_main() diff --git a/qai_hub_models/models/whisper_small_en/README.md b/qai_hub_models/models/whisper_small_en/README.md new file mode 100644 index 00000000..c1f21275 --- /dev/null +++ b/qai_hub_models/models/whisper_small_en/README.md @@ -0,0 +1,59 @@ +[![Qualcomm® AI Hub Models](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/quic-logo.jpg)](../../README.md) + + +# [Whisper-Small-En: Automatic speech recognition (ASR) model for English transcription as well as translation](https://aihub.qualcomm.com/models/whisper_small_en) + +OpenAI’s Whisper ASR (Automatic Speech Recognition) model is a state-of-the-art system designed for transcribing spoken language into written text. It exhibits robust performance in realistic, noisy environments, making it highly reliable for real-world applications. Specifically, it excels in long-form transcription, capable of accurately transcribing audio clips up to 30 seconds long. Time to the first token is the encoder's latency, while time to each additional token is decoder's latency, where we assume a mean decoded length specified below. + +This is based on the implementation of Whisper-Small-En found +[here](https://github.com/openai/whisper/tree/main). This repository contains scripts for optimized on-device +export suitable to run on Qualcomm® devices. More details on model performance +accross various devices, can be found [here](https://aihub.qualcomm.com/models/whisper_small_en). + +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on +a hosted Qualcomm® device. + + +## Example & Usage + +Install the package via pip: +```bash +pip install "qai_hub_models[whisper_small_en]" +``` + + +Once installed, run the following simple CLI demo: + +```bash +python -m qai_hub_models.models.whisper_small_en.demo +``` +More details on the CLI tool can be found with the `--help` option. See +[demo.py](demo.py) for sample usage of the model including pre/post processing +scripts. Please refer to our [general instructions on using +models](../../../#getting-started) for more usage instructions. + +## Export for on-device deployment + +This repository contains export scripts that produce a model optimized for +on-device deployment. This can be run as follows: + +```bash +python -m qai_hub_models.models.whisper_small_en.export +``` +Additional options are documented with the `--help` option. Note that the above +script requires access to Deployment instructions for Qualcomm® AI Hub. + +## License +- The license for the original implementation of Whisper-Small-En can be found + [here](https://github.com/openai/whisper/blob/main/LICENSE). 
+- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) + +## References +* [Robust Speech Recognition via Large-Scale Weak Supervision](https://cdn.openai.com/papers/whisper.pdf) +* [Source Model Implementation](https://github.com/openai/whisper/tree/main) + +## Community +* Join [our AI Hub Slack community](https://join.slack.com/t/qualcomm-ai-hub/shared_invite/zt-2dgf95loi-CXHTDRR1rvPgQWPO~ZZZJg) to collaborate, post questions and learn more about on-device AI. +* For questions or feedback please [reach out to us](mailto:ai-hub-support@qti.qualcomm.com). + + diff --git a/qai_hub_models/models/whisper_small_en/__init__.py b/qai_hub_models/models/whisper_small_en/__init__.py new file mode 100644 index 00000000..ac4b17de --- /dev/null +++ b/qai_hub_models/models/whisper_small_en/__init__.py @@ -0,0 +1,8 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.whisper.app import WhisperApp as App # noqa: F401 + +from .model import MODEL_ID # noqa: F401 +from .model import WhisperSmallEn as Model # noqa: F401 diff --git a/qai_hub_models/models/whisper_small_en/conftest.py b/qai_hub_models/models/whisper_small_en/conftest.py new file mode 100644 index 00000000..a7507db2 --- /dev/null +++ b/qai_hub_models/models/whisper_small_en/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.whisper_small_en import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.whisper_small_en.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/whisper_small_en/demo.py b/qai_hub_models/models/whisper_small_en/demo.py new file mode 100644 index 00000000..c3100f59 --- /dev/null +++ b/qai_hub_models/models/whisper_small_en/demo.py @@ -0,0 +1,14 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.whisper.demo import whisper_demo +from qai_hub_models.models.whisper_small_en.model import WhisperSmallEn + + +def main(): + whisper_demo(WhisperSmallEn) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/whisper_small_en/export.py b/qai_hub_models/models/whisper_small_en/export.py new file mode 100644 index 00000000..348716d4 --- /dev/null +++ b/qai_hub_models/models/whisper_small_en/export.py @@ -0,0 +1,229 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import os +import warnings +from pathlib import Path +from typing import Any, Dict, List, Mapping, Optional, Tuple, cast + +import qai_hub as hub +import torch + +from qai_hub_models.models.whisper_small_en import Model +from qai_hub_models.utils.args import export_parser, get_model_kwargs +from qai_hub_models.utils.base_model import BaseModel, TargetRuntime +from qai_hub_models.utils.compare import torch_inference +from qai_hub_models.utils.input_spec import make_torch_inputs +from qai_hub_models.utils.printing import ( + print_inference_metrics, + print_profile_metrics_from_job, +) +from qai_hub_models.utils.qai_hub_helpers import ( + can_access_qualcomm_ai_hub, + export_without_hub_access, +) + +ALL_COMPONENTS = ["WhisperEncoder", "WhisperDecoder"] + + +def export_model( + device: str = "Samsung Galaxy S23", + components: Optional[List[str]] = None, + skip_profiling: bool = False, + skip_inferencing: bool = False, + skip_downloading: bool = False, + skip_summary: bool = False, + output_dir: Optional[str] = None, + target_runtime: TargetRuntime = TargetRuntime.TFLITE, + compile_options: str = "", + profile_options: str = "", + **additional_model_kwargs, +) -> Mapping[ + str, Tuple[hub.CompileJob, Optional[hub.ProfileJob], Optional[hub.InferenceJob]] +] | List[str]: + """ + This function accomplishes 6 main tasks: + + 1. Instantiates a PyTorch model and converts it to a traced TorchScript format. + 2. Compiles the model to an asset that can be run on device. + 3. Profiles the model performance on real devices. + 4. Inferences the model on sample inputs. + 5. Downloads the model asset to the local directory. + 6. Summarizes the results from profiling and inference. + + Each of the last four steps can be optionally skipped using the input options. + + Parameters: + device: Device for which to export the model. + Full list of available devices can be found by running `hub.get_devices()`. + Defaults to DEFAULT_DEVICE if not specified. + components: List of sub-components of the model that will be exported. + Each component is compiled and profiled separately. + Defaults to ALL_COMPONENTS if not specified. + skip_profiling: If set, skips profiling of compiled model on real devices. + skip_inferencing: If set, skips computing on-device outputs from sample data. + skip_downloading: If set, skips downloading of compiled model. + skip_summary: If set, skips waiting for and summarizing results + from profiling and inference. + output_dir: Directory to store generated assets (e.g. compiled model). + Defaults to `/build/`. + target_runtime: Which on-device runtime to target. Default is TFLite. + compile_options: Additional options to pass when submitting the compile job. + profile_options: Additional options to pass when submitting the profile job. + **additional_model_kwargs: Additional optional kwargs used to customize + `model_cls.from_pretrained` + + Returns: + A Mapping from component_name to a 3-tuple of: + * A CompileJob object containing metadata about the compile job submitted to hub. + * A ProfileJob containing metadata about the profile job (None if profiling skipped). + * An InferenceJob containing metadata about the inference job (None if inferencing skipped). 
+ """ + model_name = "whisper_small_en" + output_path = Path(output_dir or Path.cwd() / "build" / model_name) + component_arg = components + components = components or ALL_COMPONENTS + for component_name in components: + if component_name not in ALL_COMPONENTS: + raise ValueError(f"Invalid component {component_name}.") + if not can_access_qualcomm_ai_hub(): + return export_without_hub_access( + "whisper_small_en", + "Whisper-Small-En", + device, + skip_profiling, + skip_inferencing, + skip_downloading, + skip_summary, + output_path, + target_runtime, + compile_options, + profile_options, + component_arg, + ) + + # 1. Initialize PyTorch model + model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) + components_dict: Dict[str, BaseModel] = {} + if "WhisperEncoder" in components: + components_dict["WhisperEncoder"] = model.encoder # type: ignore + if "WhisperDecoder" in components: + components_dict["WhisperDecoder"] = model.decoder # type: ignore + + compile_jobs: Dict[str, hub.client.CompileJob] = {} + for component_name, component in components_dict.items(): + # Trace the model + input_spec = component.get_input_spec() + source_model = torch.jit.trace( + component.to("cpu"), make_torch_inputs(input_spec) + ) + + # 2. Compile the models to an on-device asset + model_compile_options = component.get_hub_compile_options( + target_runtime, compile_options + ) + print(f"Optimizing model {component_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( + model=source_model, + input_specs=input_spec, + device=hub.Device(device), + name=f"{model_name}_{component_name}", + options=model_compile_options, + ) + compile_jobs[component_name] = cast( + hub.client.CompileJob, submitted_compile_job + ) + + # 3. Profile the model assets on real devices + profile_jobs: Dict[str, hub.client.ProfileJob] = {} + if not skip_profiling: + for component_name in components: + profile_options_all = components_dict[ + component_name + ].get_hub_profile_options(target_runtime, profile_options) + print(f"Profiling model {component_name} on a hosted device.") + submitted_profile_job = hub.submit_profile_job( + model=compile_jobs[component_name].get_target_model(), + device=hub.Device(device), + name=f"{model_name}_{component_name}", + options=profile_options_all, + ) + profile_jobs[component_name] = cast( + hub.client.ProfileJob, submitted_profile_job + ) + + # 4. Run inference on-device with sample inputs + inference_jobs: Dict[str, hub.client.InferenceJob] = {} + if not skip_inferencing: + for component_name in components: + print( + f"Running inference for {component_name} on a hosted device with example inputs." + ) + profile_options_all = components_dict[ + component_name + ].get_hub_profile_options(target_runtime, profile_options) + sample_inputs = components_dict[component_name].sample_inputs() + submitted_inference_job = hub.submit_inference_job( + model=compile_jobs[component_name].get_target_model(), + inputs=sample_inputs, + device=hub.Device(device), + name=f"{model_name}_{component_name}", + options=profile_options_all, + ) + inference_jobs[component_name] = cast( + hub.client.InferenceJob, submitted_inference_job + ) + + # 5. 
Download the model assets to a local file + if not skip_downloading: + os.makedirs(output_path, exist_ok=True) + for component_name, compile_job in compile_jobs.items(): + target_model: hub.Model = compile_job.get_target_model() # type: ignore + target_model.download( + str(output_path / f"{model_name}_{component_name}.tflite") + ) + + # 6. Summarize the results from profiling and inference + if not skip_summary and not skip_profiling: + for component_name in components: + profile_job = profile_jobs[component_name] + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore + print_profile_metrics_from_job(profile_job, profile_data) + + if not skip_summary and not skip_inferencing: + for component_name in components: + inference_job = inference_jobs[component_name] + sample_inputs = components_dict[component_name].sample_inputs() + torch_out = torch_inference(components_dict[component_name], sample_inputs) + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore + print_inference_metrics(inference_job, inference_result, torch_out) + + return { + component_name: ( + compile_jobs[component_name], + profile_jobs.get(component_name, None), + inference_jobs.get(component_name, None), + ) + for component_name in components + } + + +def main(): + warnings.filterwarnings("ignore") + parser = export_parser( + model_cls=Model, components=ALL_COMPONENTS, supports_qnn=False + ) + args = parser.parse_args() + export_model(**vars(args)) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/whisper_small_en/info.yaml b/qai_hub_models/models/whisper_small_en/info.yaml new file mode 100644 index 00000000..7227fbbb --- /dev/null +++ b/qai_hub_models/models/whisper_small_en/info.yaml @@ -0,0 +1,40 @@ +name: Whisper-Small-En +# id must match with the model dir name in qai_hub_models +id: whisper_small_en +status: public +headline: Automatic speech recognition (ASR) model for English transcription as well + as translation. +domain: Audio +description: OpenAI’s Whisper ASR (Automatic Speech Recognition) model is a state-of-the-art system designed for transcribing spoken language into written text. It exhibits robust performance in realistic, noisy environments, making it highly reliable for real-world applications. Specifically, it excels in long-form transcription, capable of accurately transcribing audio clips up to 30 seconds long. Time to the first token is the encoder's latency, while time to each additional token is decoder's latency, where we assume a mean decoded length specified below. 
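Besides the `python -m qai_hub_models.models.whisper_small_en.export` CLI shown in the README, the `export_model` entry point defined above can be driven programmatically. A minimal sketch, assuming Qualcomm AI Hub access is configured; the device string and component selection below are just example values taken from the defaults documented above:

```python
from qai_hub_models.models.whisper_small_en.export import export_model

# Compile and profile only the decoder, skipping on-device inference and download.
jobs = export_model(
    device="Samsung Galaxy S23",
    components=["WhisperDecoder"],
    skip_inferencing=True,
    skip_downloading=True,
)

# Without Hub access, a list of strings is returned instead of job objects.
if isinstance(jobs, dict):
    compile_job, profile_job, inference_job = jobs["WhisperDecoder"]
    print(compile_job, profile_job, inference_job)  # inference_job is None here
```

The return value mirrors the docstring above: a mapping from component name to its compile, profile, and inference jobs, with `None` for any skipped stage.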
+use_case: Speech Recognition +tags: + - foundation +research_paper: https://cdn.openai.com/papers/whisper.pdf +research_paper_title: Robust Speech Recognition via Large-Scale Weak Supervision +license: https://github.com/openai/whisper/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +source_repo: https://github.com/openai/whisper/tree/main +technical_details: + Model checkpoint: small.en + Input resolution: 80x3000 (30 seconds audio) + Mean decoded sequence length: 112 tokens + Number of parameters (WhisperEncoder): 102M + Model size (WhisperEncoder): 390 MB + Number of parameters (WhisperDecoder): 139M + Model size (WhisperDecoder): 531 MB +applicable_scenarios: + - Smart Home + - Accessibility +related_models: + - whisper_tiny_en + - whisper_base_en + - huggingface_wavlm_base_plus +form_factors: + - Phone + - Tablet + - IoT +has_static_banner: yes +has_animated_banner: yes +license_type: mit +deploy_license_type: AI Model Hub License +dataset: [] diff --git a/qai_hub_models/models/whisper_small_en/model.py b/qai_hub_models/models/whisper_small_en/model.py new file mode 100644 index 00000000..54433dc6 --- /dev/null +++ b/qai_hub_models/models/whisper_small_en/model.py @@ -0,0 +1,16 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from __future__ import annotations + +from qai_hub_models.models._shared.whisper.model import Whisper + +MODEL_ID = __name__.split(".")[-2] +WHISPER_VERSION = "small.en" + + +class WhisperSmallEn(Whisper): + @classmethod + def from_pretrained(cls): + return Whisper.from_pretrained(WHISPER_VERSION) diff --git a/qai_hub_models/models/whisper_small_en/perf.yaml b/qai_hub_models/models/whisper_small_en/perf.yaml new file mode 100644 index 00000000..549cec62 --- /dev/null +++ b/qai_hub_models/models/whisper_small_en/perf.yaml @@ -0,0 +1,186 @@ +aggregated: + supported_oses: + - Android + supported_devices: + - Google Pixel 3 + - Google Pixel 3a + - Google Pixel 3a XL + - Google Pixel 4 + - Google Pixel 4a + - Google Pixel 5a 5G + - Samsung Galaxy S21 + - Samsung Galaxy S21 Ultra + - Samsung Galaxy S21+ + - Samsung Galaxy S22 5G + - Samsung Galaxy S22 Ultra 5G + - Samsung Galaxy S22+ 5G + - Samsung Galaxy S23 + - Samsung Galaxy S23 Ultra + - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra + - Samsung Galaxy Tab S8 + - Xiaomi 12 + - Xiaomi 12 Pro + supported_chipsets: + - Snapdragon® 8 Gen 1 + - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 + - Snapdragon® 888 +models: +- name: WhisperEncoder + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 602022.0 + throughput: 1.6610688645929883 + estimated_peak_memory_range: + min: 12288 + max: 448965896 + primary_compute_unit: GPU + precision: fp16 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 609 + layers_on_cpu: 0 + total_layers: 609 + job_id: jvgdw4k5j + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:29:10.773412Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + 
layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 461601.0 + throughput: 2.1663731231084853 + estimated_peak_memory_range: + min: 14163968 + max: 46674320 + primary_compute_unit: GPU + precision: fp16 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 609 + layers_on_cpu: 0 + total_layers: 609 + job_id: jnp101l5q + job_status: Passed + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:29:10.773421Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped +- name: WhisperDecoder + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 45479.0 + throughput: 21.988170364343983 + estimated_peak_memory_range: + min: 8577024 + max: 12019040 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 853 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 853 + job_id: jz5wozjp1 + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:33:07.115194Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: jvgdw4l5j + job_status: Failed + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:33:07.115203Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped diff --git a/qai_hub_models/models/whisper_small_en/requirements.txt b/qai_hub_models/models/whisper_small_en/requirements.txt new file mode 100644 index 00000000..fa34d4f8 --- /dev/null +++ b/qai_hub_models/models/whisper_small_en/requirements.txt @@ -0,0 +1,2 @@ +openai-whisper==20230314 +scipy==1.8.1 diff --git a/qai_hub_models/models/whisper_small_en/test.py b/qai_hub_models/models/whisper_small_en/test.py new file mode 100644 index 00000000..aeb74e53 --- /dev/null +++ b/qai_hub_models/models/whisper_small_en/test.py @@ -0,0 +1,22 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.whisper.test_utils import ( + run_test_transcribe, + run_test_wrapper_numerics, +) +from qai_hub_models.models.whisper_tiny_en.demo import main as demo_main +from qai_hub_models.models.whisper_tiny_en.model import WHISPER_VERSION + + +def test_numerics(): + run_test_wrapper_numerics(WHISPER_VERSION) + + +def test_transcribe(): + run_test_transcribe(WHISPER_VERSION) + + +def test_demo(): + demo_main() diff --git a/qai_hub_models/models/whisper_small_multi/code-gen.yaml b/qai_hub_models/models/whisper_small_multi/code-gen.yaml new file mode 100644 index 00000000..39d2d995 --- /dev/null +++ b/qai_hub_models/models/whisper_small_multi/code-gen.yaml @@ -0,0 +1,4 @@ +components: + WhisperEncoder: model.encoder + WhisperDecoder: model.decoder +qnn_export_failure_reason: "Compilation fails https://dev.aihub.qualcomm.com/jobs/jegnklrvg/ https://dev.aihub.qualcomm.com/jobs/joprw81v5 " diff --git a/qai_hub_models/models/whisper_small_multi/demo.py b/qai_hub_models/models/whisper_small_multi/demo.py new file mode 100644 index 00000000..0dbdb990 --- /dev/null +++ b/qai_hub_models/models/whisper_small_multi/demo.py @@ -0,0 +1,14 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.whisper.demo import whisper_demo +from qai_hub_models.models.whisper_small_multi.model import WhisperSmallMulti + + +def main(): + whisper_demo(WhisperSmallMulti) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/whisper_tiny_en/README.md b/qai_hub_models/models/whisper_tiny_en/README.md new file mode 100644 index 00000000..f92501ec --- /dev/null +++ b/qai_hub_models/models/whisper_tiny_en/README.md @@ -0,0 +1,59 @@ +[![Qualcomm® AI Hub Models](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/quic-logo.jpg)](../../README.md) + + +# [Whisper-Tiny-En: Automatic speech recognition (ASR) model for English transcription as well as translation](https://aihub.qualcomm.com/models/whisper_tiny_en) + +OpenAI’s Whisper ASR (Automatic Speech Recognition) model is a state-of-the-art system designed for transcribing spoken language into written text. It exhibits robust performance in realistic, noisy environments, making it highly reliable for real-world applications. Specifically, it excels in long-form transcription, capable of accurately transcribing audio clips up to 30 seconds long. Time to the first token is the encoder's latency, while time to each additional token is decoder's latency, where we assume a mean decoded length specified below. + +This is based on the implementation of Whisper-Tiny-En found +[here](https://github.com/openai/whisper/tree/main). This repository contains scripts for optimized on-device +export suitable to run on Qualcomm® devices. More details on model performance +accross various devices, can be found [here](https://aihub.qualcomm.com/models/whisper_tiny_en). + +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on +a hosted Qualcomm® device. 
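Under the hood the model is exported as two components, `WhisperEncoder` and `WhisperDecoder`, which are traced separately before compilation. A minimal sketch of that step, mirroring what the export script below does (for local experimentation only; `export.py` remains the supported path):

```python
import torch

from qai_hub_models.models.whisper_tiny_en import Model
from qai_hub_models.utils.input_spec import make_torch_inputs

# Load the pretrained tiny.en checkpoint.
model = Model.from_pretrained()

# Trace encoder and decoder separately, as the export script does.
for name, component in (("WhisperEncoder", model.encoder), ("WhisperDecoder", model.decoder)):
    input_spec = component.get_input_spec()
    traced = torch.jit.trace(component.to("cpu"), make_torch_inputs(input_spec))
    print(f"Traced {name}: {type(traced).__name__}")
```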
+ + +## Example & Usage + +Install the package via pip: +```bash +pip install "qai_hub_models[whisper_tiny_en]" +``` + + +Once installed, run the following simple CLI demo: + +```bash +python -m qai_hub_models.models.whisper_tiny_en.demo +``` +More details on the CLI tool can be found with the `--help` option. See +[demo.py](demo.py) for sample usage of the model including pre/post processing +scripts. Please refer to our [general instructions on using +models](../../../#getting-started) for more usage instructions. + +## Export for on-device deployment + +This repository contains export scripts that produce a model optimized for +on-device deployment. This can be run as follows: + +```bash +python -m qai_hub_models.models.whisper_tiny_en.export +``` +Additional options are documented with the `--help` option. Note that the above +script requires access to Deployment instructions for Qualcomm® AI Hub. + +## License +- The license for the original implementation of Whisper-Tiny-En can be found + [here](https://github.com/openai/whisper/blob/main/LICENSE). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) + +## References +* [Robust Speech Recognition via Large-Scale Weak Supervision](https://cdn.openai.com/papers/whisper.pdf) +* [Source Model Implementation](https://github.com/openai/whisper/tree/main) + +## Community +* Join [our AI Hub Slack community](https://join.slack.com/t/qualcomm-ai-hub/shared_invite/zt-2dgf95loi-CXHTDRR1rvPgQWPO~ZZZJg) to collaborate, post questions and learn more about on-device AI. +* For questions or feedback please [reach out to us](mailto:ai-hub-support@qti.qualcomm.com). + + diff --git a/qai_hub_models/models/whisper_tiny_en/__init__.py b/qai_hub_models/models/whisper_tiny_en/__init__.py new file mode 100644 index 00000000..d454567c --- /dev/null +++ b/qai_hub_models/models/whisper_tiny_en/__init__.py @@ -0,0 +1,8 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.whisper.app import WhisperApp as App # noqa: F401 + +from .model import MODEL_ID # noqa: F401 +from .model import WhisperTinyEn as Model # noqa: F401 diff --git a/qai_hub_models/models/whisper_tiny_en/conftest.py b/qai_hub_models/models/whisper_tiny_en/conftest.py new file mode 100644 index 00000000..fdfc63e0 --- /dev/null +++ b/qai_hub_models/models/whisper_tiny_en/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.whisper_tiny_en import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. 
+ """ + mock = patch( + "qai_hub_models.models.whisper_tiny_en.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/whisper_tiny_en/demo.py b/qai_hub_models/models/whisper_tiny_en/demo.py new file mode 100644 index 00000000..073ab120 --- /dev/null +++ b/qai_hub_models/models/whisper_tiny_en/demo.py @@ -0,0 +1,14 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.whisper.demo import whisper_demo +from qai_hub_models.models.whisper_tiny_en.model import WhisperTinyEn + + +def main(): + whisper_demo(WhisperTinyEn) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/whisper_tiny_en/export.py b/qai_hub_models/models/whisper_tiny_en/export.py new file mode 100644 index 00000000..57b7c76b --- /dev/null +++ b/qai_hub_models/models/whisper_tiny_en/export.py @@ -0,0 +1,229 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import os +import warnings +from pathlib import Path +from typing import Any, Dict, List, Mapping, Optional, Tuple, cast + +import qai_hub as hub +import torch + +from qai_hub_models.models.whisper_tiny_en import Model +from qai_hub_models.utils.args import export_parser, get_model_kwargs +from qai_hub_models.utils.base_model import BaseModel, TargetRuntime +from qai_hub_models.utils.compare import torch_inference +from qai_hub_models.utils.input_spec import make_torch_inputs +from qai_hub_models.utils.printing import ( + print_inference_metrics, + print_profile_metrics_from_job, +) +from qai_hub_models.utils.qai_hub_helpers import ( + can_access_qualcomm_ai_hub, + export_without_hub_access, +) + +ALL_COMPONENTS = ["WhisperEncoder", "WhisperDecoder"] + + +def export_model( + device: str = "Samsung Galaxy S23", + components: Optional[List[str]] = None, + skip_profiling: bool = False, + skip_inferencing: bool = False, + skip_downloading: bool = False, + skip_summary: bool = False, + output_dir: Optional[str] = None, + target_runtime: TargetRuntime = TargetRuntime.TFLITE, + compile_options: str = "", + profile_options: str = "", + **additional_model_kwargs, +) -> Mapping[ + str, Tuple[hub.CompileJob, Optional[hub.ProfileJob], Optional[hub.InferenceJob]] +] | List[str]: + """ + This function accomplishes 6 main tasks: + + 1. Instantiates a PyTorch model and converts it to a traced TorchScript format. + 2. Compiles the model to an asset that can be run on device. + 3. Profiles the model performance on real devices. + 4. Inferences the model on sample inputs. + 5. Downloads the model asset to the local directory. + 6. Summarizes the results from profiling and inference. + + Each of the last four steps can be optionally skipped using the input options. + + Parameters: + device: Device for which to export the model. + Full list of available devices can be found by running `hub.get_devices()`. + Defaults to DEFAULT_DEVICE if not specified. + components: List of sub-components of the model that will be exported. 
+ Each component is compiled and profiled separately. + Defaults to ALL_COMPONENTS if not specified. + skip_profiling: If set, skips profiling of compiled model on real devices. + skip_inferencing: If set, skips computing on-device outputs from sample data. + skip_downloading: If set, skips downloading of compiled model. + skip_summary: If set, skips waiting for and summarizing results + from profiling and inference. + output_dir: Directory to store generated assets (e.g. compiled model). + Defaults to `/build/`. + target_runtime: Which on-device runtime to target. Default is TFLite. + compile_options: Additional options to pass when submitting the compile job. + profile_options: Additional options to pass when submitting the profile job. + **additional_model_kwargs: Additional optional kwargs used to customize + `model_cls.from_pretrained` + + Returns: + A Mapping from component_name to a 3-tuple of: + * A CompileJob object containing metadata about the compile job submitted to hub. + * A ProfileJob containing metadata about the profile job (None if profiling skipped). + * An InferenceJob containing metadata about the inference job (None if inferencing skipped). + """ + model_name = "whisper_tiny_en" + output_path = Path(output_dir or Path.cwd() / "build" / model_name) + component_arg = components + components = components or ALL_COMPONENTS + for component_name in components: + if component_name not in ALL_COMPONENTS: + raise ValueError(f"Invalid component {component_name}.") + if not can_access_qualcomm_ai_hub(): + return export_without_hub_access( + "whisper_tiny_en", + "Whisper-Tiny-En", + device, + skip_profiling, + skip_inferencing, + skip_downloading, + skip_summary, + output_path, + target_runtime, + compile_options, + profile_options, + component_arg, + ) + + # 1. Initialize PyTorch model + model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) + components_dict: Dict[str, BaseModel] = {} + if "WhisperEncoder" in components: + components_dict["WhisperEncoder"] = model.encoder # type: ignore + if "WhisperDecoder" in components: + components_dict["WhisperDecoder"] = model.decoder # type: ignore + + compile_jobs: Dict[str, hub.client.CompileJob] = {} + for component_name, component in components_dict.items(): + # Trace the model + input_spec = component.get_input_spec() + source_model = torch.jit.trace( + component.to("cpu"), make_torch_inputs(input_spec) + ) + + # 2. Compile the models to an on-device asset + model_compile_options = component.get_hub_compile_options( + target_runtime, compile_options + ) + print(f"Optimizing model {component_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( + model=source_model, + input_specs=input_spec, + device=hub.Device(device), + name=f"{model_name}_{component_name}", + options=model_compile_options, + ) + compile_jobs[component_name] = cast( + hub.client.CompileJob, submitted_compile_job + ) + + # 3. 
Profile the model assets on real devices + profile_jobs: Dict[str, hub.client.ProfileJob] = {} + if not skip_profiling: + for component_name in components: + profile_options_all = components_dict[ + component_name + ].get_hub_profile_options(target_runtime, profile_options) + print(f"Profiling model {component_name} on a hosted device.") + submitted_profile_job = hub.submit_profile_job( + model=compile_jobs[component_name].get_target_model(), + device=hub.Device(device), + name=f"{model_name}_{component_name}", + options=profile_options_all, + ) + profile_jobs[component_name] = cast( + hub.client.ProfileJob, submitted_profile_job + ) + + # 4. Run inference on-device with sample inputs + inference_jobs: Dict[str, hub.client.InferenceJob] = {} + if not skip_inferencing: + for component_name in components: + print( + f"Running inference for {component_name} on a hosted device with example inputs." + ) + profile_options_all = components_dict[ + component_name + ].get_hub_profile_options(target_runtime, profile_options) + sample_inputs = components_dict[component_name].sample_inputs() + submitted_inference_job = hub.submit_inference_job( + model=compile_jobs[component_name].get_target_model(), + inputs=sample_inputs, + device=hub.Device(device), + name=f"{model_name}_{component_name}", + options=profile_options_all, + ) + inference_jobs[component_name] = cast( + hub.client.InferenceJob, submitted_inference_job + ) + + # 5. Download the model assets to a local file + if not skip_downloading: + os.makedirs(output_path, exist_ok=True) + for component_name, compile_job in compile_jobs.items(): + target_model: hub.Model = compile_job.get_target_model() # type: ignore + target_model.download( + str(output_path / f"{model_name}_{component_name}.tflite") + ) + + # 6. 
Summarize the results from profiling and inference + if not skip_summary and not skip_profiling: + for component_name in components: + profile_job = profile_jobs[component_name] + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore + print_profile_metrics_from_job(profile_job, profile_data) + + if not skip_summary and not skip_inferencing: + for component_name in components: + inference_job = inference_jobs[component_name] + sample_inputs = components_dict[component_name].sample_inputs() + torch_out = torch_inference(components_dict[component_name], sample_inputs) + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore + print_inference_metrics(inference_job, inference_result, torch_out) + + return { + component_name: ( + compile_jobs[component_name], + profile_jobs.get(component_name, None), + inference_jobs.get(component_name, None), + ) + for component_name in components + } + + +def main(): + warnings.filterwarnings("ignore") + parser = export_parser( + model_cls=Model, components=ALL_COMPONENTS, supports_qnn=False + ) + args = parser.parse_args() + export_model(**vars(args)) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/whisper_tiny_en/info.yaml b/qai_hub_models/models/whisper_tiny_en/info.yaml new file mode 100644 index 00000000..4fb672ed --- /dev/null +++ b/qai_hub_models/models/whisper_tiny_en/info.yaml @@ -0,0 +1,40 @@ +name: Whisper-Tiny-En +# id must match with the model dir name in qai_hub_models +id: whisper_tiny_en +status: public +headline: Automatic speech recognition (ASR) model for English transcription + as well as translation. +domain: Audio +description: OpenAI’s Whisper ASR (Automatic Speech Recognition) model is a state-of-the-art system designed for transcribing spoken language into written text. It exhibits robust performance in realistic, noisy environments, making it highly reliable for real-world applications. Specifically, it excels in long-form transcription, capable of accurately transcribing audio clips up to 30 seconds long. Time to the first token is the encoder's latency, while time to each additional token is decoder's latency, where we assume a mean decoded length specified below. 
+use_case: Speech Recognition +tags: + - foundation +research_paper: https://cdn.openai.com/papers/whisper.pdf +research_paper_title: Robust Speech Recognition via Large-Scale Weak Supervision +license: https://github.com/openai/whisper/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +source_repo: https://github.com/openai/whisper/tree/main +technical_details: + Model checkpoint: tiny.en + Input resolution: 80x3000 (30 seconds audio) + Mean decoded sequence length: 112 tokens + Number of parameters (WhisperEncoder): 9.39M + Model size (WhisperEncoder): 35.9 MB + Number of parameters (WhisperDecoder): 28.2M + Model size (WhisperDecoder): 108 MB +applicable_scenarios: + - Smart Home + - Accessibility +related_models: + - whisper_base_en + - whisper_small_en + - huggingface_wavlm_base_plus +form_factors: + - Phone + - Tablet + - IoT +has_static_banner: yes +has_animated_banner: yes +license_type: mit +deploy_license_type: AI Model Hub License +dataset: [] diff --git a/qai_hub_models/models/whisper_tiny_en/model.py b/qai_hub_models/models/whisper_tiny_en/model.py new file mode 100644 index 00000000..b430fe61 --- /dev/null +++ b/qai_hub_models/models/whisper_tiny_en/model.py @@ -0,0 +1,16 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from __future__ import annotations + +from qai_hub_models.models._shared.whisper.model import Whisper + +MODEL_ID = __name__.split(".")[-2] +WHISPER_VERSION = "tiny.en" + + +class WhisperTinyEn(Whisper): + @classmethod + def from_pretrained(cls): + return Whisper.from_pretrained(WHISPER_VERSION) diff --git a/qai_hub_models/models/whisper_tiny_en/perf.yaml b/qai_hub_models/models/whisper_tiny_en/perf.yaml new file mode 100644 index 00000000..a00f22a5 --- /dev/null +++ b/qai_hub_models/models/whisper_tiny_en/perf.yaml @@ -0,0 +1,186 @@ +aggregated: + supported_oses: + - Android + supported_devices: + - Google Pixel 3 + - Google Pixel 3a + - Google Pixel 3a XL + - Google Pixel 4 + - Google Pixel 4a + - Google Pixel 5a 5G + - Samsung Galaxy S21 + - Samsung Galaxy S21 Ultra + - Samsung Galaxy S21+ + - Samsung Galaxy S22 5G + - Samsung Galaxy S22 Ultra 5G + - Samsung Galaxy S22+ 5G + - Samsung Galaxy S23 + - Samsung Galaxy S23 Ultra + - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra + - Samsung Galaxy Tab S8 + - Xiaomi 12 + - Xiaomi 12 Pro + supported_chipsets: + - Snapdragon® 8 Gen 1 + - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 + - Snapdragon® 888 +models: +- name: WhisperEncoder + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 67350.0 + throughput: 14.847809948032666 + estimated_peak_memory_range: + min: 11608064 + max: 57976544 + primary_compute_unit: GPU + precision: fp16 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 217 + layers_on_cpu: 0 + total_layers: 217 + job_id: jz57zx9p3 + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-15T00:03:16.946141Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + 
layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 53449.0 + throughput: 18.709423936836984 + estimated_peak_memory_range: + min: 0 + max: 27656928 + primary_compute_unit: GPU + precision: fp16 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 217 + layers_on_cpu: 0 + total_layers: 217 + job_id: jegn23qgo + job_status: Passed + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-15T00:03:16.946150Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped +- name: WhisperDecoder + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 7423.0 + throughput: 134.71642193183348 + estimated_peak_memory_range: + min: 1634304 + max: 4170776 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 293 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 293 + job_id: jqp4qv1go + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-15T00:09:31.853789Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 5570.0 + throughput: 179.53321364452424 + estimated_peak_memory_range: + min: 466944 + max: 230273920 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 293 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 293 + job_id: joprke750 + job_status: Passed + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-15T00:09:31.853814Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped diff --git a/qai_hub_models/models/whisper_tiny_en/requirements.txt b/qai_hub_models/models/whisper_tiny_en/requirements.txt new file mode 100644 index 00000000..75b1cf12 --- /dev/null +++ b/qai_hub_models/models/whisper_tiny_en/requirements.txt @@ -0,0 +1,2 @@ +openai-whisper==20230314 +scipy diff --git a/qai_hub_models/models/whisper_tiny_en/test.py b/qai_hub_models/models/whisper_tiny_en/test.py new file mode 100644 index 00000000..aeb74e53 --- /dev/null +++ b/qai_hub_models/models/whisper_tiny_en/test.py @@ -0,0 +1,22 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.whisper.test_utils import ( + run_test_transcribe, + run_test_wrapper_numerics, +) +from qai_hub_models.models.whisper_tiny_en.demo import main as demo_main +from qai_hub_models.models.whisper_tiny_en.model import WHISPER_VERSION + + +def test_numerics(): + run_test_wrapper_numerics(WHISPER_VERSION) + + +def test_transcribe(): + run_test_transcribe(WHISPER_VERSION) + + +def test_demo(): + demo_main() diff --git a/qai_hub_models/models/wideresnet50/README.md b/qai_hub_models/models/wideresnet50/README.md index f8b69f8c..33f834c9 100644 --- a/qai_hub_models/models/wideresnet50/README.md +++ b/qai_hub_models/models/wideresnet50/README.md @@ -10,7 +10,7 @@ This is based on the implementation of WideResNet50 found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/wideresnet50). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.wideresnet50.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of WideResNet50 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Wide Residual Networks](https://arxiv.org/abs/1605.07146) diff --git a/qai_hub_models/models/wideresnet50/conftest.py b/qai_hub_models/models/wideresnet50/conftest.py new file mode 100644 index 00000000..c4c08578 --- /dev/null +++ b/qai_hub_models/models/wideresnet50/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.wideresnet50 import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. 
+ """ + mock = patch( + "qai_hub_models.models.wideresnet50.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/wideresnet50/demo.py b/qai_hub_models/models/wideresnet50/demo.py index e0fc917e..04487876 100644 --- a/qai_hub_models/models/wideresnet50/demo.py +++ b/qai_hub_models/models/wideresnet50/demo.py @@ -3,11 +3,11 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo -from qai_hub_models.models.wideresnet50.model import WideResNet50 +from qai_hub_models.models.wideresnet50.model import MODEL_ID, WideResNet50 def main(is_test: bool = False): - imagenet_demo(WideResNet50, is_test) + imagenet_demo(WideResNet50, MODEL_ID, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/wideresnet50/export.py b/qai_hub_models/models/wideresnet50/export.py index 023f1afc..c86de65a 100644 --- a/qai_hub_models/models/wideresnet50/export.py +++ b/qai_hub_models/models/wideresnet50/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,35 +109,43 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image_tensor" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -146,33 +154,35 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. 
Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/wideresnet50/info.yaml b/qai_hub_models/models/wideresnet50/info.yaml index 3e10e96e..abeab0e0 100644 --- a/qai_hub_models/models/wideresnet50/info.yaml +++ b/qai_hub_models/models/wideresnet50/info.yaml @@ -13,6 +13,7 @@ tags: research_paper: https://arxiv.org/abs/1605.07146 research_paper_title: Wide Residual Networks license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py technical_details: Model checkpoint: Imagenet @@ -35,6 +36,7 @@ form_factors: has_static_banner: yes has_animated_banner: no license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k diff --git a/qai_hub_models/models/wideresnet50/model.py b/qai_hub_models/models/wideresnet50/model.py index f8d7d130..9e3358bd 100644 --- a/qai_hub_models/models/wideresnet50/model.py +++ b/qai_hub_models/models/wideresnet50/model.py @@ -14,6 +14,6 @@ class WideResNet50(ImagenetClassifier): @classmethod - def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> ImagenetClassifier: + def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> WideResNet50: net = tv_models.wide_resnet50_2(weights=weights) return cls(net) diff --git a/qai_hub_models/models/wideresnet50/perf.yaml b/qai_hub_models/models/wideresnet50/perf.yaml index 9c9625ba..6dbe8a6c 100644 --- a/qai_hub_models/models/wideresnet50/perf.yaml +++ b/qai_hub_models/models/wideresnet50/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: WideResNet50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 4393.0 - throughput: 227.6348736626451 + inference_time: 4401.0 + throughput: 227.22108611679164 estimated_peak_memory_range: - min: 24576 - max: 1816072 + min: 20480 + max: 
2132848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,14 +43,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 77 - job_id: jz57el9rp + job_id: jwgoyr458 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:12:38.602998Z' torchscript_onnx_qnn: - inference_time: 4605.0 - throughput: 217.15526601520088 + inference_time: 4580.0 + throughput: 218.34061135371178 estimated_peak_memory_range: - min: 0 - max: 313348064 + min: 618496 + max: 323904968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -55,13 +66,43 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 125 - job_id: jqp4yd3lp + job_id: j7gjx77pd + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 3306.0 + throughput: 302.48033877797945 + estimated_peak_memory_range: + min: 16384 + max: 94385296 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 77 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 77 + job_id: j1pv3d75x job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:11:06.129828Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:15:48.204812Z' + torchscript_onnx_qnn: + inference_time: 3413.0 + throughput: 292.99736302373276 + estimated_peak_memory_range: + min: 618496 + max: 52379088 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 125 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 125 + job_id: jlpe9z7gr + job_status: Passed diff --git a/qai_hub_models/models/wideresnet50/test.py b/qai_hub_models/models/wideresnet50/test.py index 09976a8e..a089afd8 100644 --- a/qai_hub_models/models/wideresnet50/test.py +++ b/qai_hub_models/models/wideresnet50/test.py @@ -2,6 +2,8 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- +import pytest + from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( run_imagenet_classifier_test, run_imagenet_classifier_trace_test, @@ -14,6 +16,7 @@ def test_task(): run_imagenet_classifier_test(WideResNet50.from_pretrained(), MODEL_ID) +@pytest.mark.trace def test_trace(): run_imagenet_classifier_trace_test(WideResNet50.from_pretrained()) diff --git a/qai_hub_models/models/wideresnet50_quantized/README.md b/qai_hub_models/models/wideresnet50_quantized/README.md index d8e51c8b..0f4c4c23 100644 --- a/qai_hub_models/models/wideresnet50_quantized/README.md +++ b/qai_hub_models/models/wideresnet50_quantized/README.md @@ -10,7 +10,7 @@ This is based on the implementation of WideResNet50-Quantized found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/wideresnet50_quantized). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.wideresnet50_quantized.demo More details on the CLI tool can be found with the `--help` option. 
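For example, the reworked export flow that this patch applies across the per-model `export.py` scripts can also be driven directly from Python. The sketch below is a hypothetical illustration, not part of the patch: the keyword names (`device`, `skip_profiling`, `skip_inferencing`, `skip_downloading`, `skip_summary`) are taken from the `export_model()` bodies in the diff, while the defaults of any parameters not shown here are assumed.

```python
# Hypothetical usage sketch (not part of this patch). It mirrors the export_model()
# flow in the export.py diffs: compile on Qualcomm AI Hub, optionally profile and
# run inference on a hosted device, then download the compiled asset.
from qai_hub_models.models.wideresnet50_quantized.export import export_model

compile_job, profile_job, inference_job = export_model(
    device="Samsung Galaxy S23",  # hosted device, as used in the perf.yaml entries
    skip_profiling=True,          # skip step 3 (profiling on a hosted device)
    skip_inferencing=True,        # skip step 4 (on-device inference with sample inputs)
    skip_downloading=False,       # still run step 5 and download <model_name>.tflite
    skip_summary=True,            # skip step 6 (metrics printout)
)

# Skipped stages are returned as None, so profile_job and inference_job are None here;
# compile_job is the qai_hub CompileJob whose compiled target model was downloaded.
```

Each model's `export.py` also exposes this flow through `main()`, which builds the same arguments from the command line via `export_parser`.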
See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of WideResNet50-Quantized can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Wide Residual Networks](https://arxiv.org/abs/1605.07146) diff --git a/qai_hub_models/models/wideresnet50_quantized/conftest.py b/qai_hub_models/models/wideresnet50_quantized/conftest.py new file mode 100644 index 00000000..bd7b0f33 --- /dev/null +++ b/qai_hub_models/models/wideresnet50_quantized/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.wideresnet50_quantized import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. 
+ """ + mock = patch( + "qai_hub_models.models.wideresnet50_quantized.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/wideresnet50_quantized/demo.py b/qai_hub_models/models/wideresnet50_quantized/demo.py index c124c6fa..92feb1a1 100644 --- a/qai_hub_models/models/wideresnet50_quantized/demo.py +++ b/qai_hub_models/models/wideresnet50_quantized/demo.py @@ -3,11 +3,14 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo -from qai_hub_models.models.wideresnet50_quantized.model import WideResNet50Quantizable +from qai_hub_models.models.wideresnet50_quantized.model import ( + MODEL_ID, + WideResNet50Quantizable, +) def main(is_test: bool = False): - imagenet_demo(WideResNet50Quantizable, is_test) + imagenet_demo(WideResNet50Quantizable, MODEL_ID, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/wideresnet50_quantized/export.py b/qai_hub_models/models/wideresnet50_quantized/export.py index 4bad440d..ad05928c 100644 --- a/qai_hub_models/models/wideresnet50_quantized/export.py +++ b/qai_hub_models/models/wideresnet50_quantized/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub @@ -120,8 +120,8 @@ def export_model( model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image_tensor" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), @@ -129,21 +129,29 @@ def export_model( calibration_data=quant_calibration_data, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -155,33 +163,35 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. 
Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics(inference_job, inference_result, torch_out) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/wideresnet50_quantized/info.yaml b/qai_hub_models/models/wideresnet50_quantized/info.yaml index e1b9b755..ec14612f 100644 --- a/qai_hub_models/models/wideresnet50_quantized/info.yaml +++ b/qai_hub_models/models/wideresnet50_quantized/info.yaml @@ -14,6 +14,7 @@ tags: research_paper: https://arxiv.org/abs/1605.07146 research_paper_title: Wide Residual Networks license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py technical_details: Model checkpoint: Imagenet @@ -36,6 +37,7 @@ form_factors: has_static_banner: yes has_animated_banner: no license_type: bsd-3-clause +deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k diff --git a/qai_hub_models/models/wideresnet50_quantized/model.py b/qai_hub_models/models/wideresnet50_quantized/model.py index 86bdd679..9b1086a0 100644 --- a/qai_hub_models/models/wideresnet50_quantized/model.py +++ b/qai_hub_models/models/wideresnet50_quantized/model.py @@ -13,15 +13,20 @@ # isort: on import torch -from aimet_torch.cross_layer_equalization import equalize_model +from aimet_torch.cross_layer_equalization import ( + equalize_bn_folded_model, + fold_all_batch_norms, +) +from aimet_torch.model_preparer import prepare_model from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim from qai_hub_models.models.wideresnet50.model import WideResNet50 -from qai_hub_models.utils.aimet.config_loader import get_per_channel_aimet_config +from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config from qai_hub_models.utils.asset_loaders import CachedWebModelAsset +from qai_hub_models.utils.base_model import SourceModelFormat, TargetRuntime MODEL_ID = __name__.split(".")[-2] -MODEL_ASSET_VERSION = 1 +MODEL_ASSET_VERSION = 2 DEFAULT_ENCODINGS = "wideresnet50_quantized_encodings.json" @@ -37,9 +42,15 @@ def __init__( ) -> None: WideResNet50.__init__(self, sim_model.model) AIMETQuantizableMixin.__init__( - self, sim_model, 
needs_onnx_direct_aimet_export=True + self, + sim_model, ) + def preferred_hub_source_model_format( + self, target_runtime: TargetRuntime + ) -> SourceModelFormat: + return SourceModelFormat.ONNX + @classmethod def from_pretrained( cls, @@ -53,16 +64,19 @@ def from_pretrained( else: Interprets as a filepath and loads the encodings stored there. """ model = WideResNet50.from_pretrained() - input_shape = model.get_input_spec()["image_tensor"][0] + input_shape = cls.get_input_spec()["image_tensor"][0] + model = prepare_model(model) + dummy_input = torch.rand(input_shape) - equalize_model(model, input_shape) + pairs = fold_all_batch_norms(model, input_shape, dummy_input) + equalize_bn_folded_model(model, input_shape, pairs, dummy_input) sim = QuantizationSimModel( - model.net, + model, quant_scheme="tf_enhanced", default_param_bw=8, default_output_bw=8, - config_file=get_per_channel_aimet_config(), - dummy_input=torch.rand(input_shape), + config_file=get_default_aimet_config(), + dummy_input=dummy_input, ) if aimet_encodings: @@ -74,3 +88,11 @@ def from_pretrained( sim.model.eval() return cls(sim) + + def get_hub_compile_options( + self, target_runtime: TargetRuntime, other_compile_options: str = "" + ) -> str: + compile_options = super().get_hub_compile_options( + target_runtime, other_compile_options + ) + return compile_options + " --quantize_full_type int8 --quantize_io" diff --git a/qai_hub_models/models/wideresnet50_quantized/perf.yaml b/qai_hub_models/models/wideresnet50_quantized/perf.yaml index b9121909..b597a1f7 100644 --- a/qai_hub_models/models/wideresnet50_quantized/perf.yaml +++ b/qai_hub_models/models/wideresnet50_quantized/perf.yaml @@ -17,51 +17,92 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: WideResNet50-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1833.0 - throughput: 545.5537370430987 + inference_time: 1767.0 + throughput: 565.9309564233164 estimated_peak_memory_range: - min: 28672 - max: 1710680 + min: 24576 + max: 1759936 primary_compute_unit: NPU - precision: fp16 + precision: int8 layer_info: - layers_on_npu: 80 + layers_on_npu: 78 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 80 - job_id: jz5wl34jp + total_layers: 78 + job_id: jz5wo4zp1 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:12:41.207435Z' torchscript_onnx_qnn: - inference_time: 1756.0 - throughput: 569.4760820045558 + inference_time: 1707.0 + throughput: 585.8230814294083 + estimated_peak_memory_range: + min: 28672 + max: 479496224 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 76 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 76 + job_id: j0pxvxjg7 + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 1322.0 + throughput: 756.4296520423601 estimated_peak_memory_range: - min: 520192 - max: 152789048 + min: 16384 + max: 54559456 primary_compute_unit: NPU - precision: fp16 + precision: int8 layer_info: layers_on_npu: 78 layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: jmg9zydvp + job_id: jvgdw2k5j job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung 
Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:11:48.964511Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:17:15.136644Z' + torchscript_onnx_qnn: + inference_time: 1291.0 + throughput: 774.5933384972889 + estimated_peak_memory_range: + min: 167936 + max: 41865680 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 76 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 76 + job_id: jogkz4ygd + job_status: Passed diff --git a/qai_hub_models/models/wideresnet50_quantized/test.py b/qai_hub_models/models/wideresnet50_quantized/test.py index cb4bac98..fbe14f34 100644 --- a/qai_hub_models/models/wideresnet50_quantized/test.py +++ b/qai_hub_models/models/wideresnet50_quantized/test.py @@ -4,7 +4,6 @@ # --------------------------------------------------------------------- from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( run_imagenet_classifier_test, - run_imagenet_classifier_trace_test, ) from qai_hub_models.models.wideresnet50_quantized.demo import main as demo_main from qai_hub_models.models.wideresnet50_quantized.model import ( @@ -26,16 +25,6 @@ def test_task(): ) -def test_trace(): - run_imagenet_classifier_trace_test( - WideResNet50Quantizable.from_pretrained(), - diff_tol=0.01, - rtol=0.02, - atol=0.2, - is_quantized=True, - ) - - def test_demo(): # Verify demo does not crash demo_main(is_test=True) diff --git a/qai_hub_models/models/xlsr/README.md b/qai_hub_models/models/xlsr/README.md index d8b89438..47d468ec 100644 --- a/qai_hub_models/models/xlsr/README.md +++ b/qai_hub_models/models/xlsr/README.md @@ -10,7 +10,7 @@ This is based on the implementation of XLSR found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/xlsr). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.xlsr.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of XLSR can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). 
+- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Extremely Lightweight Quantization Robust Real-Time Single-Image Super Resolution for Mobile Devices](https://arxiv.org/abs/2105.10288) diff --git a/qai_hub_models/models/xlsr/conftest.py b/qai_hub_models/models/xlsr/conftest.py new file mode 100644 index 00000000..0b729d36 --- /dev/null +++ b/qai_hub_models/models/xlsr/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.xlsr import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.xlsr.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/xlsr/demo.py b/qai_hub_models/models/xlsr/demo.py index 9d531a77..942a23f3 100644 --- a/qai_hub_models/models/xlsr/demo.py +++ b/qai_hub_models/models/xlsr/demo.py @@ -12,7 +12,7 @@ def main(is_test: bool = False): - super_resolution_demo(XLSR, IMAGE_ADDRESS, is_test) + super_resolution_demo(XLSR, MODEL_ID, IMAGE_ADDRESS, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/xlsr/export.py b/qai_hub_models/models/xlsr/export.py index b760df96..c45d8d65 100644 --- a/qai_hub_models/models/xlsr/export.py +++ b/qai_hub_models/models/xlsr/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,7 +109,7 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( @@ -118,29 +118,37 @@ def export_model( + " --force_channel_last_input image" + " --force_channel_last_output output_0", ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. 
Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -149,30 +157,31 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore # Convert outputs from channel last to channel first inference_result = transpose_channel_last_to_first( "output_0", inference_result, target_runtime diff --git a/qai_hub_models/models/xlsr/info.yaml b/qai_hub_models/models/xlsr/info.yaml index 1751d48f..cec3ec6d 100644 --- a/qai_hub_models/models/xlsr/info.yaml +++ b/qai_hub_models/models/xlsr/info.yaml @@ -11,6 +11,7 @@ research_paper: https://arxiv.org/abs/2105.10288 research_paper_title: Extremely Lightweight Quantization Robust Real-Time Single-Image Super Resolution for Mobile Devices license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/xlsr technical_details: Model checkpoint: xlsr_4x_checkpoint_float32 @@ -28,4 +29,5 @@ related_models: [esrgan, real_esrgan_general_x4v3] has_static_banner: yes has_animated_banner: yes license_type: other +deploy_license_type: AI Model Hub License dataset: [] diff --git 
a/qai_hub_models/models/xlsr/model.py b/qai_hub_models/models/xlsr/model.py index 5f4a2ffd..aaee1928 100644 --- a/qai_hub_models/models/xlsr/model.py +++ b/qai_hub_models/models/xlsr/model.py @@ -8,7 +8,8 @@ from qai_hub_models.evaluators.base_evaluators import BaseEvaluator from qai_hub_models.evaluators.superres_evaluator import SuperResolutionOutputEvaluator -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, SourceAsRoot +from qai_hub_models.utils.aimet.repo import aimet_zoo_as_root +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset from qai_hub_models.utils.base_model import BaseModel from qai_hub_models.utils.input_spec import InputSpec @@ -81,12 +82,7 @@ def get_input_spec( def _load_xlsr_source_model() -> torch.nn.Module: # Load XLSR model from the source repository using the given weights. # Returns .utils.super_resolution.models.XLSRRelease - with SourceAsRoot( - XLSR_SOURCE_REPOSITORY, - XLSR_SOURCE_REPO_COMMIT, - MODEL_ID, - MODEL_ASSET_VERSION, - ): + with aimet_zoo_as_root(): # necessary import. `modeling.deeplab` comes from the XLSR repo. from aimet_zoo_torch.common.super_resolution.models import XLSRRelease diff --git a/qai_hub_models/models/xlsr/perf.yaml b/qai_hub_models/models/xlsr/perf.yaml index 91c2f707..3f267734 100644 --- a/qai_hub_models/models/xlsr/perf.yaml +++ b/qai_hub_models/models/xlsr/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: XLSR performance_metrics: - torchscript_onnx_tflite: - inference_time: 2523.0 - throughput: 396.3535473642489 + inference_time: 2508.0 + throughput: 398.72408293460927 estimated_peak_memory_range: - min: 24576 - max: 1686120 + min: 16384 + max: 9569248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,14 +43,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 16 - job_id: jogk2qlyg + job_id: jz57z6np3 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-15T00:14:58.320277Z' torchscript_onnx_qnn: - inference_time: 1068.0 - throughput: 936.3295880149813 + inference_time: 987.0 + throughput: 1013.1712259371834 estimated_peak_memory_range: - min: 217088 - max: 63076024 + min: 2121728 + max: 10203592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -55,13 +66,43 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 22 - job_id: jn5qlr77p + job_id: j0pxvm8g7 + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 1996.0 + throughput: 501.00200400801606 + estimated_peak_memory_range: + min: 16384 + max: 19879696 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 13 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 16 + job_id: jqp4q82go job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:37:57.776098Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-15T00:16:54.078428Z' + torchscript_onnx_qnn: + inference_time: 631.0 + throughput: 1584.7860538827258 + estimated_peak_memory_range: + min: 225280 + max: 18045792 + 
primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 22 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 22 + job_id: jegn2xjgo + job_status: Passed diff --git a/qai_hub_models/models/xlsr_quantized/README.md b/qai_hub_models/models/xlsr_quantized/README.md index 03bba459..106e48b4 100644 --- a/qai_hub_models/models/xlsr_quantized/README.md +++ b/qai_hub_models/models/xlsr_quantized/README.md @@ -10,7 +10,7 @@ This is based on the implementation of XLSR-Quantized found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/xlsr_quantized). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.xlsr_quantized.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of XLSR-Quantized can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Extremely Lightweight Quantization Robust Real-Time Single-Image Super Resolution for Mobile Devices](https://arxiv.org/abs/2105.10288) diff --git a/qai_hub_models/models/xlsr_quantized/conftest.py b/qai_hub_models/models/xlsr_quantized/conftest.py new file mode 100644 index 00000000..c2a4915a --- /dev/null +++ b/qai_hub_models/models/xlsr_quantized/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.xlsr_quantized import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. 
+ """ + mock = patch( + "qai_hub_models.models.xlsr_quantized.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/xlsr_quantized/demo.py b/qai_hub_models/models/xlsr_quantized/demo.py index fde391bd..af51277d 100644 --- a/qai_hub_models/models/xlsr_quantized/demo.py +++ b/qai_hub_models/models/xlsr_quantized/demo.py @@ -19,6 +19,7 @@ def main(is_test: bool = False): super_resolution_demo( XLSRQuantizable, + MODEL_ID, IMAGE_ADDRESS, is_test, available_target_runtimes=[TargetRuntime.TFLITE], diff --git a/qai_hub_models/models/xlsr_quantized/export.py b/qai_hub_models/models/xlsr_quantized/export.py index 9ae48155..d8b04a25 100644 --- a/qai_hub_models/models/xlsr_quantized/export.py +++ b/qai_hub_models/models/xlsr_quantized/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub @@ -123,8 +123,8 @@ def export_model( + " --force_channel_last_input image" + " --force_channel_last_output output_0", ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), @@ -132,21 +132,29 @@ def export_model( calibration_data=quant_calibration_data, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -158,30 +166,31 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. 
Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore # Convert outputs from channel last to channel first inference_result = transpose_channel_last_to_first( "output_0", inference_result, target_runtime diff --git a/qai_hub_models/models/xlsr_quantized/info.yaml b/qai_hub_models/models/xlsr_quantized/info.yaml index 55059211..38920617 100644 --- a/qai_hub_models/models/xlsr_quantized/info.yaml +++ b/qai_hub_models/models/xlsr_quantized/info.yaml @@ -12,6 +12,7 @@ research_paper: https://arxiv.org/abs/2105.10288 research_paper_title: Extremely Lightweight Quantization Robust Real-Time Single-Image Super Resolution for Mobile Devices license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/xlsr technical_details: Model checkpoint: xlsr_4x_checkpoint_w8a8 @@ -29,4 +30,5 @@ related_models: [esrgan, real_esrgan_general_x4v3, xlsr] has_static_banner: yes has_animated_banner: yes license_type: other +deploy_license_type: AI Model Hub License dataset: [] diff --git a/qai_hub_models/models/xlsr_quantized/perf.yaml b/qai_hub_models/models/xlsr_quantized/perf.yaml index e76f30ab..9bf56e2f 100644 --- a/qai_hub_models/models/xlsr_quantized/perf.yaml +++ b/qai_hub_models/models/xlsr_quantized/perf.yaml @@ -17,31 +17,42 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: XLSR-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1298.0 - throughput: 770.4160246533128 + inference_time: 1349.0 + throughput: 741.2898443291327 estimated_peak_memory_range: - min: 24576 - max: 1426056 + min: 28672 + max: 1726904 primary_compute_unit: NPU - precision: fp16 + precision: int8 layer_info: layers_on_npu: 16 layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jo5m064yg + job_id: j1p3k3l52 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:58:09.460010Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -57,11 +68,41 @@ models: total_layers: 0 job_id: '' job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 1084.0 + throughput: 922.509225092251 + estimated_peak_memory_range: + min: 20480 + max: 21010912 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 16 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 19 + job_id: 
jwgoy0x58 + job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:31:32.010687Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:58:09.460020Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped diff --git a/qai_hub_models/models/yolov6/README.md b/qai_hub_models/models/yolov6/README.md index 0b2e5623..3cf265f5 100644 --- a/qai_hub_models/models/yolov6/README.md +++ b/qai_hub_models/models/yolov6/README.md @@ -10,7 +10,7 @@ This is based on the implementation of Yolo-v6 found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/yolov6). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -25,7 +25,7 @@ python -m qai_hub_models.models.yolov6.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -41,7 +41,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Yolo-v6 can be found [here](https://github.com/meituan/YOLOv6/blob/47625514e7480706a46ff3c0cd0252907ac12f22/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [YOLOv6: A Single-Stage Object Detection Framework for Industrial Applications](https://arxiv.org/abs/2209.02976) diff --git a/qai_hub_models/models/yolov6/conftest.py b/qai_hub_models/models/yolov6/conftest.py new file mode 100644 index 00000000..6055e321 --- /dev/null +++ b/qai_hub_models/models/yolov6/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.yolov6 import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. 
+ """ + mock = patch( + "qai_hub_models.models.yolov6.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/yolov6/demo.py b/qai_hub_models/models/yolov6/demo.py index 9f2ba92b..b1d136c1 100644 --- a/qai_hub_models/models/yolov6/demo.py +++ b/qai_hub_models/models/yolov6/demo.py @@ -18,6 +18,7 @@ def main(is_test: bool = False): yolo_detection_demo( YoloV6, + MODEL_ID, YoloV6DetectionApp, IMAGE_ADDRESS, YoloV6.STRIDE_MULTIPLE, diff --git a/qai_hub_models/models/yolov6/export.py b/qai_hub_models/models/yolov6/export.py index a425cd58..1fd64724 100644 --- a/qai_hub_models/models/yolov6/export.py +++ b/qai_hub_models/models/yolov6/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,35 +109,43 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -146,35 +154,37 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. 
Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics( inference_job, inference_result, torch_out, outputs_to_skip=[2] ) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/yolov6/info.yaml b/qai_hub_models/models/yolov6/info.yaml index 9195123b..419842fd 100644 --- a/qai_hub_models/models/yolov6/info.yaml +++ b/qai_hub_models/models/yolov6/info.yaml @@ -14,6 +14,8 @@ research_paper_title: 'YOLOv6: A Single-Stage Object Detection Framework for Ind Applications' license: https://github.com/meituan/YOLOv6/blob/47625514e7480706a46ff3c0cd0252907ac12f22/LICENSE +deploy_license: + https://github.com/meituan/YOLOv6/blob/47625514e7480706a46ff3c0cd0252907ac12f22/LICENSE source_repo: https://github.com/meituan/YOLOv6/ technical_details: Model checkpoint: YoloV6-N @@ -35,4 +37,5 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: gpl-3.0 +deploy_license_type: gpl-3.0 dataset: [] diff --git a/qai_hub_models/models/yolov6/perf.yaml b/qai_hub_models/models/yolov6/perf.yaml index 82e39b9b..93dc9a2a 100644 --- a/qai_hub_models/models/yolov6/perf.yaml +++ b/qai_hub_models/models/yolov6/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: Yolo-v6 performance_metrics: - torchscript_onnx_tflite: - inference_time: 7848.0 - throughput: 127.420998980632 + inference_time: 8480.0 + throughput: 117.9245283018868 estimated_peak_memory_range: - min: 32768 - max: 7233136 + min: 24576 + max: 3130456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,14 +43,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 182 - job_id: jqpyoj4r5 + job_id: jz5wo0jp1 job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:29:36.439969Z' torchscript_onnx_qnn: - inference_time: 7283.0 - throughput: 137.3060551970342 + inference_time: 7275.0 + throughput: 137.4570446735395 estimated_peak_memory_range: - min: 4931584 - max: 17461520 + min: 4939776 + max: 18286232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -55,13 +66,43 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 230 - job_id: j2p0m212g + job_id: jnp10kl5q + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 6051.0 + throughput: 165.26194017517767 + 
estimated_peak_memory_range: + min: 16384 + max: 74357488 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 182 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 182 + job_id: jmg9v7v57 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:12:26.065342Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:32:50.147901Z' + torchscript_onnx_qnn: + inference_time: 5175.0 + throughput: 193.23671497584542 + estimated_peak_memory_range: + min: 4931584 + max: 94425040 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 230 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 230 + job_id: jz57zmrp3 + job_status: Passed diff --git a/qai_hub_models/models/yolov6/test.py b/qai_hub_models/models/yolov6/test.py index d3d13d82..4239dec7 100644 --- a/qai_hub_models/models/yolov6/test.py +++ b/qai_hub_models/models/yolov6/test.py @@ -46,5 +46,6 @@ def test_task(): assert np.allclose(source_out_postprocessed[i], qaihm_out_postprocessed[i]) +@skip_clone_repo_check def test_demo(): demo_main(is_test=True) diff --git a/qai_hub_models/models/yolov7/README.md b/qai_hub_models/models/yolov7/README.md index 73cdec65..2db3ff95 100644 --- a/qai_hub_models/models/yolov7/README.md +++ b/qai_hub_models/models/yolov7/README.md @@ -10,7 +10,7 @@ This is based on the implementation of Yolo-v7 found export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/yolov7). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -30,7 +30,7 @@ python -m qai_hub_models.models.yolov7.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -46,7 +46,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Yolo-v7 can be found [here](https://github.com/WongKinYiu/yolov7/blob/main/LICENSE.md). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors](https://arxiv.org/abs/2207.02696) diff --git a/qai_hub_models/models/yolov7/conftest.py b/qai_hub_models/models/yolov7/conftest.py new file mode 100644 index 00000000..d2efde67 --- /dev/null +++ b/qai_hub_models/models/yolov7/conftest.py @@ -0,0 +1,26 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. 
DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.yolov7 import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@pytest.fixture(autouse=True) +@skip_clone_repo_check +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.yolov7.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/yolov7/demo.py b/qai_hub_models/models/yolov7/demo.py index 23b01552..65c4bff0 100644 --- a/qai_hub_models/models/yolov7/demo.py +++ b/qai_hub_models/models/yolov7/demo.py @@ -15,6 +15,7 @@ def main(is_test: bool = False): yolo_detection_demo( YoloV7, + MODEL_ID, YoloV7DetectionApp, IMAGE_ADDRESS, YoloV7.STRIDE_MULTIPLE, diff --git a/qai_hub_models/models/yolov7/export.py b/qai_hub_models/models/yolov7/export.py index e58ad576..4a68cc79 100644 --- a/qai_hub_models/models/yolov7/export.py +++ b/qai_hub_models/models/yolov7/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -109,35 +109,43 @@ def export_model( ) # Trace the model - source_model = torch.jit.trace(model, make_torch_inputs(input_spec)) + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -146,35 +154,37 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. 
Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics( inference_job, inference_result, torch_out, outputs_to_skip=[2] ) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/yolov7/info.yaml b/qai_hub_models/models/yolov7/info.yaml index 5cf326e6..88298456 100644 --- a/qai_hub_models/models/yolov7/info.yaml +++ b/qai_hub_models/models/yolov7/info.yaml @@ -13,6 +13,7 @@ research_paper: https://arxiv.org/abs/2207.02696 research_paper_title: 'YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors' license: https://github.com/WongKinYiu/yolov7/blob/main/LICENSE.md +deploy_license: https://github.com/WongKinYiu/yolov7/blob/main/LICENSE.md source_repo: https://github.com/WongKinYiu/yolov7/ technical_details: Model checkpoint: YoloV7 Tiny @@ -34,4 +35,5 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: gpl-3.0 +deploy_license_type: gpl-3.0 dataset: [] diff --git a/qai_hub_models/models/yolov7/model.py b/qai_hub_models/models/yolov7/model.py index 63e56e72..f46bed59 100644 --- a/qai_hub_models/models/yolov7/model.py +++ b/qai_hub_models/models/yolov7/model.py @@ -13,8 +13,9 @@ detect_postprocess, yolo_sample_inputs, ) +from qai_hub_models.models.common import SampleInputsType from qai_hub_models.utils.asset_loaders import SourceAsRoot -from qai_hub_models.utils.base_model import BaseModel, InputsType +from qai_hub_models.utils.base_model import BaseModel from qai_hub_models.utils.input_spec import InputSpec YOLOV7_SOURCE_REPOSITORY = "https://github.com/WongKinYiu/yolov7" @@ -102,7 +103,7 @@ def get_input_spec( """ return {"image": ((batch_size, num_channels, height, width), "float32")} - def sample_inputs(self, input_spec: InputSpec | None = None) -> InputsType: + def sample_inputs(self, input_spec: InputSpec | None = None) -> SampleInputsType: if input_spec is not None and input_spec != YoloV7.get_input_spec(): raise ValueError("Sample input has a fixed size that cannot be changed") diff --git a/qai_hub_models/models/yolov7/perf.yaml b/qai_hub_models/models/yolov7/perf.yaml index 36dfb9a3..d9dc602f 100644 --- a/qai_hub_models/models/yolov7/perf.yaml +++ b/qai_hub_models/models/yolov7/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung 
Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: - name: Yolo-v7 performance_metrics: - torchscript_onnx_tflite: - inference_time: 22349.0 - throughput: 44.74473130788849 + inference_time: 24023.0 + throughput: 41.626774341256294 estimated_peak_memory_range: - min: 9764864 - max: 12574848 + min: 9568256 + max: 12076232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,8 +43,16 @@ models: layers_on_gpu: 0 layers_on_cpu: 21 total_layers: 307 - job_id: jvgddqzlg + job_id: jqpye94gy job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:22:25.772406Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -57,11 +68,41 @@ models: total_layers: 0 job_id: '' job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 17674.0 + throughput: 56.580287427860135 + estimated_peak_memory_range: + min: 327680 + max: 113867968 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 286 + layers_on_gpu: 0 + layers_on_cpu: 21 + total_layers: 307 + job_id: j2p0ynegw + job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:10:34.471023Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:22:25.772414Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped diff --git a/qai_hub_models/models/yolov7/requirements.txt b/qai_hub_models/models/yolov7/requirements.txt index 8e95168e..2978ccea 100644 --- a/qai_hub_models/models/yolov7/requirements.txt +++ b/qai_hub_models/models/yolov7/requirements.txt @@ -1,6 +1,3 @@ -matplotlib -opencv-python -PyYAML -requests -scipy -seaborn +matplotlib==3.7.4 +scipy==1.8.1 +seaborn==0.11.0 diff --git a/qai_hub_models/models/yolov7/test.py b/qai_hub_models/models/yolov7/test.py index b2d84594..b8f204e1 100644 --- a/qai_hub_models/models/yolov7/test.py +++ b/qai_hub_models/models/yolov7/test.py @@ -45,6 +45,7 @@ def test_task(): assert np.allclose(source_out_postprocessed[i], qaihm_out_postprocessed[i]) +@skip_clone_repo_check def test_yolov7_app(): image = load_image(IMAGE_ADDRESS) output_image = load_image(OUTPUT_IMAGE_ADDRESS).convert("RGB") @@ -52,5 +53,6 @@ def test_yolov7_app(): assert np.allclose(app.predict_boxes_from_image(image)[0], np.asarray(output_image)) +@skip_clone_repo_check def test_demo(): demo_main(is_test=True) diff --git a/qai_hub_models/models/yolov8_det/README.md b/qai_hub_models/models/yolov8_det/README.md index dbf1242c..91edcac5 100644 --- a/qai_hub_models/models/yolov8_det/README.md +++ b/qai_hub_models/models/yolov8_det/README.md @@ -1,16 +1,16 @@ [![Qualcomm® AI Hub Models](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/quic-logo.jpg)](../../README.md) -# [Yolo-v8-Detection: Real-time object detection optimized for mobile and edge](https://aihub.qualcomm.com/models/yolov8_det) +# [YOLOv8-Detection: Real-time object detection optimized for mobile and edge by 
Ultralytics](https://aihub.qualcomm.com/models/yolov8_det) -YoloV8 is a machine learning model that predicts bounding boxes and classes of objects in an image. +Ultralytics YOLOv8 is a machine learning model that predicts bounding boxes and classes of objects in an image. -This is based on the implementation of Yolo-v8-Detection found +This is based on the implementation of YOLOv8-Detection found [here](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/models/yolo/detect). This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/yolov8_det). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -30,7 +30,7 @@ python -m qai_hub_models.models.yolov8_det.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -44,9 +44,9 @@ Additional options are documented with the `--help` option. Note that the above script requires access to Deployment instructions for Qualcomm® AI Hub. ## License -- The license for the original implementation of Yolo-v8-Detection can be found +- The license for the original implementation of YOLOv8-Detection can be found [here](https://github.com/ultralytics/ultralytics/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Real-Time Flying Object Detection with YOLOv8](https://arxiv.org/abs/2305.09972) diff --git a/qai_hub_models/models/yolov8_det/conftest.py b/qai_hub_models/models/yolov8_det/conftest.py new file mode 100644 index 00000000..0e32c6fa --- /dev/null +++ b/qai_hub_models/models/yolov8_det/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.yolov8_det import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. 
+ """ + mock = patch( + "qai_hub_models.models.yolov8_det.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/yolov8_det/demo.py b/qai_hub_models/models/yolov8_det/demo.py index 3e766a25..434a87f0 100644 --- a/qai_hub_models/models/yolov8_det/demo.py +++ b/qai_hub_models/models/yolov8_det/demo.py @@ -19,6 +19,7 @@ def main(is_test: bool = False): yolo_detection_demo( YoloV8Detector, + MODEL_ID, YoloV8DetectionApp, IMAGE_ADDRESS, is_test=is_test, diff --git a/qai_hub_models/models/yolov8_det/export.py b/qai_hub_models/models/yolov8_det/export.py index e26be3de..2b207ddb 100644 --- a/qai_hub_models/models/yolov8_det/export.py +++ b/qai_hub_models/models/yolov8_det/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -90,7 +90,7 @@ def export_model( if not can_access_qualcomm_ai_hub(): return export_without_hub_access( "yolov8_det", - "Yolo-v8-Detection", + "YOLOv8-Detection", device, skip_profiling, skip_inferencing, @@ -110,36 +110,44 @@ def export_model( # Trace the model source_model = torch.jit.trace( - model, make_torch_inputs(input_spec), check_trace=False + model.to("cpu"), make_torch_inputs(input_spec), check_trace=False ) # 2. Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -148,35 +156,37 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. 
Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics( inference_job, inference_result, torch_out, outputs_to_skip=[2] ) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/yolov8_det/info.yaml b/qai_hub_models/models/yolov8_det/info.yaml index bff88dc3..a987a227 100644 --- a/qai_hub_models/models/yolov8_det/info.yaml +++ b/qai_hub_models/models/yolov8_det/info.yaml @@ -1,21 +1,22 @@ -name: Yolo-v8-Detection +name: YOLOv8-Detection # id must match with the model dir name in qai_hub_models id: yolov8_det status: public -headline: Real-time object detection optimized for mobile and edge. +headline: Real-time object detection optimized for mobile and edge by Ultralytics. domain: Computer Vision use_case: Object Detection -description: YoloV8 is a machine learning model that predicts bounding boxes and classes +description: Ultralytics YOLOv8 is a machine learning model that predicts bounding boxes and classes of objects in an image. 
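A note on the export.py hunks repeated above for each YOLO variant: every job handle is now declared `Optional`, the freshly submitted job is narrowed with `typing.cast`, and each `.wait()`/download call is preceded by an explicit `None` check so the `skip_*` flags stay type-safe. A minimal sketch of that pattern follows; the helper names and the omitted `options` argument are illustrative only, while the `qai_hub` calls themselves (`submit_profile_job`, `get_target_model`, `wait`, `download_profile`) are the ones already used in the generated scripts.

```python
from typing import Any, Dict, Optional, cast

import qai_hub as hub


def profile_if_requested(
    compile_job: hub.client.CompileJob,
    device_name: str,
    model_name: str,
    skip_profiling: bool,
) -> Optional[hub.client.ProfileJob]:
    # The handle stays Optional so the profiling step can be skipped entirely.
    profile_job: Optional[hub.client.ProfileJob] = None
    if not skip_profiling:
        submitted_profile_job = hub.submit_profile_job(
            model=compile_job.get_target_model(),
            device=hub.Device(device_name),
            name=model_name,
        )
        # The submit call is typed loosely, so narrow the handle explicitly.
        profile_job = cast(hub.client.ProfileJob, submitted_profile_job)
    return profile_job


def summarize_profile(profile_job: Optional[hub.client.ProfileJob]) -> Dict[str, Any]:
    # Mirror the generated scripts: check for None before waiting on the job.
    assert profile_job is not None and profile_job.wait().success
    return profile_job.download_profile()  # type: ignore
```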
tags: - real-time research_paper: https://arxiv.org/abs/2305.09972 research_paper_title: Real-Time Flying Object Detection with YOLOv8 license: https://github.com/ultralytics/ultralytics/blob/main/LICENSE +deploy_license: https://github.com/ultralytics/ultralytics/blob/main/LICENSE source_repo: https://github.com/ultralytics/ultralytics/tree/main/ultralytics/models/yolo/detect technical_details: - Model checkpoint: YoloV8-N + Model checkpoint: YOLOv8-N Input resolution: 640x640 Number of parameters: 3.18M Model size: 12.2 MB @@ -34,4 +35,5 @@ form_factors: has_static_banner: yes has_animated_banner: yes license_type: agpl-3.0 +deploy_license_type: agpl-3.0 dataset: [] diff --git a/qai_hub_models/models/yolov8_det/perf.yaml b/qai_hub_models/models/yolov8_det/perf.yaml index bbaddd57..ecd1b9a2 100644 --- a/qai_hub_models/models/yolov8_det/perf.yaml +++ b/qai_hub_models/models/yolov8_det/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: -- name: Yolo-v8-Detection +- name: YOLOv8-Detection performance_metrics: - torchscript_onnx_tflite: - inference_time: 9251.0 - throughput: 108.09642200843152 + inference_time: 9217.0 + throughput: 108.49517196484756 estimated_peak_memory_range: - min: 233472 - max: 2649168 + min: 262144 + max: 19308896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,14 +43,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 300 - job_id: j7gjr2q8p + job_id: jo5mrw9gk job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-14T23:28:16.047386Z' torchscript_onnx_qnn: - inference_time: 7043.0 - throughput: 141.9849495953429 + inference_time: 7039.0 + throughput: 142.06563432305725 estimated_peak_memory_range: - min: 4939776 - max: 19565584 + min: 4984832 + max: 18803744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -55,13 +66,43 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 294 - job_id: jlpe7wy05 + job_id: joprk4750 + job_status: Passed + - torchscript_onnx_tflite: + inference_time: 6502.0 + throughput: 153.79883112888342 + estimated_peak_memory_range: + min: 24576 + max: 83870080 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 300 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 300 + job_id: jegn29qgo job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:08:50.678067Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-14T23:30:19.085764Z' + torchscript_onnx_qnn: + inference_time: 4840.0 + throughput: 206.61157024793388 + estimated_peak_memory_range: + min: 4947968 + max: 123420640 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 294 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 294 + job_id: jep287qp6 + job_status: Passed diff --git a/qai_hub_models/models/yolov8_det/requirements.txt b/qai_hub_models/models/yolov8_det/requirements.txt index 5d6e5cf5..94980b0d 100644 --- a/qai_hub_models/models/yolov8_det/requirements.txt +++ 
b/qai_hub_models/models/yolov8_det/requirements.txt @@ -1 +1,3 @@ +seaborn==0.11.0 +thop==0.1.1.post2209072238 ultralytics==8.0.193 diff --git a/qai_hub_models/models/yolov8_det/test.py b/qai_hub_models/models/yolov8_det/test.py index 13614261..a98da871 100644 --- a/qai_hub_models/models/yolov8_det/test.py +++ b/qai_hub_models/models/yolov8_det/test.py @@ -17,7 +17,6 @@ ) from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image from qai_hub_models.utils.image_processing import preprocess_PIL_image -from qai_hub_models.utils.testing import skip_clone_repo_check OUTPUT_IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( MODEL_ID, MODEL_ASSET_VERSION, "test_images/output_image.png" @@ -25,7 +24,6 @@ WEIGHTS = "yolov8n.pt" -@skip_clone_repo_check def test_task(): """Verify that raw (numeric) outputs of both (QAIHM and non-qaihm) networks are the same.""" processed_sample_image = preprocess_PIL_image(load_image(IMAGE_ADDRESS)) diff --git a/qai_hub_models/models/yolov8_seg/README.md b/qai_hub_models/models/yolov8_seg/README.md index b3c82eec..f29edea1 100644 --- a/qai_hub_models/models/yolov8_seg/README.md +++ b/qai_hub_models/models/yolov8_seg/README.md @@ -1,16 +1,16 @@ [![Qualcomm® AI Hub Models](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/quic-logo.jpg)](../../README.md) -# [Yolo-v8-Segmentation: Real-time object segmentation optimized for mobile and edge](https://aihub.qualcomm.com/models/yolov8_seg) +# [YOLOv8-Segmentation: Real-time object segmentation optimized for mobile and edge by Ultralytics](https://aihub.qualcomm.com/models/yolov8_seg) -YoloV8 is a machine learning model that predicts bounding boxes, segmentation masks and classes of objects in an image. +Ultralytics YOLOv8 is a machine learning model that predicts bounding boxes, segmentation masks and classes of objects in an image. -This is based on the implementation of Yolo-v8-Segmentation found +This is based on the implementation of YOLOv8-Segmentation found [here](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/models/yolo/segment). This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/yolov8_seg). -[Sign up](https://aihub.qualcomm.com/) for early access to run these models on +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on a hosted Qualcomm® device. @@ -30,7 +30,7 @@ python -m qai_hub_models.models.yolov8_seg.demo More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing scripts. Please refer to our [general instructions on using -models](../../#qai-hub-models) for more usage instructions. +models](../../../#getting-started) for more usage instructions. ## Export for on-device deployment @@ -44,9 +44,9 @@ Additional options are documented with the `--help` option. Note that the above script requires access to Deployment instructions for Qualcomm® AI Hub. ## License -- The license for the original implementation of Yolo-v8-Segmentation can be found +- The license for the original implementation of YOLOv8-Segmentation can be found [here](https://github.com/ultralytics/ultralytics/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). +- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) ## References * [Real-Time Flying Object Detection with YOLOv8](https://arxiv.org/abs/2305.09972) diff --git a/qai_hub_models/models/yolov8_seg/conftest.py b/qai_hub_models/models/yolov8_seg/conftest.py new file mode 100644 index 00000000..273c44c1 --- /dev/null +++ b/qai_hub_models/models/yolov8_seg/conftest.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +from unittest.mock import patch + +import pytest + +from qai_hub_models.models.yolov8_seg import Model + + +@pytest.fixture(autouse=True) +def mock_from_pretrained(): + """ + Model.from_pretrained() can be slow. Invoke it once and cache it so all invocations + across all tests return the cached instance of the model. + """ + mock = patch( + "qai_hub_models.models.yolov8_seg.Model.from_pretrained", + return_value=Model.from_pretrained(), + ) + mock.start() diff --git a/qai_hub_models/models/yolov8_seg/demo.py b/qai_hub_models/models/yolov8_seg/demo.py index bd194c22..fc39d386 100644 --- a/qai_hub_models/models/yolov8_seg/demo.py +++ b/qai_hub_models/models/yolov8_seg/demo.py @@ -68,7 +68,7 @@ def yolov8_seg_demo( help="Intersection over Union (IoU) threshold for NonMaximumSuppression", ) args = parser.parse_args([] if is_test else None) - validate_on_device_demo_args(args, model_type.get_model_id()) + validate_on_device_demo_args(args, MODEL_ID) if args.image is None: image_path = default_image.fetch() @@ -76,7 +76,7 @@ def yolov8_seg_demo( image_path = args.image # Load image & model - model = demo_model_from_cli_args(model_type, args, check_trace=False) + model = demo_model_from_cli_args(model_type, MODEL_ID, args) app = YoloV8SegmentationApp(model, args.score_threshold, args.iou_threshold) print("Model Loaded") diff --git a/qai_hub_models/models/yolov8_seg/export.py b/qai_hub_models/models/yolov8_seg/export.py index 366b63ea..227c0a81 100644 --- a/qai_hub_models/models/yolov8_seg/export.py +++ b/qai_hub_models/models/yolov8_seg/export.py @@ -10,7 +10,7 @@ import os import warnings from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import qai_hub as hub import torch @@ -90,7 +90,7 @@ def export_model( if not can_access_qualcomm_ai_hub(): return export_without_hub_access( "yolov8_seg", - "Yolo-v8-Segmentation", + "YOLOv8-Segmentation", device, skip_profiling, skip_inferencing, @@ -110,36 +110,44 @@ def export_model( # Trace the model source_model = torch.jit.trace( - model, make_torch_inputs(input_spec), check_trace=False + model.to("cpu"), make_torch_inputs(input_spec), check_trace=False ) # 2. 
Compile the model to an on-device asset model_compile_options = model.get_hub_compile_options( target_runtime, compile_options + " --force_channel_last_input image" ) - print(f"Optimizing model {model_name} to run on-device.") - compile_job = hub.submit_compile_job( + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( model=source_model, input_specs=input_spec, device=hub.Device(device), name=model_name, options=model_compile_options, ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) # 3. Profile the model asset on real devices - profile_job = None + profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print(f"Profiling model {model_name} on a hosted device.") - profile_job = hub.submit_profile_job( + submitted_profile_job = hub.submit_profile_job( model=compile_job.get_target_model(), device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) # 4. Run inference on-device with sample inputs - inference_job = None + inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) print( f"Running inference for {model_name} on a hosted device with example inputs." ) @@ -148,35 +156,37 @@ def export_model( hub_inputs = transpose_channel_first_to_last( "image", sample_inputs, target_runtime ) - inference_job = hub.submit_inference_job( + submitted_inference_job = hub.submit_inference_job( model=compile_job.get_target_model(), inputs=hub_inputs, device=hub.Device(device), name=model_name, - options=profile_options, + options=profile_options_all, ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) # 5. Download the model asset to a local file if not skip_downloading: os.makedirs(output_path, exist_ok=True) - target_model = compile_job.get_target_model() + target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / f"{model_name}.tflite")) # 6. 
Summarize the results from profiling and inference if not skip_summary and not skip_profiling: - assert profile_job.wait().success - profile_data = profile_job.download_profile() + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore print_profile_metrics_from_job(profile_job, profile_data) if not skip_summary and not skip_inferencing: torch_out = torch_inference(model, sample_inputs) - assert inference_job.wait().success - inference_result = inference_job.download_output_data() + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore print_inference_metrics( inference_job, inference_result, torch_out, outputs_to_skip=[3] ) - print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/yolov8_seg/info.yaml b/qai_hub_models/models/yolov8_seg/info.yaml index 7397936c..f20e4138 100644 --- a/qai_hub_models/models/yolov8_seg/info.yaml +++ b/qai_hub_models/models/yolov8_seg/info.yaml @@ -1,21 +1,22 @@ -name: Yolo-v8-Segmentation +name: YOLOv8-Segmentation # id must match with the model dir name in qai_hub_models id: yolov8_seg status: public -headline: Real-time object segmentation optimized for mobile and edge. +headline: Real-time object segmentation optimized for mobile and edge by Ultralytics. domain: Computer Vision use_case: Semantic Segmentation -description: YoloV8 is a machine learning model that predicts bounding boxes, segmentation +description: Ultralytics YOLOv8 is a machine learning model that predicts bounding boxes, segmentation masks and classes of objects in an image. 
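These export scripts compile with `--force_channel_last_input image` and shuttle data through `transpose_channel_first_to_last` / `transpose_channel_last_to_first` because the PyTorch sources are channel-first (NCHW) while the compiled TFLite assets take channel-last (NHWC) tensors. A rough numpy illustration of that reordering (not the library helpers themselves) is below; the round-trip assert just shows the two transposes are inverses.

```python
import numpy as np


def to_channel_last(batch_nchw: np.ndarray) -> np.ndarray:
    # (N, C, H, W) -> (N, H, W, C): the layout the compiled TFLite asset expects.
    return np.transpose(batch_nchw, (0, 2, 3, 1))


def to_channel_first(batch_nhwc: np.ndarray) -> np.ndarray:
    # Inverse reordering for outputs returned from the device.
    return np.transpose(batch_nhwc, (0, 3, 1, 2))


image = np.zeros((1, 3, 640, 640), dtype=np.float32)  # YOLO-style 640x640 input
assert to_channel_first(to_channel_last(image)).shape == image.shape
```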
tags: - real-time research_paper: https://arxiv.org/abs/2305.09972 research_paper_title: Real-Time Flying Object Detection with YOLOv8 license: https://github.com/ultralytics/ultralytics/blob/main/LICENSE +deploy_license: https://github.com/ultralytics/ultralytics/blob/main/LICENSE source_repo: https://github.com/ultralytics/ultralytics/tree/main/ultralytics/models/yolo/segment technical_details: - Model checkpoint: YoloV8N-Seg + Model checkpoint: YOLOv8N-Seg Input resolution: 640x640 Number of parameters: 3.43M Model size: 13.2 MB @@ -38,4 +39,5 @@ form_factors: has_static_banner: yes has_animated_banner: no license_type: agpl-3.0 +deploy_license_type: agpl-3.0 dataset: [] diff --git a/qai_hub_models/models/yolov8_seg/model.py b/qai_hub_models/models/yolov8_seg/model.py index ff3ac466..bbd5fee2 100644 --- a/qai_hub_models/models/yolov8_seg/model.py +++ b/qai_hub_models/models/yolov8_seg/model.py @@ -72,8 +72,8 @@ def forward(self, image: torch.Tensor): boxes, scores, masks, classes = yolov8_segment_postprocess(predictions[0]) return boxes, scores, masks, classes, predictions[1][-1] + @staticmethod def get_input_spec( - self, batch_size: int = 1, num_channels: int = 3, height: int = 640, diff --git a/qai_hub_models/models/yolov8_seg/perf.yaml b/qai_hub_models/models/yolov8_seg/perf.yaml index 6e25b242..1f87df00 100644 --- a/qai_hub_models/models/yolov8_seg/perf.yaml +++ b/qai_hub_models/models/yolov8_seg/perf.yaml @@ -17,22 +17,25 @@ aggregated: - Samsung Galaxy S23 - Samsung Galaxy S23 Ultra - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra - Samsung Galaxy Tab S8 - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 - Snapdragon® 888 models: -- name: Yolo-v8-Segmentation +- name: YOLOv8-Segmentation performance_metrics: - torchscript_onnx_tflite: - inference_time: 10686.0 - throughput: 93.58038555118847 + inference_time: 10665.0 + throughput: 93.76465072667604 estimated_peak_memory_range: min: 4616192 - max: 6819472 + max: 6990768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -40,8 +43,16 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 337 - job_id: jz57el6qp + job_id: j1glnxepv job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-03-15T00:08:48.972058Z' torchscript_onnx_qnn: inference_time: 'null' throughput: 'null' @@ -57,11 +68,41 @@ models: total_layers: 0 job_id: '' job_status: Skipped + - torchscript_onnx_tflite: + inference_time: 7417.0 + throughput: 134.8254011055683 + estimated_peak_memory_range: + min: 53248 + max: 91611328 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 337 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 337 + job_id: jw5667v5o + job_status: Passed reference_device_info: - name: Samsung Galaxy S23 Ultra - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-02-21T16:36:07.212007Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-03-15T00:08:48.972071Z' + torchscript_onnx_qnn: + inference_time: 'null' + throughput: 'null' + estimated_peak_memory_range: + min: 0 + max: 0 + primary_compute_unit: 'null' + precision: 'null' + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 0 + job_id: '' + job_status: Skipped diff --git 
a/qai_hub_models/models/yolov8_seg/requirements.txt b/qai_hub_models/models/yolov8_seg/requirements.txt index 5d6e5cf5..94980b0d 100644 --- a/qai_hub_models/models/yolov8_seg/requirements.txt +++ b/qai_hub_models/models/yolov8_seg/requirements.txt @@ -1 +1,3 @@ +seaborn==0.11.0 +thop==0.1.1.post2209072238 ultralytics==8.0.193 diff --git a/qai_hub_models/models/yolov8_seg/test.py b/qai_hub_models/models/yolov8_seg/test.py index f477d402..46c927d7 100644 --- a/qai_hub_models/models/yolov8_seg/test.py +++ b/qai_hub_models/models/yolov8_seg/test.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- import numpy as np +import pytest import torch from ultralytics import YOLO as ultralytics_YOLO @@ -40,6 +41,7 @@ def test_task(): assert np.allclose(source_out_postprocessed[i], qaihm_out_postprocessed[i]) +@pytest.mark.trace def test_trace(): net = YoloV8Segmentor.from_pretrained(WEIGHTS) input_spec = net.get_input_spec() diff --git a/qai_hub_models/requirements-dev.txt b/qai_hub_models/requirements-dev.txt index 578fca5f..be5243dc 100644 --- a/qai_hub_models/requirements-dev.txt +++ b/qai_hub_models/requirements-dev.txt @@ -1,20 +1,18 @@ -boto3 -botocore +boto3==1.34.40 +botocore==1.34.40 coverage==6.5.0 -huggingface-hub==0.20.3 +imageio[ffmpeg]==2.31.5 jinja2==3.0.3 mypy==0.991 -protobuf==3.20.3 +pre-commit==3.5.0 pytest-cov==4.1.0 pytest-xdist==3.3.1 -pyyaml==6.0.1 -ruamel-yaml +ruamel-yaml==0.18.6 schema==0.7.5 -scikit-image>=0.21.0 -tensorflow-cpu==2.13.0; sys_platform != 'darwin' -tensorflow-macos==2.13.0; sys_platform == 'darwin' -types-PyYAML -types-pillow -types-tabulate -types-requests +scikit-image==0.21.0 +tflite==2.10.0 +types-PyYAML==6.0.12.12 +types-pillow==10.2.0.20240213 +types-tabulate==0.9.0.20240106 +types-requests==2.31.0.6 keyrings.envvars; python_version >= '3.9' # used only by CI diff --git a/qai_hub_models/requirements.txt b/qai_hub_models/requirements.txt index 9e263481..f25dc8e9 100644 --- a/qai_hub_models/requirements.txt +++ b/qai_hub_models/requirements.txt @@ -1,19 +1,25 @@ Pillow==10.0.1 +deprecation==2.1.0 +fsspec==2023.6.0 gdown==4.7.1 -gitpython -huggingface_hub -ipython +gitpython==3.1.42 +huggingface_hub==0.20.3 +ipython==8.12.3 numpy==1.23.1 opencv-python==4.8.1.78 -pandas -prettytable +packaging==23.2 +pandas==1.5.3 +prettytable==3.9.0 +protobuf==3.20.2 pytest==7.4.2 -pyyaml -qai_hub>=0.9.0 -requests -requests_toolbelt -schema -tabulate +pyyaml==6.0.1 +requests_toolbelt==1.0.0 +schema==0.7.5 +scipy==1.8.1 +tabulate==0.9.0 torch==1.13.1 -torchvision<=0.14.1 -urllib3<2 +torchvision==0.14.1 +typing-extensions==4.5.0 +tqdm==4.66.2 +urllib3==1.26.18 +qai_hub>=0.9.0 diff --git a/qai_hub_models/test/e2e/test_aimet_compile.py b/qai_hub_models/test/e2e/test_aimet_compile.py index 0bd090c5..f3d52c36 100644 --- a/qai_hub_models/test/e2e/test_aimet_compile.py +++ b/qai_hub_models/test/e2e/test_aimet_compile.py @@ -6,7 +6,7 @@ import pytest import qai_hub as hub -from qai_hub_models.models.mobilenet_v2_quantized.model import MobileNetV2Quantizable +from qai_hub_models.models.squeezenet1_1_quantized.model import SqueezeNetQuantizable from qai_hub_models.utils.base_model import SourceModelFormat, TargetRuntime from qai_hub_models.utils.inference import compile_zoo_model_to_hub from qai_hub_models.utils.measurement import get_model_size_mb @@ -16,16 +16,15 @@ @pytest.mark.parametrize( "source_model_format,target_runtime,expected_size_mb", [ - (SourceModelFormat.ONNX, TargetRuntime.TFLITE, 3.4), - 
(SourceModelFormat.TORCHSCRIPT, TargetRuntime.TFLITE, 3.4), - (SourceModelFormat.ONNX, TargetRuntime.QNN, 3.8), - (SourceModelFormat.TORCHSCRIPT, TargetRuntime.QNN, 3.8), + (SourceModelFormat.ONNX, TargetRuntime.TFLITE, 1.3), + (SourceModelFormat.TORCHSCRIPT, TargetRuntime.TFLITE, 1.3), + (SourceModelFormat.ONNX, TargetRuntime.QNN, 1.6), ], ) def test_compile_aimet( source_model_format, target_runtime, expected_size_mb, skip_clone_repo_check_fixture ): - model = MobileNetV2Quantizable.from_pretrained() + model = SqueezeNetQuantizable.from_pretrained() calibration_data = model.get_calibration_data(target_runtime) diff --git a/qai_hub_models/utils/aimet/config_loader.py b/qai_hub_models/utils/aimet/config_loader.py index b18bcf0e..dadc6012 100644 --- a/qai_hub_models/utils/aimet/config_loader.py +++ b/qai_hub_models/utils/aimet/config_loader.py @@ -5,13 +5,19 @@ from pathlib import Path -def get_default_aimet_config() -> str: - path = Path(__file__).parent / "default_config.json" +def get_default_aimet_config_legacy_v1() -> str: + path = Path(__file__).parent / "default_config_legacy_v1.json" return str(path.resolve()) -def get_per_channel_aimet_config() -> str: - path = Path(__file__).parent / "default_config_per_channel.json" +def get_default_aimet_config_legacy_v2() -> str: + # Introduced per-channel weights + path = Path(__file__).parent / "default_config_legacy_v2.json" + return str(path.resolve()) + + +def get_default_aimet_config() -> str: + path = Path(__file__).parent / "default_config.json" return str(path.resolve()) diff --git a/qai_hub_models/utils/aimet/default_config.json b/qai_hub_models/utils/aimet/default_config.json index f616005d..c7b13e15 100644 --- a/qai_hub_models/utils/aimet/default_config.json +++ b/qai_hub_models/utils/aimet/default_config.json @@ -11,14 +11,15 @@ "is_symmetric": "True" }, "strict_symmetric": "False", - "per_channel_quantization": "False" + "unsigned_symmetric": "False", + "per_channel_quantization": "True" }, "params": { "bias": { - "is_quantized": "True" + "is_quantized": "False" } }, @@ -26,19 +27,19 @@ { "Squeeze": { - "is_output_quantized": "False" + "is_output_quantized": "True" }, "Pad": { - "is_output_quantized": "False" + "is_output_quantized": "True" }, "Mean": { "is_output_quantized": "False" }, - "Gather": + "Gemm": { - "is_output_quantized": "False" + "per_channel_quantization": "False" } }, @@ -47,11 +48,11 @@ { "op_list": ["Conv", "Relu"] }, - { - "op_list": ["ConvTranspose", "Relu"] - }, - { + { "op_list": ["Conv", "Clip"] + }, + { + "op_list": ["Conv", "BatchNormalization", "Relu"] }, { "op_list": ["Add", "Relu"] diff --git a/qai_hub_models/utils/aimet/default_config_legacy_v1.json b/qai_hub_models/utils/aimet/default_config_legacy_v1.json new file mode 100644 index 00000000..f616005d --- /dev/null +++ b/qai_hub_models/utils/aimet/default_config_legacy_v1.json @@ -0,0 +1,71 @@ +{ + "defaults": + { + "ops": + { + "is_output_quantized": "True" + }, + "params": + { + "is_quantized": "True", + "is_symmetric": "True" + }, + "strict_symmetric": "False", + "per_channel_quantization": "False" + }, + + "params": + { + "bias": + { + "is_quantized": "True" + } + }, + + "op_type": + { + "Squeeze": + { + "is_output_quantized": "False" + }, + "Pad": + { + "is_output_quantized": "False" + }, + "Mean": + { + "is_output_quantized": "False" + }, + "Gather": + { + "is_output_quantized": "False" + } + }, + + "supergroups": + [ + { + "op_list": ["Conv", "Relu"] + }, + { + "op_list": ["ConvTranspose", "Relu"] + }, + { + "op_list": ["Conv", "Clip"] + }, + 
{ + "op_list": ["Add", "Relu"] + }, + { + "op_list": ["Gemm", "Relu"] + } + ], + + "model_input": + { + "is_input_quantized": "True" + }, + + "model_output": + {} +} diff --git a/qai_hub_models/utils/aimet/default_config_per_channel.json b/qai_hub_models/utils/aimet/default_config_legacy_v2.json similarity index 100% rename from qai_hub_models/utils/aimet/default_config_per_channel.json rename to qai_hub_models/utils/aimet/default_config_legacy_v2.json diff --git a/qai_hub_models/utils/aimet/repo.py b/qai_hub_models/utils/aimet/repo.py new file mode 100644 index 00000000..1d7a6ab5 --- /dev/null +++ b/qai_hub_models/utils/aimet/repo.py @@ -0,0 +1,32 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from contextlib import contextmanager + +from qai_hub_models.utils.asset_loaders import SourceAsRoot, find_replace_in_repo + +AIMET_ZOO_SOURCE_REPOSITORY = "https://github.com/quic/aimet-model-zoo" +AIMET_ZOO_SOURCE_REPO_COMMIT = "d09d2b0404d10f71a7640a87e9d5e5257b028802" +REPO_ASSET_VERSION = 1 + + +@contextmanager +def aimet_zoo_as_root(): + with SourceAsRoot( + AIMET_ZOO_SOURCE_REPOSITORY, + AIMET_ZOO_SOURCE_REPO_COMMIT, + source_repo_name="aimet_zoo", + source_repo_version=REPO_ASSET_VERSION, + keep_sys_modules=True, + ) as repo_root: + # Remove import of model_definition.py as it has an import error itself, + # but we don't need anything from that file here + find_replace_in_repo( + repo_root, + "aimet_zoo_torch/quicksrnet/__init__.py", + "from .model.model_definition import QuickSRNet", + " ", + ) + + yield repo_root diff --git a/qai_hub_models/utils/args.py b/qai_hub_models/utils/args.py index cbddf19c..8b60a63d 100644 --- a/qai_hub_models/utils/args.py +++ b/qai_hub_models/utils/args.py @@ -18,14 +18,12 @@ import qai_hub as hub -from qai_hub_models.utils.base_model import ( - BaseModel, +from qai_hub_models.models.protocols import ( FromPrecompiledTypeVar, - FromPretrainedMixin, + FromPretrainedProtocol, FromPretrainedTypeVar, - InputSpec, - TargetRuntime, ) +from qai_hub_models.utils.base_model import BaseModel, InputSpec, TargetRuntime from qai_hub_models.utils.inference import HubModel from qai_hub_models.utils.qai_hub_helpers import _AIHUB_NAME, can_access_qualcomm_ai_hub @@ -61,6 +59,9 @@ def add_output_dir_arg(parser: argparse.ArgumentParser) -> argparse.ArgumentPars def add_target_runtime_arg( parser: argparse.ArgumentParser, help: str, + available_target_runtimes: List[TargetRuntime] = list( + TargetRuntime.__members__.values() + ), default: TargetRuntime = TargetRuntime.TFLITE, ) -> argparse.ArgumentParser: parser.add_argument( @@ -68,7 +69,7 @@ def add_target_runtime_arg( type=str, action=partial(ParseEnumAction, enum_type=TargetRuntime), # type: ignore default=default, - choices=[name.lower() for name in TargetRuntime._member_names_], + choices=[rt.name.lower() for rt in available_target_runtimes], help=help, ) return parser @@ -124,6 +125,7 @@ def get_on_device_demo_parser( parser, help="The runtime to demo (if --on-device is specified).", default=default_runtime, + available_target_runtimes=available_target_runtimes, ) return parser @@ -139,7 +141,7 @@ def validate_on_device_demo_args(args: argparse.Namespace, model_name: str): if args.on_device and not can_access_qualcomm_ai_hub(): print( "On-device demos are not available without Qualcomm® AI Hub access.", - 
"Please sign up for Qualcomm® AI Hub at https://aihub.qualcomm.com/.", + "Please sign up for Qualcomm® AI Hub at https://myaccount.qualcomm.com/signup .", sep=os.linesep, ) sys.exit(1) @@ -210,8 +212,8 @@ def model_from_cli_args( def demo_model_from_cli_args( model_cls: Type[FromPretrainedTypeVar], + model_id: str, cli_args: argparse.Namespace, - check_trace: bool = True, ) -> FromPretrainedTypeVar | HubModel: """ Create this model from an argparse namespace. @@ -219,27 +221,23 @@ def demo_model_from_cli_args( If the model is a BaseModel and an on-device demo is requested, the BaseModel will be wrapped in a HubModel. """ - model = model_from_cli_args( - model_cls, cli_args - ) # TODO(9494): This should be replaced by static input spec is_on_device = "on_device" in cli_args and cli_args.on_device inference_model: FromPretrainedTypeVar | HubModel - if is_on_device and isinstance(model, BaseModel): + if is_on_device and issubclass(model_cls, BaseModel): device = hub.Device(cli_args.device, cli_args.device_os) if cli_args.hub_model_id: model_from_hub = hub.get_model(cli_args.hub_model_id) inference_model = HubModel( model_from_hub, - list(model.get_input_spec().keys()), + list(model_cls.get_input_spec().keys()), device, cli_args.inference_options, ) else: - model_cls = model_cls - export_file = f"qai_hub_models.models.{model.get_model_id()}.export" + export_file = f"qai_hub_models.models.{model_id}.export" export_module = import_module(export_file) compile_job: hub.CompileJob - print(f"Compiling on-device model asset for {model.get_model_id()}.") + print(f"Compiling on-device model asset for {model_id}.") print( f"Running python -m {export_file} --device {device.name} --target-runtime {cli_args.target_runtime.name.lower()}\n" ) @@ -262,7 +260,7 @@ def demo_model_from_cli_args( target_model = compile_job.get_target_model() assert target_model is not None - input_names = list(model.get_input_spec().keys()) + input_names = list(model_cls.get_input_spec().keys()) inference_model = HubModel( target_model, input_names, @@ -271,7 +269,7 @@ def demo_model_from_cli_args( ) print(f"Exported asset: {inference_model.model.name}\n") else: - inference_model = model + inference_model = model_from_cli_args(model_cls, cli_args) return inference_model @@ -419,7 +417,7 @@ def export_parser( help="Which components of the model to be exported.", ) - if issubclass(model_cls, FromPretrainedMixin): + if issubclass(model_cls, FromPretrainedProtocol): # Skip adding CLI from model for compiled model # TODO: #9408 Refactor BaseModel, BasePrecompiledModel to fetch # parameters from compiled model diff --git a/qai_hub_models/utils/asset_loaders.py b/qai_hub_models/utils/asset_loaders.py index 70eabe0a..652d259c 100644 --- a/qai_hub_models/utils/asset_loaders.py +++ b/qai_hub_models/utils/asset_loaders.py @@ -6,6 +6,7 @@ import fileinput import json +import logging import os import shutil import sys @@ -55,6 +56,17 @@ def always_answer_prompts(answer): _always_answer = old_value +@contextmanager +def set_log_level(log_level: int): + logger = logging.getLogger() + old_level = logger.level + try: + logger.setLevel(log_level) + yield + finally: + logger.setLevel(old_level) + + class QAIHM_WEB_ASSET(Enum): STATIC_IMG = 0 ANIMATED_MOV = 1 @@ -412,12 +424,20 @@ def load_torch(pt: PathType) -> Any: return _load_file(pt, partial(torch.load, map_location="cpu")) -def load_json(json_file: PathType) -> Dict: +def load_json(json_filepath: PathType) -> Dict: def _load_json_helper(file_path) -> Any: with open(file_path, "r") as 
json_file: return json.load(json_file) - return _load_file(json_file, _load_json_helper) + return _load_file(json_filepath, _load_json_helper) + + +def load_yaml(yaml_filepath: PathType) -> Dict: + def _load_yaml_helper(file_path) -> Any: + with open(file_path, "r") as yaml_file: + return yaml.safe_load(yaml_file) + + return _load_file(yaml_filepath, _load_yaml_helper) def load_path(file: PathType, tmpdir: tempfile.TemporaryDirectory | str) -> str | Path: @@ -439,7 +459,7 @@ def SourceAsRoot( source_repo_name: str, source_repo_version: int | str, source_repo_patches: List[str] = [], - keep_sys_path: bool = False, + keep_sys_modules: bool = False, ): """ Context manager that runs code with: @@ -457,21 +477,36 @@ def SourceAsRoot( patches=source_repo_patches, ) SOURCE_AS_ROOT_LOCK.acquire() - cwd = os.getcwd() original_path = list(sys.path) + original_modules = dict(sys.modules) + cwd = os.getcwd() try: + # If repo path already in sys.path from previous load, + # delete it and put it first + if repository_path in sys.path: + sys.path.remove(repository_path) # Patch path for this load only, since the model source # code references modules via a global scope. # Insert with highest priority (see #7666) sys.path.insert(0, repository_path) os.chdir(repository_path) - yield repository_path finally: # Be careful editing these lines (failure means partial clean-up) os.chdir(cwd) - if not keep_sys_path: - sys.path = original_path + sys.path = original_path + if not keep_sys_modules: + # When you call something like `import models`, it loads the `models` module + # into sys.modules so all future `import models` point to that module. + # + # We want all imports done within the sub-repo to be either deleted from + # sys.modules or restored to the previous module if one was overwritten. + for name, module in list(sys.modules.items()): + if (getattr(module, "__file__", "") or "").startswith(repository_path): + if name in original_modules: + sys.modules[name] = original_modules[name] + else: + del sys.modules[name] SOURCE_AS_ROOT_LOCK.release() diff --git a/qai_hub_models/utils/base_model.py b/qai_hub_models/utils/base_model.py index eea39fac..5a0ec84e 100644 --- a/qai_hub_models/utils/base_model.py +++ b/qai_hub_models/utils/base_model.py @@ -4,107 +4,105 @@ # --------------------------------------------------------------------- from __future__ import annotations -import os -from abc import ABC, ABCMeta, abstractmethod -from enum import Enum -from inspect import getmodule -from typing import Any, Dict, List, Type, TypeVar +from pathlib import Path +from typing import Any -import numpy as np import torch from qai_hub.client import SourceModel -from qai_hub_models.evaluators.base_evaluators import BaseEvaluator +from qai_hub_models.models.common import ( + SampleInputsType, + SourceModelFormat, + TargetRuntime, +) +from qai_hub_models.models.protocols import ( + ExecutableModelProtocol, + FromPrecompiledProtocol, + FromPretrainedProtocol, + HubModelProtocol, +) from qai_hub_models.utils.input_spec import InputSpec, make_torch_inputs -InputsType = Dict[str, List[np.ndarray]] - - -class TargetRuntime(Enum): - TFLITE = 0 - QNN = 1 - - def __str__(self): - return self.name.lower() +class CollectionModel: + """ + Model that glues together several BaseModels + """ -class SourceModelFormat(Enum): - ONNX = 0 - TORCHSCRIPT = 1 + pass -class DocstringInheritorMeta(ABCMeta): +class HubModel(HubModelProtocol): """ - Ensures that all subclasses retain the `forward` function's docstring. 
+ Base interface for AI Hub models. """ - def __new__(cls, name, bases, dct): - new_class = super().__new__(cls, name, bases, dct) - if hasattr(new_class, "forward"): - parent_method = getattr(bases[0], "forward", None) - if parent_method and new_class.forward.__doc__ is None: # type: ignore - new_class.forward.__doc__ = parent_method.__doc__ # type: ignore - return new_class - + def __init__(self): + # Change self.get_input_spec() to call _get_input_spec_for_model_instance() instead. + # + # _get_input_spec_for_model_instance() is an override that allows get_input_spec() + # to access instance variables. This may be used in case input shape is "hard-coded" + # based on parameters passed to the model upon initialization. + # + self.get_input_spec = self._get_input_spec_for_model_instance -# Use this for typehints that take in a class and output an instance of the class. -FromPretrainedTypeVar = TypeVar("FromPretrainedTypeVar", bound="FromPretrainedMixin") -FromPrecompiledTypeVar = TypeVar("FromPrecompiledTypeVar", bound="FromPrecompiledMixin") + def _get_input_spec_for_model_instance(self, *args, **kwargs) -> InputSpec: + """ + Get the input specifications for an instance of this model. + Typically this will pre-fill inputs of get_input_spec + with values determined by instance members of the model class. -class FromPretrainedMixin(ABC): - @classmethod - @abstractmethod - def from_pretrained( - cls: Type[FromPretrainedTypeVar], *args, **kwargs - ) -> FromPretrainedTypeVar: + The initializer for BaseModel will automatically override get_input_spec + with this function when the class is instantiated. """ - Utility function that helps users get up and running with a default - pretrained model. While this function may take arguments, all arguments - should have default values specified, so that all classes can be invoked - with `cls.from_pretrained()` and always have it return something reasonable. - """ - pass - + return self.__class__.get_input_spec(*args, **kwargs) -class CollectionModel(FromPretrainedMixin): - """ - Model that glues together several BaseModels - """ + def sample_inputs(self, input_spec: InputSpec | None = None) -> SampleInputsType: + """ + Returns a set of sample inputs for the model. - pass + For each input name in the model, a list of numpy arrays is provided. + If the returned set is batch N, all input names must contain exactly N numpy arrays. + This is a default implementation that returns a single random data array + for each input name based on the shapes and dtypes in `get_input_spec`. -class BaseModel( - torch.nn.Module, FromPretrainedMixin, ABC, metaclass=DocstringInheritorMeta -): - @abstractmethod - def get_input_spec(self, *args, **kwargs) -> InputSpec: - """ - Returns a map from `{input_name -> (shape, dtype)}` - specifying the shape and dtype for each input argument. + A subclass may choose to override this and fetch a batch of real input data + from a data source. """ - pass + if not input_spec: + input_spec = self.get_input_spec() + inputs_dict = {} + inputs_list = make_torch_inputs(input_spec) + for i, input_name in enumerate(input_spec.keys()): + inputs_dict[input_name] = [inputs_list[i].numpy()] + return inputs_dict - @classmethod - def get_model_id(cls) -> str: + def get_hub_profile_options( + self, + target_runtime: TargetRuntime, + other_profile_options: str = "", + ) -> str: """ - Return model ID for this model. - The model ID is the same as the folder name for the model under qai_hub_models/models/... 
+ AI Hub profile options recommended for the model. """ - module = getmodule(cls) - if not module or not module.__file__: - raise ValueError(f"Unable to get model ID for {cls.__name__}") + return other_profile_options - # Module path is always .../qai_hub_models/models//model.py - # Extract model ID from that path. - return os.path.basename(os.path.dirname(module.__file__)) - def get_evaluator(self) -> BaseEvaluator: - """ - Gets default model output evaluator for this model. - """ - raise NotImplementedError("This model does not define a default evaluator.") +class BaseModel( + torch.nn.Module, + HubModel, + FromPretrainedProtocol, + ExecutableModelProtocol, +): + """ + A pre-trained PyTorch model with helpers for submission to AI Hub. + """ + + def __init__(self): + torch.nn.Module.__init__(self) # Initialize Torch Module + HubModel.__init__(self) # Initialize Hub Model def convert_to_torchscript( self, input_spec: InputSpec | None = None, check_trace: bool = True @@ -125,7 +123,7 @@ def convert_to_torchscript( def convert_to_hub_source_model( self, target_runtime: TargetRuntime, - output_path: str, + output_path: str | Path, input_spec: InputSpec | None = None, check_trace: bool = True, ) -> SourceModel: @@ -135,7 +133,6 @@ def convert_to_hub_source_model( # Local import to prevent circular dependency from qai_hub_models.utils.inference import prepare_compile_zoo_model_to_hub - assert isinstance(self, BaseModel) source_model, _ = prepare_compile_zoo_model_to_hub( self, source_model_format=self.preferred_hub_source_model_format(target_runtime), @@ -152,7 +149,7 @@ def get_hub_compile_options( other_compile_options: str = "", ) -> str: """ - Convert to a AI Hub source model appropriate for the export method. + AI Hub compile options recommended for the model. """ compile_options = "" if target_runtime == TargetRuntime.QNN: @@ -164,71 +161,21 @@ def get_hub_compile_options( def preferred_hub_source_model_format( self, target_runtime: TargetRuntime ) -> SourceModelFormat: - return SourceModelFormat.TORCHSCRIPT - - def sample_inputs(self, input_spec: InputSpec | None = None) -> InputsType: """ - Returns a set of sample inputs for the model. - - For each input name in the model, a list of numpy arrays is provided. - If the returned set is batch N, all input names must contain exactly N numpy arrays. - - This is a default implementation that returns a single random data array - for each input name based on the shapes and dtypes in `get_input_spec`. - - A subclass may choose to override this and fetch a batch of real input data - from a data source. + Source model format preferred for conversion on AI Hub. """ - if not input_spec: - input_spec = self.get_input_spec() - inputs_dict = {} - inputs_list = make_torch_inputs(input_spec) - for i, input_name in enumerate(input_spec.keys()): - inputs_dict[input_name] = [inputs_list[i].numpy()] - return inputs_dict - - -class FromPrecompiledMixin(ABC): - @classmethod - @abstractmethod - def from_precompiled( - cls: Type[FromPrecompiledTypeVar], *args, **kwargs - ) -> "FromPrecompiledTypeVar": - """ - Utility function that helps users get up and running with a default - precompiled model. While this function may take arguments, all arguments - should have default values specified, so that all classes can be invoked - with `cls.from_precompiled()` and always have it return something reasonable. 
- """ - pass - - -class BasePrecompiledModel(FromPrecompiledMixin): - @abstractmethod - def get_input_spec(self, *args, **kwargs) -> InputSpec: - """ - Returns a map from `{input_name -> (shape, dtype)}` - specifying the shape and dtype for each input argument. - """ - pass + return SourceModelFormat.TORCHSCRIPT - def sample_inputs(self, input_spec: InputSpec | None = None) -> InputsType: - """ - Returns a set of sample inputs for the model. - For each input name in the model, a list of numpy arrays is provided. - If the returned set is batch N, all input names must contain exactly N numpy arrays. +class BasePrecompiledModel(HubModel, FromPrecompiledProtocol): + """ + A pre-compiled hub model. + Model PyTorch source is not available, but compiled assets are available. + """ - This is a default implementation that returns a single random data array - for each input name based on the shapes and dtypes in `get_input_spec`. + def __init__(self, target_model_path: str): + self.target_model_path = target_model_path - A subclass may choose to override this and fetch a batch of real input data - from a data source. - """ - if not input_spec: - input_spec = self.get_input_spec() - inputs_dict = {} - inputs_list = make_torch_inputs(input_spec) - for i, input_name in enumerate(input_spec.keys()): - inputs_dict[input_name] = [inputs_list[i].numpy()] - return inputs_dict + def get_target_model_path(self) -> str: + """Get the path to the compiled asset for this model on disk.""" + return self.target_model_path diff --git a/qai_hub_models/utils/compare.py b/qai_hub_models/utils/compare.py index 8d210580..06bd37c8 100644 --- a/qai_hub_models/utils/compare.py +++ b/qai_hub_models/utils/compare.py @@ -29,7 +29,9 @@ def torch_inference( for i in range(len(list(sample_inputs.values())[0])): inputs = {} for input_name in input_names: - inputs[input_name] = torch.from_numpy(sample_inputs[input_name][i]) + inputs[input_name] = torch.from_numpy(sample_inputs[input_name][i]).to( + "cpu" + ) with torch.no_grad(): out = model(**inputs) out_tuple = (out,) if isinstance(out, torch.Tensor) else out diff --git a/qai_hub_models/utils/config_loaders.py b/qai_hub_models/utils/config_loaders.py index b0d68f0d..06be8ba7 100644 --- a/qai_hub_models/utils/config_loaders.py +++ b/qai_hub_models/utils/config_loaders.py @@ -8,7 +8,7 @@ from dataclasses import dataclass from enum import Enum from pathlib import Path -from typing import Dict, List, Optional, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple, Union import requests import yaml @@ -482,6 +482,7 @@ def __init__( name: str, id: str, status: MODEL_STATUS, + status_reason: str | None, headline: str, domain: MODEL_DOMAIN, description: str, @@ -490,6 +491,7 @@ def __init__( research_paper: str, research_paper_title: str, license: str, + deploy_license: str, source_repo: str, applicable_scenarios: List[str], related_models: List[str], @@ -498,12 +500,14 @@ def __init__( has_animated_banner: bool, code_gen_config: Dict[str, str | bool], license_type: str, + deploy_license_type: str, dataset: List[str], technical_details: Dict[str, str], ) -> None: self.name = name self.id = id self.status = status + self.status_reason = status_reason self.headline = headline self.domain = domain self.description = description @@ -512,7 +516,9 @@ def __init__( self.research_paper = research_paper self.research_paper_title = research_paper_title self.license = license + self.deploy_license = deploy_license self.license_type = license_type + self.deploy_license_type = 
deploy_license_type self.dataset = dataset self.source_repo = source_repo self.applicable_scenarios = applicable_scenarios @@ -569,9 +575,22 @@ def validate(self) -> Tuple[bool, Optional[str]]: if self.license_type not in HF_AVAILABLE_LICENSES: return False, f"license can be one of these: {HF_AVAILABLE_LICENSES}" - # Web assets exist - if self.status == MODEL_STATUS.PUBLIC and not self.has_static_banner: - return False, "All public models must have a static banner." + if not self.deploy_license: + return False, "deploy_license cannot be empty" + if not self.deploy_license_type: + return False, "deploy_license_type cannot be empty" + + # Status Reason + if self.status == MODEL_STATUS.PRIVATE and not self.status_reason: + return ( + False, + "Private models must set `status_reason` in info.yaml with a link to the related issue.", + ) + if self.status == MODEL_STATUS.PUBLIC and self.status_reason: + return ( + False, + "`status_reason` in info.yaml should not be set for public models.", + ) # Required assets exist if self.status == MODEL_STATUS.PUBLIC: @@ -686,6 +705,7 @@ def from_yaml(info_path: str | Path, code_gen_path: str | Path | None = None): info_yaml["name"], info_yaml["id"], MODEL_STATUS.from_string(info_yaml["status"]), + info_yaml.get("status_reason", None), info_yaml["headline"], MODEL_DOMAIN.from_string(info_yaml["domain"]), info_yaml["description"], @@ -694,6 +714,7 @@ def from_yaml(info_path: str | Path, code_gen_path: str | Path | None = None): info_yaml["research_paper"], info_yaml["research_paper_title"], info_yaml["license"], + info_yaml["deploy_license"], info_yaml["source_repo"], info_yaml["applicable_scenarios"], info_yaml["related_models"], @@ -702,34 +723,40 @@ def from_yaml(info_path: str | Path, code_gen_path: str | Path | None = None): info_yaml["has_animated_banner"], code_gen_config, info_yaml["license_type"], + info_yaml["deploy_license_type"], info_yaml["dataset"], info_yaml["technical_details"], ) # Schema for info.yaml INFO_YAML_SCHEMA = Schema( - { - "name": And(str), - "id": And(str), - "status": And(str), - "headline": And(str), - "domain": And(str), - "description": And(str), - "use_case": And(str), - "tags": And(lambda s: len(s) >= 0), - "research_paper": And(str), - "research_paper_title": And(str), - "license": And(str), - "source_repo": And(str), - "technical_details": And(dict), - "applicable_scenarios": And(lambda s: len(s) >= 0), - "related_models": And(lambda s: len(s) >= 0), - "form_factors": And(lambda s: len(s) >= 0), - "has_static_banner": And(bool), - "has_animated_banner": And(bool), - "license_type": And(str), - "dataset": And(list), - } + And( + { + "name": str, + "id": str, + "status": str, + OptionalSchema("status_reason", default=None): str, + "headline": str, + "domain": str, + "description": str, + "use_case": str, + "tags": lambda s: len(s) >= 0, + "research_paper": str, + "research_paper_title": str, + "license": str, + "deploy_license": str, + "source_repo": str, + "technical_details": dict, + "applicable_scenarios": lambda s: len(s) >= 0, + "related_models": lambda s: len(s) >= 0, + "form_factors": lambda s: len(s) >= 0, + "has_static_banner": bool, + "has_animated_banner": bool, + "license_type": str, + "deploy_license_type": str, + "dataset": list, + } + ) ) # Schema for code-gen.yaml @@ -743,8 +770,6 @@ def from_yaml(info_path: str | Path, code_gen_path: str | Path | None = None): OptionalSchema("tflite_export_failure_reason", default=""): str, OptionalSchema("has_demo", default=True): bool, OptionalSchema("check_trace", 
default=True): bool, - OptionalSchema("default_profile_options", default=""): str, - OptionalSchema("default_compile_options", default=""): str, OptionalSchema("channel_last_input", default=""): str, OptionalSchema("channel_last_output", default=""): str, OptionalSchema("outputs_to_skip_validation", default=[]): list, @@ -754,6 +779,7 @@ def from_yaml(info_path: str | Path, code_gen_path: str | Path | None = None): OptionalSchema("skip_tests", default=False): bool, OptionalSchema("is_precompiled", default=False): bool, OptionalSchema("no_assets", default=False): bool, + OptionalSchema("global_requirements_incompatible", default=False): bool, OptionalSchema("torchscript_opt", default=[]): list, OptionalSchema("inference_metrics", default="psnr"): str, } @@ -761,7 +787,7 @@ def from_yaml(info_path: str | Path, code_gen_path: str | Path | None = None): ) @staticmethod - def load_info_yaml(path: str | Path): + def load_info_yaml(path: str | Path) -> Dict[str, Any]: with open(path) as f: data = yaml.safe_load(f) try: diff --git a/qai_hub_models/utils/inference.py b/qai_hub_models/utils/inference.py index f28ca846..ef927a52 100644 --- a/qai_hub_models/utils/inference.py +++ b/qai_hub_models/utils/inference.py @@ -6,13 +6,15 @@ import os import tempfile -from typing import List, Tuple +from pathlib import Path +from typing import List, Mapping, Tuple import numpy as np import qai_hub as hub import torch from qai_hub.public_rest_api import DatasetEntries +from qai_hub_models.models.protocols import ExecutableModelProtocol from qai_hub_models.utils.asset_loaders import ModelZooAssetConfig from qai_hub_models.utils.base_model import BaseModel, SourceModelFormat, TargetRuntime from qai_hub_models.utils.input_spec import InputSpec @@ -32,7 +34,7 @@ def prepare_compile_zoo_model_to_hub( model: BaseModel, source_model_format: SourceModelFormat, target_runtime: TargetRuntime, - output_path: str = "", + output_path: str | Path = "", input_spec: InputSpec | None = None, check_trace: bool = True, prepare_compile_options_only: bool = False, @@ -98,7 +100,7 @@ def export_model_func(): ): def export_model_func(): - traced_model = model.convert_to_quantized_torchscript( + traced_model = model.convert_to_torchscript( input_spec=input_spec, check_trace=check_trace ) model_path = os.path.join(output_path, model_name + ".pt") @@ -193,7 +195,7 @@ def compile_zoo_model_to_hub( ) -class HubModel: +class HubModel(ExecutableModelProtocol): """ Class that behaves like a pytorch model except when called, it runs an inference job on hub and returns a torch output. 
@@ -224,26 +226,27 @@ def __init__( def __call__( self, - *input_tensors: torch.Tensor - | List[torch.Tensor] + *args: torch.Tensor + | np.ndarray + | List[torch.Tensor | np.ndarray] + | hub.Dataset + | DatasetEntries, + ) -> torch.Tensor | Tuple[torch.Tensor, ...]: + return self.forward(*args) + + def forward( + self, + *args: torch.Tensor + | np.ndarray + | List[torch.Tensor | np.ndarray] | hub.Dataset | DatasetEntries, ) -> torch.Tensor | Tuple[torch.Tensor, ...]: - inputs: hub.Dataset | DatasetEntries - if len(input_tensors) == 1 and isinstance(input_tensors[0], hub.Dataset): - inputs = input_tensors[0] - else: - # Upload dataset - inputs = {} - for name, tensor in zip(self.input_names, input_tensors): - if isinstance(tensor, (list, tuple)): - inputs[name] = [t.detach().numpy() for t in tensor] # type: ignore - else: - inputs[name] = [tensor.detach().numpy()] # type: ignore target_runtime = ( TargetRuntime.QNN if is_qnn_hub_model(self.model) else TargetRuntime.TFLITE ) + # Determine whether I/O is channel last channel_last_input, channel_last_output = "", "" if self.model.producer is not None: model_options = self.model.producer.options.strip().split() @@ -252,14 +255,40 @@ def __call__( channel_last_input = model_options[option_num + 1] if model_options[option_num] == "--force_channel_last_output": channel_last_output = model_options[option_num + 1] - if channel_last_input != "": - inputs = transpose_channel_first_to_last( - channel_last_input, inputs, target_runtime - ) + + assert len(args) > 0, "At least 1 input should be provided for inference." + + dataset: hub.Dataset | DatasetEntries + if isinstance(args[0], hub.Dataset) or isinstance(args[0], Mapping): + # Use the existing provided dataset + assert len(args) == 1, "Only 1 dataset can be provided for inference." 
+ dataset = args[0] + else: + # Create dataset from input tensors + dataset = {} + for name, inputs in zip(self.input_names, args): + if not isinstance(inputs, (list, tuple)): + inputs = [inputs] # type: ignore + + converted_inputs = [] + for input in inputs: + if isinstance(input, np.ndarray): + converted_inputs.append(input) + elif isinstance(input, torch.Tensor): + converted_inputs.append(input.detach().numpy()) + else: + raise NotImplementedError(f"Unknown input type: {str(inputs)}") + dataset[name] = converted_inputs + + # Transpose dataset I/O if necessary to fit with the on-device model format + if channel_last_input: + dataset = transpose_channel_first_to_last( + channel_last_input, dataset, target_runtime + ) inference_job = hub.submit_inference_job( model=self.model, - inputs=inputs, + inputs=dataset, device=self.device, name=f"{self.model.name}_demo_inference", options=self.inference_options, @@ -273,7 +302,7 @@ def __call__( assert output_ds_handle is not None output_dataset = output_ds_handle.download() - if channel_last_output != "": + if channel_last_output: output_dataset = transpose_channel_last_to_first( channel_last_output, output_dataset, # type: ignore diff --git a/qai_hub_models/utils/input_spec.py b/qai_hub_models/utils/input_spec.py index 0944724f..7d325dc7 100644 --- a/qai_hub_models/utils/input_spec.py +++ b/qai_hub_models/utils/input_spec.py @@ -24,7 +24,7 @@ def make_torch_inputs(spec: InputSpec, seed: Optional[int] = 42) -> List[torch.T torch_input = [] generator = None if seed is not None: - generator = torch.Generator() + generator = torch.Generator(device="cpu") generator.manual_seed(seed) for sp in spec.values(): torch_dtype = str_to_torch_dtype(sp[1]) diff --git a/qai_hub_models/utils/measurement.py b/qai_hub_models/utils/measurement.py index cf51c776..2c4a8f21 100644 --- a/qai_hub_models/utils/measurement.py +++ b/qai_hub_models/utils/measurement.py @@ -11,6 +11,7 @@ import numpy as np import qai_hub as hub +from tflite import Model as TFModel # type: ignore def display_with_sig_figs(num: float, num_sig_figs: int = 3) -> str: @@ -74,23 +75,22 @@ def get_tflite_unique_parameters( tensors that point to the same buffers. So, we keep track of all buffers we have counted through tensors. 
""" - from tensorflow.lite.python import schema_py_generated as schema_fb - with open(model_path, "rb") as f: tflite_model = f.read() - model_obj = schema_fb.Model.GetRootAsModel(tflite_model, 0) - model = schema_fb.ModelT.InitFromObj(model_obj) + model = TFModel.GetRootAs(tflite_model, 0) parameter_cnt = 0 buffers_counted = set() - for graph in model.subgraphs: - for tensor in graph.tensors: - buf_index = tensor.buffer - - buffer = model.buffers[buf_index] - if buffer.data is not None: + for i in range(model.SubgraphsLength()): + graph = model.Subgraphs(i) + for j in range(graph.TensorsLength()): + tensor = graph.Tensors(j) + buf_index = tensor.Buffer() + + buffer = model.Buffers(buf_index) + if not buffer.DataIsNone(): if buf_index not in buffers_counted: - parameter_cnt += np.prod(tensor.shape) + parameter_cnt += np.prod(tensor.ShapeAsNumpy()) buffers_counted.add(buf_index) if not as_str: diff --git a/qai_hub_models/utils/model_card.py b/qai_hub_models/utils/model_card.py index 91682146..d9831a77 100644 --- a/qai_hub_models/utils/model_card.py +++ b/qai_hub_models/utils/model_card.py @@ -48,6 +48,7 @@ class ModelRun: model_id: str profile_job_id: str runtime: MODEL_CARD_RUNTIMES + device_type: str def chipset(self) -> Optional[str]: """Chipset the job was run on.""" @@ -62,7 +63,9 @@ def chipset(self) -> Optional[str]: def profile_job(self): """Get the hub.ProfileJob object.""" if len(self.profile_job_id) > 0: - return hub.get_job(self.profile_job_id) + job = hub.get_job(self.profile_job_id) + job.wait() + return job return None def job_status(self) -> str: @@ -77,7 +80,12 @@ def job_status(self) -> str: @property def quantized(self) -> str: """Quantized models are marked so precision can be correctly recorded.""" - return "Yes" if self.model_id.endswith("_quantized") else "No" + return ( + "Yes" + if self.model_id.endswith("Quantized") + or self.model_id.endswith("Quantizable") + else "No" + ) @property def profile_results(self): @@ -163,18 +171,82 @@ def precision(self) -> str: return "fp16" return "null" + def performance_metrics(self) -> Dict[str, Any]: + return dict( + inference_time=self.get_inference_time(), + throughput=self.get_throughput(), + estimated_peak_memory_range=self.get_peak_memory_range(), + primary_compute_unit=self.primary_compute_unit(), + precision=self.precision(), + layer_info=dict( + layers_on_npu=self.npu(), + layers_on_gpu=self.gpu(), + layers_on_cpu=self.cpu(), + total_layers=self.total(), + ), + job_id=self.profile_job_id, + job_status=self.job_status(), + ) + + def reference_device_info(self) -> Dict[str, str]: + """Return a reference ID.""" + REF_DEVICE_MAP = { + "s23": ("qualcomm-snapdragon-8gen2", "Samsung Galaxy S23"), + "s24": ("qualcomm-snapdragon-8gen3", "Samsung Galaxy S24"), + } + chipset = REF_DEVICE_MAP[self.device_type][0] + hub_device = hub.get_devices(REF_DEVICE_MAP[self.device_type][1])[0] + device_name = hub_device.name + os_version = hub_device.os + os_name, form_factor, manufacturer = "", "", "" + for attr in hub_device.attributes: + if attr.startswith("vendor"): + manufacturer = attr.split(":")[-1] + if attr.startswith("format"): + form_factor = attr.split(":")[-1] + if attr.startswith("os"): + os_name = attr.split(":")[-1].capitalize() + chipset = chipset_marketting_name(chipset) + device_info = dict( + name=device_name, + os=os_version, + form_factor=form_factor.capitalize(), + os_name=os_name, + manufacturer=manufacturer.capitalize(), + chipset=chipset, + ) + return device_info + @dataclass class ModelPerf: model_runs: List[ModelRun] 
- def supported_chipsets(self, chips) -> List[str]: + def supported_chipsets(self, chips: List[str]) -> List[str]: """Return all the supported chipsets given the chipset it works on.""" - supported_chips = chips + + # Don't assign "chips" directly to supported_chips. + # The lists will share the same pointer, and hence the for + # loop below will break. + supported_chips = [] + supported_chips.extend(chips) + for chip in chips: + if chip == "qualcomm-snapdragon-8gen3": + supported_chips.extend( + [ + "qualcomm-snapdragon-8gen2", + "qualcomm-snapdragon-8gen1", + "qualcomm-snapdragon-888", + ] + ) if chip == "qualcomm-snapdragon-8gen2": supported_chips.extend( - ["qualcomm-snapdragon-8gen1", "qualcomm-snapdragon-888"] + [ + "qualcomm-snapdragon-8gen3", + "qualcomm-snapdragon-8gen1", + "qualcomm-snapdragon-888", + ] ) if chip == "qualcomm-snapdragon-855": supported_chips.extend( @@ -222,31 +294,6 @@ def supported_oses(self) -> List[str]: """Return all the supported operating systems.""" return ["Android"] - def reference_device_info(self) -> Dict[str, str]: - """Return a reference ID.""" - chipset = "qualcomm-snapdragon-8gen2" - hub_device = hub.get_devices("Samsung Galaxy S23 Ultra")[0] - device_name = hub_device.name - os_version = hub_device.os - os_name, form_factor, manufacturer = "", "", "" - for attr in hub_device.attributes: - if attr.startswith("vendor"): - manufacturer = attr.split(":")[-1] - if attr.startswith("format"): - form_factor = attr.split(":")[-1] - if attr.startswith("os"): - os_name = attr.split(":")[-1].capitalize() - chipset = chipset_marketting_name(chipset) - device_info = dict( - name=device_name, - os=os_version, - form_factor=form_factor.capitalize(), - os_name=os_name, - manufacturer=manufacturer.capitalize(), - chipset=chipset, - ) - return device_info - def performance_metrics(self): """Performance metrics as per model card.""" perf_card = dict() @@ -254,11 +301,14 @@ def performance_metrics(self): # Figure out unique models in various baselines unique_model_ids = [] chips = [] + devices = [] for run in self.model_runs: if run.model_id not in unique_model_ids: unique_model_ids.append(run.model_id) if run.chipset not in chips: chips.append(run.chipset()) + if run.device_type not in devices: + devices.append(run.device_type) perf_card["aggregated"] = dict( supported_oses=self.supported_oses(), @@ -269,36 +319,32 @@ def performance_metrics(self): perf_per_model = [] for mid in unique_model_ids: - perf_per_device = [] # Calculate per data per runtime - perf_per_runtime = dict() + perf_per_device = dict() for run in self.model_runs: if run.model_id == mid: - runtime_name = run.runtime.name.lower() - perf_per_runtime[runtime_name] = dict( - inference_time=run.get_inference_time(), - throughput=run.get_throughput(), - estimated_peak_memory_range=run.get_peak_memory_range(), - primary_compute_unit=run.primary_compute_unit(), - precision=run.precision(), - layer_info=dict( - layers_on_npu=run.npu(), - layers_on_gpu=run.gpu(), - layers_on_cpu=run.cpu(), - total_layers=run.total(), - ), - job_id=run.profile_job_id, - job_status=run.job_status(), - ) - - # Per model, the device used and timestamp for model card - perf_per_runtime["reference_device_info"] = self.reference_device_info() - perf_per_runtime["timestamp"] = datetime.datetime.utcnow().isoformat() + "Z" - - perf_per_device.append(perf_per_runtime) - - perf_model = dict(name=mid, performance_metrics=perf_per_device) - perf_model["name"] = mid + for dev in devices: + if run.device_type == dev: + # perf_per_runtime = 
dict() + if dev not in perf_per_device: + perf_per_device[dev] = dict() + runtime_name = run.runtime.name.lower() + perf_per_device[dev][ + runtime_name + ] = run.performance_metrics() + # Per model, the device used and timestamp for model card + if "reference_device_info" not in perf_per_device[dev]: + perf_per_device[dev][ + "reference_device_info" + ] = run.reference_device_info() + + perf_per_device[dev]["timestamp"] = ( + datetime.datetime.utcnow().isoformat() + "Z" + ) + + perf_model = dict( + name=mid, performance_metrics=list(perf_per_device.values()) + ) perf_per_model.append(perf_model) # Perf card with multiple models diff --git a/qai_hub_models/utils/path_helpers.py b/qai_hub_models/utils/path_helpers.py index e1157e1b..2dc4a50f 100644 --- a/qai_hub_models/utils/path_helpers.py +++ b/qai_hub_models/utils/path_helpers.py @@ -5,11 +5,13 @@ from pathlib import Path from typing import Optional +from qai_hub_models.utils.asset_loaders import load_yaml + MODELS_PACKAGE_NAME = "models" QAIHM_PACKAGE_NAME = "qai_hub_models" -def get_all_models(): +def get_all_models(public_only: bool = False): zoo_root = get_qaihm_models_root() all_models = [] for subdir in zoo_root.iterdir(): @@ -17,6 +19,11 @@ def get_all_models(): continue # Heuristic to see if this is a model we should generate export.py for. if (subdir / "model.py").exists() and (subdir / "test.py").exists(): + if public_only: + if not (subdir / "info.yaml").exists(): + continue + if load_yaml(subdir / "info.yaml").get("status") != "public": + continue all_models.append(subdir.name) return all_models diff --git a/qai_hub_models/utils/perf_summary.py b/qai_hub_models/utils/perf_summary.py index 38202f39..13edb3c6 100644 --- a/qai_hub_models/utils/perf_summary.py +++ b/qai_hub_models/utils/perf_summary.py @@ -70,91 +70,100 @@ def update_summary(self, model_id: str, previous_report, new_report): new_perf_metrics = {} # Create chipset to perf metric - for i in range(len(previous_report["models"])): - for j in range(len(new_report["models"])): - if ( - previous_report["models"][i]["name"] - == new_report["models"][j]["name"] - ): - for prev_metric in previous_report["models"][i][ - "performance_metrics" - ]: - if "chipset" in prev_metric["reference_device_info"]: - ref_device = prev_metric["reference_device_info"]["chipset"] - prev_perf_metrics[ref_device] = prev_metric - - for new_metric in new_report["models"][j]["performance_metrics"]: - if "chipset" in new_metric["reference_device_info"]: - ref_device = new_metric["reference_device_info"]["chipset"] - new_perf_metrics[ref_device] = new_metric - - if len(prev_perf_metrics) == 0 or len(new_perf_metrics) == 0: - self.empty_perf_report.append((model_id,)) - - for device in prev_perf_metrics.keys(): - device_info = prev_perf_metrics[device]["reference_device_info"] - if device_info["os_name"] not in self.tracked_oses: - continue - - # Case 3: Chipset is missing in new data - if device not in new_perf_metrics: - self.missing_devices.append((model_id, device)) - continue - - for runtime_type in RUNTIMES_TO_COMPARE: - prev_inference_time = prev_perf_metrics[device][runtime_type][ - "inference_time" - ] - new_inference_time = new_perf_metrics[device][runtime_type][ - "inference_time" - ] - if new_inference_time == prev_inference_time: + if previous_report is not None and new_report is not None: + for i in range(len(previous_report["models"])): + for j in range(len(new_report["models"])): + if ( + previous_report["models"][i]["name"] + == new_report["models"][j]["name"] + ): + for 
prev_metric in previous_report["models"][i][ + "performance_metrics" + ]: + if "chipset" in prev_metric["reference_device_info"]: + ref_device = prev_metric["reference_device_info"][ + "chipset" + ] + prev_perf_metrics[ref_device] = prev_metric + + for new_metric in new_report["models"][j][ + "performance_metrics" + ]: + if "chipset" in new_metric["reference_device_info"]: + ref_device = new_metric["reference_device_info"][ + "chipset" + ] + new_perf_metrics[ref_device] = new_metric + + if len(prev_perf_metrics) == 0 or len(new_perf_metrics) == 0: + self.empty_perf_report.append((model_id,)) + + for device in prev_perf_metrics.keys(): + device_info = prev_perf_metrics[device]["reference_device_info"] + if device_info["os_name"] not in self.tracked_oses: continue - if new_inference_time == "null" or prev_inference_time == "null": - # Case 1: Model either failed to infer or had a successful run - summary_entry = ( - model_id, - runtime_type, - "inf", - self._format_speedup(new_inference_time), - self._format_speedup(prev_inference_time), - device_info["chipset"], - device_info["os"], - ) - - if new_inference_time == "null": - self.regressions["inf"].append(summary_entry) - else: - self.progressions["inf"].append(summary_entry) + # Case 3: Chipset is missing in new data + if device not in new_perf_metrics: + self.missing_devices.append((model_id, device)) continue - # Case 2: Bucketize speedup difference - progression_speedup = float(prev_inference_time) / float( - new_inference_time - ) - regression_speedup = float(new_inference_time) / float( - prev_inference_time - ) - is_progression = progression_speedup >= 1 - speedup = progression_speedup if is_progression else regression_speedup - - for bucket in self.perf_buckets[1:]: - if bucket <= speedup: - summary = ( + for runtime_type in RUNTIMES_TO_COMPARE: + prev_inference_time = prev_perf_metrics[device][runtime_type][ + "inference_time" + ] + new_inference_time = new_perf_metrics[device][runtime_type][ + "inference_time" + ] + if new_inference_time == prev_inference_time: + continue + + if new_inference_time == "null" or prev_inference_time == "null": + # Case 1: Model either failed to infer or had a successful run + summary_entry = ( model_id, runtime_type, - self._format_speedup(speedup), + "inf", self._format_speedup(new_inference_time), self._format_speedup(prev_inference_time), device_info["chipset"], device_info["os"], ) - if is_progression: - self.progressions[bucket].append(summary) + + if new_inference_time == "null": + self.regressions["inf"].append(summary_entry) else: - self.regressions[bucket].append(summary) - break + self.progressions["inf"].append(summary_entry) + continue + + # Case 2: Bucketize speedup difference + progression_speedup = float(prev_inference_time) / float( + new_inference_time + ) + regression_speedup = float(new_inference_time) / float( + prev_inference_time + ) + is_progression = progression_speedup >= 1 + speedup = ( + progression_speedup if is_progression else regression_speedup + ) + + for bucket in self.perf_buckets[1:]: + if bucket <= speedup: + summary = ( + model_id, + runtime_type, + self._format_speedup(speedup), + self._format_speedup(new_inference_time), + self._format_speedup(prev_inference_time), + device_info["chipset"], + device_info["os"], + ) + if is_progression: + self.progressions[bucket].append(summary) + else: + self.regressions[bucket].append(summary) + break def _get_summary_table(self, bucket_id, get_progressions=True): """ diff --git a/qai_hub_models/utils/printing.py 
b/qai_hub_models/utils/printing.py index 2e20827f..5efd51ec 100644 --- a/qai_hub_models/utils/printing.py +++ b/qai_hub_models/utils/printing.py @@ -9,7 +9,7 @@ import numpy as np import qai_hub as hub from prettytable import PrettyTable -from qai_hub.client import SourceModelType +from qai_hub.client import DatasetEntries, SourceModelType from tabulate import tabulate from qai_hub_models.utils.base_model import TargetRuntime @@ -22,7 +22,7 @@ def print_inference_metrics( inference_job: hub.InferenceJob, - inference_result: Dict[str, List[np.ndarray]], + inference_result: DatasetEntries, torch_out: List[np.ndarray], outputs_to_skip: Optional[List[int]] = None, metrics: str = "psnr", diff --git a/qai_hub_models/utils/qai_hub_helpers.py b/qai_hub_models/utils/qai_hub_helpers.py index c44f6085..89deb1d6 100644 --- a/qai_hub_models/utils/qai_hub_helpers.py +++ b/qai_hub_models/utils/qai_hub_helpers.py @@ -5,7 +5,8 @@ from __future__ import annotations import os -from typing import Any, Dict, List, Union +from pathlib import Path +from typing import Dict, List import numpy as np import qai_hub as hub @@ -20,8 +21,8 @@ def transpose_channel( io_names: str, - inputs: Union[hub.Dataset, Dict[str, Any]], - target_runtime: "TargetRuntime", + inputs: hub.client.DatasetEntries, + target_runtime: TargetRuntime, first_to_last: bool, ): @@ -29,7 +30,6 @@ def transpose_channel( io_names_list = io_names.strip().split(",") target = dict() - assert isinstance(inputs, dict) for name, array in inputs.items(): if len(array[0].shape) < min_dim or len(array[0].shape) > 5: target[name] = array @@ -47,16 +47,16 @@ def transpose_channel( def transpose_channel_first_to_last( io_names: str, - sample_inputs: Union[hub.Dataset, Dict[str, Any]], - target_runtime: "TargetRuntime", + sample_inputs: hub.client.DatasetEntries, + target_runtime: TargetRuntime, ) -> Dict[str, List[np.ndarray]]: return transpose_channel(io_names, sample_inputs, target_runtime, True) def transpose_channel_last_to_first( io_names: str, - job_outputs: Union[hub.Dataset, Dict[str, Any]], - target_runtime: "TargetRuntime", + job_outputs: hub.client.DatasetEntries, + target_runtime: TargetRuntime, ) -> Dict[str, List[np.ndarray]]: return transpose_channel(io_names, job_outputs, target_runtime, False) @@ -85,12 +85,12 @@ def export_without_hub_access( skip_inferencing: bool, skip_downloading: bool, skip_summary: bool, - output_path: str, + output_path: str | Path, target_runtime: TargetRuntime, compile_options: str, profile_options: str, components: List[str] | None = None, -) -> List[str] | None: +) -> List[str]: print(_WARNING_DASH) print( f"Unable to find a valid API token for {_AIHUB_NAME}. 
Using results from a previous job run on the same device.\n" diff --git a/qai_hub_models/utils/qnn_helpers.py b/qai_hub_models/utils/qnn_helpers.py index 51d9e255..d437e6df 100644 --- a/qai_hub_models/utils/qnn_helpers.py +++ b/qai_hub_models/utils/qnn_helpers.py @@ -6,11 +6,12 @@ import json from pathlib import Path -from typing import Dict, List +from typing import Dict -import torch from qai_hub.client import Job, Model, SourceModelType +from qai_hub_models.models.common import SampleInputsType + def onnx_elem_type_to_str(elem_type: int) -> str: if elem_type == 1: @@ -33,7 +34,7 @@ def load_encodings(output_path: Path, model_name: str) -> Dict: return encodings["activation_encodings"] -def get_qnn_inputs(compile_job: Job, sample_inputs: Dict[str, List[torch.Tensor]]): +def get_qnn_inputs(compile_job: Job, sample_inputs: SampleInputsType): compile_job.target_shapes return dict(zip(compile_job.target_shapes.keys(), sample_inputs.values())) diff --git a/qai_hub_models/utils/quantization_aimet.py b/qai_hub_models/utils/quantization_aimet.py index 40842fa1..56517e76 100644 --- a/qai_hub_models/utils/quantization_aimet.py +++ b/qai_hub_models/utils/quantization_aimet.py @@ -2,12 +2,24 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- +""" +Items defined in this file require that AIMET be installed. +""" from __future__ import annotations +import logging +import os + try: + from aimet_common.utils import AimetLogger # type: ignore from aimet_torch import onnx_utils from aimet_torch.qc_quantize_op import QcQuantizeWrapper from aimet_torch.quantsim import QuantizationSimModel + from aimet_torch.tensor_quantizer import StaticGridPerTensorQuantizer + + # Suppress aimet info logs within zoo + if not os.environ.get("SHOW_AIMET_LOGS"): + AimetLogger.set_level_for_all_areas(logging.WARN) except (ImportError, ModuleNotFoundError): raise NotImplementedError( "AIMET must be installed to load quantized models. " @@ -16,11 +28,10 @@ "https://quic.github.io/aimet-pages/releases/latest/install/index.html" ) -import os import shutil import tempfile from pathlib import Path -from typing import Any +from typing import Any, List from zipfile import ZipFile import torch @@ -31,26 +42,80 @@ _DataLoader, _for_each_batch, ) -from qai_hub_models.utils.base_model import ( - BaseModel, - InputSpec, - SourceModelFormat, - TargetRuntime, +from qai_hub_models.models._shared.common import apply_module_function_recursively +from qai_hub_models.models.common import SourceModelFormat, TargetRuntime +from qai_hub_models.models.protocols import ( + EvalModelProtocol, + HubModelProtocol, + QuantizableModelProtocol, ) -from qai_hub_models.utils.input_spec import make_torch_inputs +from qai_hub_models.utils.input_spec import InputSpec, make_torch_inputs + + +def tie_aimet_observer_groups(groups: List[List[Any]]): + """ + This defines groups of ops that all should use the same output + quantizer observer. The input groups is a list of lists, where the + inner lists contain op references that should all use the same output + quantizer. Each op should have an `output_quantizers` member. 
+
+    Example:
+
+        groups = [
+            [
+                sim.model.net.maxpool2,
+                sim.model.net.Mixed_5b.module_avg_pool2d,
+            ],
+        ]
+        tie_aimet_observer_groups(groups)
+    """
+    for group in groups:
+        output_quantizer = group[0].output_quantizers[0]
+        for op in group[1:]:
+            op.output_quantizers[0] = output_quantizer
+
+
+def convert_all_depthwise_to_per_tensor(module):
+    """
+    This recursively iterates a PyTorch module (that has been prepared by
+    AIMET for quantization) and replaces the weight quantizers with a
+    per-tensor quantizer for all depthwise convolutions. All parameters
+    (bitwidth, round_mode, etc.) are copied over from the existing quantizer.
+    """
+    # Please see #9842 for context
+    def convert_depthwise_to_per_tensor(op, parent_module, name):
+        # Only convert depthwise
+        if op.groups > 1 and op.out_channels == op.groups:
+            quantizers = parent_module.param_quantizers
+            for key in ["weight", "bias"]:
+                quantizer = quantizers[key]
+                quantizers[key] = StaticGridPerTensorQuantizer(
+                    bitwidth=quantizer.bitwidth,
+                    round_mode=quantizer.round_mode,
+                    quant_scheme=quantizer.quant_scheme,
+                    use_symmetric_encodings=quantizer.use_symmetric_encodings,
+                    enabled_by_default=quantizer.enabled,
+                )
+
+    apply_module_function_recursively(
+        module, torch.nn.Conv2d, convert_depthwise_to_per_tensor
+    )

-class AIMETQuantizableMixin:
+
+class AIMETQuantizableMixin(HubModelProtocol, QuantizableModelProtocol):
     """
-    This mixin provides quantization support with Qualcomm's AIMET package.
+    Mixin that allows a model to be quantized & exported to disk using AIMET.
+
+    Inheritor must implement HubModel for this mixin to function.
     """
 
     def __init__(
         self,
-        sim_model: QuantizationSimModel,
+        quant_sim: QuantizationSimModel,
         needs_onnx_direct_aimet_export: bool = False,
     ):
-        self.quant_sim = sim_model
+        self.quant_sim = quant_sim
         self.needs_onnx_direct_aimet_export = needs_onnx_direct_aimet_export
 
     def preferred_hub_source_model_format(
@@ -70,7 +135,7 @@ def quantize(
         requantize_model_weights=False,
     ) -> float | None:
         """
-        Re-compute quantization encodings for this model with the given dataset and model evaluator.
+        Compute quantization encodings for this model with the given dataset and model evaluator.
 
         This model will be updated with a new set of quantization parameters. Future calls to
         forward() and export_...() will take these quantization parameters into account.
@@ -104,8 +169,7 @@ def quantize(
         Returns:
             If an evaluator is provided, returns its accuracy score. No return value otherwise.
         """
-        assert isinstance(self, BaseModel)
-        if not evaluator:
+        if not evaluator and isinstance(self, EvalModelProtocol):
             evaluator = self.get_evaluator()
 
         # Enable or disable quantization for model parameters (model weights).
@@ -149,45 +213,36 @@ def convert_to_torchscript_and_aimet_encodings(
         input_spec: InputSpec | None = None,
         model_name: str | None = None,
     ) -> str:
-        """
-        Converts the torch module to a zip file containing an
-        unquantized torchscript trace and an aimet quantization encodings file.
- """ if model_name is None: model_name = self.__class__.__name__ if not input_spec: - input_spec = self._get_input_spec_ts() + input_spec = self.get_input_spec() os.makedirs(output_dir, exist_ok=True) zip_path = os.path.join(output_dir, f"{model_name}.aimet.zip") base_dir = Path(f"{model_name}.aimet") - base_path = Path(output_dir) / base_dir - if base_path.exists(): - shutil.rmtree(base_path) - os.makedirs(base_path) - self.quant_sim.export( - str(base_path), - model_name, - tuple(make_torch_inputs(input_spec)), - export_to_torchscript=True, - ) - - # AIMET exports GraphModule. Convert it to ScriptModule - fx_graph_path = base_path / f"{model_name}.pth" - fx_graph = torch.load(fx_graph_path) - script_module = torch.jit.trace(fx_graph, tuple(make_torch_inputs(input_spec))) - torch.jit.save(script_module, base_path / f"{model_name}.pt") - - with ZipFile(zip_path, "w") as zip_object: - zip_object.write(base_path, base_dir) - zip_object.write( - base_path / f"{model_name}.pt", base_dir / f"{model_name}.pt" - ) - zip_object.write( - base_path / f"{model_name}_torch.encodings", - base_dir / f"{model_name}_torch.encodings", + + with tempfile.TemporaryDirectory() as tmpdir: + base_path = Path(tmpdir) / base_dir + os.makedirs(base_path) + self.quant_sim.export( + str(base_path), + model_name, + tuple(make_torch_inputs(input_spec)), + export_to_torchscript=True, ) + with ZipFile(zip_path, "w") as zip_object: + zip_object.write(base_path, base_dir) + zip_object.write( + base_path / f"{model_name}.torchscript.pth", + base_dir / f"{model_name}.pt", + ) + zip_object.write( + base_path / f"{model_name}_torch.encodings", + base_dir / f"{model_name}_torch.encodings", + ) + return zip_path def convert_to_onnx_and_aimet_encodings( @@ -203,52 +258,45 @@ def convert_to_onnx_and_aimet_encodings( if model_name is None: model_name = self.__class__.__name__ if not input_spec: - input_spec = self._get_input_spec_ts() + input_spec = self.get_input_spec() os.makedirs(output_dir, exist_ok=True) zip_path = os.path.join(output_dir, f"{model_name}.aimet.zip") base_dir = Path(f"{model_name}.aimet") - base_path = Path(output_dir) / base_dir - if base_path.exists(): - shutil.rmtree(base_path) - os.makedirs(base_path) - - onnx_utils.EXPORT_TO_ONNX_DIRECT = self.needs_onnx_direct_aimet_export - self.quant_sim.export( - str(base_path), - model_name, - tuple(make_torch_inputs(input_spec)), - onnx_export_args=dict(input_names=[name for name in input_spec]), - ) - - onnx_file_name = f"{model_name}.onnx" - encodings_file_name = f"{model_name}.encodings" - with ZipFile(zip_path, "w") as zip_object: - zip_object.write(base_path, base_dir) - zip_object.write( - base_path / onnx_file_name, os.path.join(base_dir, onnx_file_name) - ) - zip_object.write( - base_path / encodings_file_name, - os.path.join(base_dir, encodings_file_name), + + with tempfile.TemporaryDirectory() as tmpdir: + base_path = Path(tmpdir) / base_dir + if base_path.exists(): + shutil.rmtree(base_path) + os.makedirs(base_path) + + onnx_utils.EXPORT_TO_ONNX_DIRECT = self.needs_onnx_direct_aimet_export + self.quant_sim.export( + str(base_path), + model_name, + tuple(make_torch_inputs(input_spec)), + onnx_export_args=dict(input_names=[name for name in input_spec]), ) - return zip_path + onnx_file_name = f"{model_name}.onnx" + encodings_file_name = f"{model_name}.encodings" + with ZipFile(zip_path, "w") as zip_object: + zip_object.write(base_path, base_dir) + zip_object.write( + base_path / onnx_file_name, os.path.join(base_dir, onnx_file_name) + ) + zip_object.write( + 
base_path / encodings_file_name, + os.path.join(base_dir, encodings_file_name), + ) - def convert_to_torchscript(*args, **kwargs): - """Block users from calling convert_to_torchscript() on quantized models, since python will call both parent classes.""" - raise NotImplementedError( - "This model is quantized. Use `model.convert_to_quantized_torchscript` instead!" - ) + return zip_path - def convert_to_quantized_torchscript( + def convert_to_torchscript( self, input_spec: InputSpec | None = None, check_trace: bool = True ) -> Any: - """ - Converts the torch module to a quantized torchscript trace. - """ if not input_spec: - input_spec = self._get_input_spec_ts() + input_spec = self.get_input_spec() with tempfile.TemporaryDirectory() as tempdir: self.quant_sim.export( @@ -267,26 +315,8 @@ def get_calibration_data( ) -> DatasetEntries | None: """ Calibration dataset for this model and input spec. - Default behavior is randomized input in range [0, 1]. """ if not input_spec: - input_spec = self._get_input_spec_ts() + input_spec = self.get_input_spec() inputs = make_torch_inputs(input_spec) return {k: v.numpy() for k, v in zip(input_spec.keys(), inputs)} - - def _get_input_spec_ts(self, *args, **kwargs) -> InputSpec: - """Type safe version of get_input_spec.""" - assert isinstance(self, BaseModel) - return self.get_input_spec(*args, **kwargs) - - -class HubCompileOptionsInt8Mixin: - def get_hub_compile_options( - self, - target_runtime: TargetRuntime, - other_compile_options: str = "", - ) -> str: - compile_options = super().get_hub_compile_options( # type: ignore - target_runtime, other_compile_options - ) - return compile_options + " --quantize_full_type int8 --quantize_io" diff --git a/scripts/build_and_test.py b/scripts/build_and_test.py index 78a7a6af..28dc8875 100755 --- a/scripts/build_and_test.py +++ b/scripts/build_and_test.py @@ -311,8 +311,19 @@ def test_changed_models( # but no model definitions actually changed. That means this was a mass-change # to the export scripts. # - # Just use 1 model as a sample to test the export. This makes CI significantly faster. - export_models = set([next(iter(export_changed_models))]) + # Test a representative set of models. + # One regular model, one aimet, one components, and one non-image input. + # These are among the smallest instances of each of these. + # If none of these models were changed, test one model. 
+ representative_set = [ + "sinet", + "quicksrnetsmall_quantized", + "mediapipe_face", + "facebook_denoiser", + ] + export_models = export_changed_models & set(representative_set) + if len(export_models) == 0: + export_models = set([next(iter(export_changed_models))]) else: export_models = set() @@ -333,6 +344,7 @@ def test_changed_models( self.venv_path, venv_for_each_model=False, use_shared_cache=True, + test_trace=False, ), ) @@ -373,6 +385,18 @@ def test_all_models(self, plan: Plan, step_id: str = "test_all_models") -> str: ), ) + @public_task("Generate perf.yamls.") + @depends(["install_deps"]) + def create_perfs(self, plan: Plan, step_id: str = "generate_perfs") -> str: + return plan.add_step( + step_id, + RunCommandsWithVenvTask( + group_name=None, + venv=self.venv_path, + commands=["python qai_hub_models/scripts/generate_perf_yaml.py --all"], + ), + ) + @public_task("Run profile jobs for all models in Model Zoo.") @depends(["install_deps"]) def test_profile_all_models( @@ -480,6 +504,21 @@ def release(self, plan: Plan, step_id: str = "release") -> str: ), ) + @public_task("Push QAIHM Code (build repo & wheel, push repo)") + @depends(["install_deps"]) + def release_code(self, plan: Plan, step_id: str = "release_code") -> str: + return plan.add_step( + step_id, + ReleaseTask( + self.venv_path, + self.python_executable, + build_repository=True, + push_repository=True, + build_wheel=False, + publish_wheel=False, + ), + ) + @public_task("Mock Release QAIHM (build repo & wheel, but do not push them)") @depends(["install_deps"]) def mock_release(self, plan: Plan, step_id: str = "mock_release") -> str: diff --git a/scripts/ci/git-credential-helper.sh b/scripts/ci/git-credential-helper.sh index 1a294a88..1baff120 100644 --- a/scripts/ci/git-credential-helper.sh +++ b/scripts/ci/git-credential-helper.sh @@ -1,3 +1,7 @@ #!/bin/bash +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- echo username="$GIT_USER" echo password="$GIT_PASSWORD" diff --git a/scripts/quantize_ffnet.py b/scripts/examples/quantize_ffnet.py similarity index 100% rename from scripts/quantize_ffnet.py rename to scripts/examples/quantize_ffnet.py diff --git a/scripts/examples/quantize_imagenet_classifier.py b/scripts/examples/quantize_imagenet_classifier.py index 45fb88ce..79d1e063 100644 --- a/scripts/examples/quantize_imagenet_classifier.py +++ b/scripts/examples/quantize_imagenet_classifier.py @@ -8,13 +8,43 @@ This script assumes the model is added to QAIHM, but is missing quantization parameters. 
""" import argparse -import importlib from pathlib import Path import torch from torch.utils.data import DataLoader from qai_hub_models.datasets.imagenette import ImagenetteDataset +from qai_hub_models.models.googlenet_quantized.model import GoogLeNetQuantizable +from qai_hub_models.models.inception_v3_quantized.model import InceptionNetV3Quantizable +from qai_hub_models.models.mobilenet_v2_quantized.model import MobileNetV2Quantizable +from qai_hub_models.models.mobilenet_v3_large_quantized.model import ( + MobileNetV3LargeQuantizable, +) +from qai_hub_models.models.regnet_quantized.model import RegNetQuantizable +from qai_hub_models.models.resnet18_quantized.model import ResNet18Quantizable +from qai_hub_models.models.resnet50_quantized.model import ResNet50Quantizable +from qai_hub_models.models.resnet101_quantized.model import ResNet101Quantizable +from qai_hub_models.models.resnext50_quantized.model import ResNeXt50Quantizable +from qai_hub_models.models.resnext101_quantized.model import ResNeXt101Quantizable +from qai_hub_models.models.shufflenet_v2_quantized.model import ShufflenetV2Quantizable +from qai_hub_models.models.squeezenet1_1_quantized.model import SqueezeNetQuantizable +from qai_hub_models.models.wideresnet50_quantized.model import WideResNet50Quantizable + +CLASSIFIERS = { + "googlenet": GoogLeNetQuantizable, + "inception_v3": InceptionNetV3Quantizable, + "mobilenet_v2": MobileNetV2Quantizable, + "mobilenet_v3_large": MobileNetV3LargeQuantizable, + "regnet": RegNetQuantizable, + "resnet101": ResNet101Quantizable, + "resnet18": ResNet18Quantizable, + "resnet50": ResNet50Quantizable, + "resnext50": ResNeXt50Quantizable, + "resnext101": ResNeXt101Quantizable, + "shufflenet_v2": ShufflenetV2Quantizable, + "squeezenet1_1": SqueezeNetQuantizable, + "wideresnet50": WideResNet50Quantizable, +} if __name__ == "__main__": parser = argparse.ArgumentParser() @@ -42,8 +72,9 @@ parser.add_argument( "--model", type=str, + choices=CLASSIFIERS.keys(), required=True, - help="Name of the model folder to compute encodings.", + help="Name of the model to quantize.", ) parser.add_argument( "--seed", @@ -52,17 +83,17 @@ help="Manual seed to ensure reproducibility for quantization.", ) args = parser.parse_args() - module = importlib.import_module(f"qai_hub_models.models.{args.model}") + ImageNetClassifier_cls = CLASSIFIERS[args.model] dataset = ImagenetteDataset() torch.manual_seed(args.seed) dataloader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True) - model = module.Model.from_pretrained(aimet_encodings=None) + model = ImageNetClassifier_cls.from_pretrained(aimet_encodings=None) accuracy = model.quantize(dataloader, args.num_iter, model.get_evaluator()) print(f"Accuracy: {accuracy * 100:.3g}%") output_path = args.output_dir or str(Path() / "build") - output_name = args.output_name or f"{module.MODEL_ID}_encodings" + output_name = args.output_name or f"{args.model}_quantized_encodings" model.quant_sim.save_encodings_to_json(output_path, output_name) diff --git a/scripts/examples/test_numerics_imagenet_classifier_quantized.py b/scripts/examples/test_numerics_imagenet_classifier_quantized.py new file mode 100644 index 00000000..3416382a --- /dev/null +++ b/scripts/examples/test_numerics_imagenet_classifier_quantized.py @@ -0,0 +1,306 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +""" +Run it with pytest --on-device +""" +from typing import Tuple + +import numpy as np +import pytest +import qai_hub as hub +import torch +from torch.utils.data import DataLoader, random_split +from tqdm import tqdm + +from qai_hub_models.datasets.imagenette import ImagenetteDataset +from qai_hub_models.models._shared.imagenet_classifier.model import ImagenetClassifier +from qai_hub_models.models.mobilenet_v2_quantized.model import MobileNetV2Quantizable +from qai_hub_models.models.mobilenet_v3_large_quantized.model import ( + MobileNetV3LargeQuantizable, +) +from qai_hub_models.models.regnet_quantized.model import RegNetQuantizable +from qai_hub_models.models.resnet18_quantized.model import ResNet18Quantizable +from qai_hub_models.models.resnet50_quantized.model import ResNet50Quantizable +from qai_hub_models.models.resnet101_quantized.model import ResNet101Quantizable +from qai_hub_models.models.resnext50_quantized.model import ResNeXt50Quantizable +from qai_hub_models.models.resnext101_quantized.model import ResNeXt101Quantizable +from qai_hub_models.models.shufflenet_v2_quantized.model import ShufflenetV2Quantizable +from qai_hub_models.models.squeezenet1_1_quantized.model import SqueezeNetQuantizable +from qai_hub_models.models.wideresnet50_quantized.model import WideResNet50Quantizable +from qai_hub_models.utils.base_model import SourceModelFormat, TargetRuntime +from qai_hub_models.utils.inference import compile_zoo_model_to_hub +from qai_hub_models.utils.measurement import get_model_size_mb + + +def on_device(func): + # Skip tests if '--on-device' is not in the command line arguments + return pytest.mark.skipif( + "'--on-device' not in sys.argv", reason="needs --on-device option to run" + )(func) + + +@pytest.fixture(scope="module") +def data_loaders(): + dataset = ImagenetteDataset() + calib_len = int(0.1 * len(dataset)) + test_len = len(dataset) - calib_len + # Deterministic random split + calib_dataset, test_dataset = random_split( + dataset, [calib_len, test_len], generator=torch.Generator().manual_seed(42) + ) + calib_loader = DataLoader(calib_dataset, batch_size=32, shuffle=False) + test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False) + return calib_loader, test_loader + + +@pytest.fixture(scope="module") +def test_data(data_loaders) -> Tuple[torch.Tensor, torch.Tensor, hub.Dataset]: + calib_loader, test_loader = data_loaders + num_test = 1000 + + img_batches, label_batches = [], [] + total_samples = 0 + for images, labels in tqdm(test_loader): + img_batches.append(images) + label_batches.append(labels) + total_samples += images.size(0) + if total_samples >= 1000: + break + img_test = torch.cat(img_batches, dim=0)[:num_test] + label_test = torch.cat(label_batches, dim=0)[:num_test] + input_name = list(ImagenetClassifier.get_input_spec().keys())[0] + data_entries = {input_name: np.split(img_test.numpy(), img_test.shape[0])} + hub_ds = hub.upload_dataset(data_entries) + return img_test, label_test, hub_ds + + +def test_dataloader_is_deterministic(data_loaders): + """Test that the calibration-test split and the loading are deterministic""" + calib_loader, test_loader = data_loaders + img, labels = next(iter(calib_loader)) + expected_calib_labels = [701, 569, 482, 571, 482] + assert labels[:5].tolist() == expected_calib_labels + + expected_test_labels = [569, 0, 217, 571, 701] + img, labels = next(iter(test_loader)) + assert labels[:5].tolist() == 
expected_test_labels + + +@pytest.fixture( + scope="module", + params=[ + # Class, Calibration accuracy, AIMET accuracy + (MobileNetV2Quantizable, 0.8021, 0.8100), + (MobileNetV3LargeQuantizable, 0.8438, 0.8550), + (ResNet18Quantizable, 0.8021, 0.8010), + (ResNet50Quantizable, 0.8229, 0.8520), + (ResNet101Quantizable, 0.8125, 0.8530), + (ResNeXt50Quantizable, 0.8333, 0.8880), + (ResNeXt101Quantizable, 0.8542, 0.9250), + (SqueezeNetQuantizable, 0.6042, 0.6410), + (RegNetQuantizable, 0.8229, 0.8750), + (WideResNet50Quantizable, 0.8958, 0.9190), + (ShufflenetV2Quantizable, 0.7083, 0.6740), + ], +) +def quantized_model(request, data_loaders, test_data): + """ + Create encoding from calibration data and returned quantized model with + validated off-target accuracy computed on QuantSim + """ + img_test, label_test, hub_dataset = test_data + calib_loader, test_loader = data_loaders + model_cls, target_calib_acc, target_sim_acc = request.param + model = model_cls.from_pretrained(aimet_encodings=None) + + # Calibration in quantization + num_calib_batches = 3 + calib_accuracy = model.quantize( + calib_loader, num_calib_batches, evaluator=model.get_evaluator() + ) + print(f"{model_cls=}, {calib_accuracy=}") + np.testing.assert_allclose(target_calib_acc, calib_accuracy, atol=0.01) + + # QuantSim evaluation on eval set + evaluator = model.get_evaluator() + + batch_size = 32 + for i in tqdm(list(range(0, img_test.size(0), batch_size)), desc="QuantSim eval"): + img_batch = img_test[i : i + batch_size] + label_batch = label_test[i : i + batch_size] + + sim_out = model(img_batch).detach() + evaluator.add_batch(sim_out, label_batch) + + sim_acc = evaluator.get_accuracy_score() + print(f"{model_cls=}, {sim_acc=}") + np.testing.assert_allclose(target_sim_acc, sim_acc, atol=0.01) + return model + + +@on_device +@pytest.mark.parametrize( + "source_model_format,target_runtime,hub_needs_calib_data", + [ + (SourceModelFormat.ONNX, TargetRuntime.TFLITE, False), + (SourceModelFormat.ONNX, TargetRuntime.QNN, False), + ], +) +def test_make_encoding_w8a8_accuracy( + source_model_format, + target_runtime, + hub_needs_calib_data, + test_data, + quantized_model, + data_loaders, +): + """ + 1. Export and compile quantized_model on Hub. + 2. Run inference on Hub on test. + + Note: We don't run profile job to get perf here but leave that to the score card. 
+ """ + model = quantized_model + + expected_size_mb_and_acc = { + (SourceModelFormat.ONNX, TargetRuntime.TFLITE, MobileNetV2Quantizable): ( + 3.64, + 0.784, + ), + (SourceModelFormat.ONNX, TargetRuntime.QNN, MobileNetV2Quantizable): ( + 4.02, + 0.790, + ), + (SourceModelFormat.ONNX, TargetRuntime.TFLITE, MobileNetV3LargeQuantizable): ( + 5.79, + 0.859, + ), + (SourceModelFormat.ONNX, TargetRuntime.QNN, MobileNetV3LargeQuantizable): ( + None, # Fails to convert (AISW-87206) + None, + ), + (SourceModelFormat.ONNX, TargetRuntime.TFLITE, ResNet18Quantizable): ( + 11.30, + 0.778, + ), + (SourceModelFormat.ONNX, TargetRuntime.QNN, ResNet18Quantizable): ( + 11.61, + 0.789, + ), + (SourceModelFormat.ONNX, TargetRuntime.TFLITE, ResNet50Quantizable): ( + 25.09, + 0.837, + ), + (SourceModelFormat.ONNX, TargetRuntime.QNN, ResNet50Quantizable): ( + 25.33, + 0.834, + ), + (SourceModelFormat.ONNX, TargetRuntime.TFLITE, ResNet101Quantizable): ( + 43.89, + 0.827, + ), + (SourceModelFormat.ONNX, TargetRuntime.QNN, ResNet101Quantizable): ( + 44.08, + 0.831, + ), + (SourceModelFormat.ONNX, TargetRuntime.TFLITE, ResNeXt50Quantizable): ( + 24.77, + 0.888, + ), + (SourceModelFormat.ONNX, TargetRuntime.QNN, ResNeXt50Quantizable): ( + 24.96, + 0.888, + ), + (SourceModelFormat.ONNX, TargetRuntime.TFLITE, ResNeXt101Quantizable): ( + 87.29, + 0.906, + ), + (SourceModelFormat.ONNX, TargetRuntime.QNN, ResNeXt101Quantizable): ( + 87.11, + None, # Fails to infer (#9827) + ), + (SourceModelFormat.ONNX, TargetRuntime.TFLITE, SqueezeNetQuantizable): ( + 1.30, + 0.609, + ), + (SourceModelFormat.ONNX, TargetRuntime.QNN, SqueezeNetQuantizable): ( + 1.66, + 0.609, + ), + (SourceModelFormat.ONNX, TargetRuntime.TFLITE, RegNetQuantizable): ( + 15.43, + 0.859, + ), + (SourceModelFormat.ONNX, TargetRuntime.QNN, RegNetQuantizable): ( + 15.77, + 0.859, + ), + (SourceModelFormat.ONNX, TargetRuntime.TFLITE, WideResNet50Quantizable): ( + 66.59, + 0.900, + ), + (SourceModelFormat.ONNX, TargetRuntime.QNN, WideResNet50Quantizable): ( + 66.78, + 0.897, + ), + (SourceModelFormat.ONNX, TargetRuntime.TFLITE, ShufflenetV2Quantizable): ( + 1.47, + 0.661, + ), + (SourceModelFormat.ONNX, TargetRuntime.QNN, ShufflenetV2Quantizable): ( + 1.90, + 0.661, + ), + } + expected_size_mb, expected_acc = expected_size_mb_and_acc[ + (source_model_format, target_runtime, model.__class__) + ] + if expected_size_mb is None: + pytest.skip("Fails to compile") + + img_test, label_test, hub_dataset = test_data + calib_loader, test_loader = data_loaders + + # calibration data + calibration_data = None + if hub_needs_calib_data: + # AIMET export has missing encoding and needs calibration data + num_calib_batches = 3 + calib_imgs = [] + for b, (img_calib, labels) in enumerate(iter(calib_loader)): + if b >= num_calib_batches: + break + img_np = img_calib.numpy() + calib_imgs.extend(np.split(img_np, img_np.shape[0])) + calibration_data = {list(model.get_input_spec().keys())[0]: calib_imgs} + + # On-device inference + device = hub.Device("Samsung Galaxy S23") + hub_model = compile_zoo_model_to_hub( + model=model, + source_model_format=source_model_format, + device=device, + target_runtime=target_runtime, + calibration_data=calibration_data, + ) + + # Make sure model is quantized + tgt_model_size_mb = get_model_size_mb(hub_model.model) + model_cls = quantized_model.__class__ + print( + f"{model_cls=}, {source_model_format=}, {target_runtime=}, {tgt_model_size_mb=}" + ) + np.testing.assert_allclose(expected_size_mb, tgt_model_size_mb, rtol=0.1) + + if expected_acc is None: 
+ pytest.skip("Fails to infer") + + # Check on-device accuracy + hub_out = hub_model(hub_dataset) + evaluator = model.get_evaluator() + evaluator.add_batch(hub_out, label_test) + hub_acc = evaluator.get_accuracy_score() + print(f"{model_cls=}, {source_model_format=}, {target_runtime=}, {hub_acc=}") + np.testing.assert_allclose(expected_acc, hub_acc, atol=0.01) diff --git a/scripts/examples/test_numerics_mobilenet_v2_quantized.py b/scripts/examples/test_numerics_mobilenet_v2_quantized.py deleted file mode 100644 index 55ba699d..00000000 --- a/scripts/examples/test_numerics_mobilenet_v2_quantized.py +++ /dev/null @@ -1,177 +0,0 @@ -# --------------------------------------------------------------------- -# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. -# SPDX-License-Identifier: BSD-3-Clause -# --------------------------------------------------------------------- -""" -Run it with pytest --on-device -""" -from typing import Tuple - -import numpy as np -import pytest -import qai_hub as hub -import torch -from torch.utils.data import DataLoader, random_split -from tqdm import tqdm - -from qai_hub_models.datasets.imagenette import ImagenetteDataset -from qai_hub_models.models.mobilenet_v2_quantized.model import MobileNetV2Quantizable -from qai_hub_models.utils.inference import compile_zoo_model_to_hub -from qai_hub_models.utils.measurement import get_model_size_mb - - -def on_device(func): - # Skip tests if '--on-device' is not in the command line arguments - return pytest.mark.skipif( - "'--on-device' not in sys.argv", reason="needs --on-device option to run" - )(func) - - -@pytest.fixture(scope="module") -def data_loaders(): - dataset = ImagenetteDataset() - calib_len = int(0.1 * len(dataset)) - test_len = len(dataset) - calib_len - # Deterministic random split - calib_dataset, test_dataset = random_split( - dataset, [calib_len, test_len], generator=torch.Generator().manual_seed(42) - ) - calib_loader = DataLoader(calib_dataset, batch_size=32, shuffle=False) - test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False) - return calib_loader, test_loader - - -@pytest.fixture(scope="module") -def test_data(data_loaders) -> Tuple[torch.Tensor, torch.Tensor, hub.Dataset]: - calib_loader, test_loader = data_loaders - num_test = 1000 - - img_batches, label_batches = [], [] - total_samples = 0 - for images, labels in tqdm(test_loader): - img_batches.append(images) - label_batches.append(labels) - total_samples += images.size(0) - if total_samples >= 1000: - break - img_test = torch.cat(img_batches, dim=0)[:num_test] - label_test = torch.cat(label_batches, dim=0)[:num_test] - input_name = list( - MobileNetV2Quantizable.from_pretrained(aimet_encodings=None) - .get_input_spec() - .keys() - )[0] - data_entries = {input_name: np.split(img_test.numpy(), img_test.shape[0])} - hub_ds = hub.upload_dataset(data_entries) - return img_test, label_test, hub_ds - - -def test_dataloader_is_deterministic(data_loaders): - """Test that the calibration-test split and the loading are deterministic""" - calib_loader, test_loader = data_loaders - img, labels = next(iter(calib_loader)) - expected_calib_labels = [701, 569, 482, 571, 482] - assert labels[:5].tolist() == expected_calib_labels - - expected_test_labels = [569, 0, 217, 571, 701] - img, labels = next(iter(test_loader)) - assert labels[:5].tolist() == expected_test_labels - - -@pytest.fixture(scope="module") -def quantized_model(data_loaders, test_data): - """ - Create encoding from calibration data and returned quantized model 
with - validated off-target accuracy computed on QuantSim - """ - img_test, label_test, hub_dataset = test_data - calib_loader, test_loader = data_loaders - model = MobileNetV2Quantizable.from_pretrained(aimet_encodings=None) - - # Calibration in quantization - num_calib_batches = 3 - calib_accuracy = model.quantize( - calib_loader, num_calib_batches, evaluator=model.get_evaluator() - ) - np.testing.assert_allclose(0.76, calib_accuracy, atol=0.01) - - # QuantSim evaluation on eval set - evaluator = model.get_evaluator() - - batch_size = 32 - for i in tqdm(list(range(0, img_test.size(0), batch_size)), desc="QuantSim eval"): - img_batch = img_test[i : i + batch_size] - label_batch = label_test[i : i + batch_size] - - sim_out = model(img_batch).detach() - evaluator.add_batch(sim_out, label_batch) - - sim_acc = evaluator.get_accuracy_score() - print(f"{sim_acc=}") - np.testing.assert_allclose(0.78125, sim_acc, atol=0.01) - return model - - -@on_device -@pytest.mark.parametrize( - "target_runtime,hub_needs_calib_data,expected_size_mb,expected_acc", - [ - ("onnx-tflite", False, 3.806, 0), - ("torch-tflite", False, 7.0891, 0.719), - ("onnx-qnn", False, 3.844, 0.76), - ("torch-qnn", True, 3.82, 0.7618), - ], -) -def test_make_encoding_w8a8_accuracy( - quantized_model, - data_loaders, - target_runtime, - hub_needs_calib_data, - expected_size_mb, - expected_acc, - test_data, -): - """ - 1. Export and compile quantized_model on Hub. - 2. Run inference on Hub on test. - - Note: We don't run profile job to get perf here but leave that to the score card. - """ - model = quantized_model - - img_test, label_test, hub_dataset = test_data - calib_loader, test_loader = data_loaders - - # calibration data - calibration_data = None - if hub_needs_calib_data: - # AIMET export has missing encoding and needs calibration data - num_calib_batches = 3 - calib_imgs = [] - for b, (img_calib, labels) in enumerate(iter(calib_loader)): - if b >= num_calib_batches: - break - img_np = img_calib.numpy() - calib_imgs.extend(np.split(img_np, img_np.shape[0])) - calibration_data = {list(model.get_input_spec().keys())[0]: calib_imgs} - - # On-device inference - device = hub.Device("Samsung Galaxy S23") - hub_model = compile_zoo_model_to_hub( - model=model, - device=device, - target_runtime=target_runtime, - calibration_data=calibration_data, - ) - - # Make sure model is quantized - tgt_model_size_mb = get_model_size_mb(hub_model.model) - np.testing.assert_allclose(expected_size_mb, tgt_model_size_mb, rtol=0.1) - - # Check on-device accuracy - hub_out = hub_model(hub_dataset) - evaluator = model.get_evaluator() - evaluator.add_batch(hub_out, label_test) - hub_acc = evaluator.get_accuracy_score() - print(f"{target_runtime=}, {hub_acc=}") - np.testing.assert_allclose(expected_acc, hub_acc, atol=0.01) diff --git a/scripts/github/create-aws-profile.sh b/scripts/github/create-aws-profile.sh new file mode 100755 index 00000000..4e71e75a --- /dev/null +++ b/scripts/github/create-aws-profile.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +set -euo pipefail + +LOCAL_AWS_ACCESS_KEY_ID="$1" +LOCAL_AWS_SECRET_ACCESS_KEY="$2" +LOCAL_AWS_DEFAULT_REGION="$3" +LOCAL_AWS_PROFILE="$4" + +aws configure set aws_access_key_id "$LOCAL_AWS_ACCESS_KEY_ID" --profile "$LOCAL_AWS_PROFILE" +aws configure set aws_secret_access_key "$LOCAL_AWS_SECRET_ACCESS_KEY" --profile "$LOCAL_AWS_PROFILE" +aws configure set region "$LOCAL_AWS_DEFAULT_REGION" --profile "$LOCAL_AWS_PROFILE" + +aws sts get-caller-identity --profile "$LOCAL_AWS_PROFILE" diff --git a/scripts/tasks/changes.py b/scripts/tasks/changes.py index dbbf2fcb..fe5aec9d 100644 --- a/scripts/tasks/changes.py +++ b/scripts/tasks/changes.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- import os -from typing import Iterable, Set +from typing import Iterable from .constants import ( PY_PACKAGE_MODELS_ROOT, @@ -34,7 +34,7 @@ def get_python_import_expression(filepath: str) -> str: def resolve_affected_models( - changed_files, + changed_files: Iterable[str], include_model: bool = True, include_demo: bool = True, include_export: bool = True, @@ -55,11 +55,11 @@ def resolve_affected_models( changed_files: List of filepaths to files that changed. Paths are relative to the root of this repository. """ - seen: Set[str] = set() + changed_files = list(changed_files) + seen = set(changed_files) while len(changed_files) > 0: # Pop off stack curr_file = changed_files.pop() - seen.add(curr_file) file_import = get_python_import_expression(curr_file) grep_out = run_and_get_output( @@ -80,23 +80,30 @@ def resolve_affected_models( # Add new nodes to stack for dependent_file in dependent_files: if dependent_file not in seen: + seen.add(dependent_file) changed_files.append(dependent_file) changed_models = set() for f in seen: if f.startswith(PY_PACKAGE_RELATIVE_MODELS_ROOT): - if not include_model and os.path.basename(f) == "model.py": + basename = os.path.basename(f) + if basename not in [ + "model.py", + "export.py", + "test.py", + "test_generated.py", + "demo.py", + ]: continue - if not include_export and os.path.basename(f) == "export.py": + if not include_model and basename == "model.py": continue - if not include_tests and os.path.basename(f) == "test.py": + if not include_export and basename == "export.py": continue - if ( - not include_generated_tests - and os.path.basename(f) == "test_generated.py" - ): + if not include_tests and basename == "test.py": + continue + if not include_generated_tests and basename == "test_generated.py": continue - if not include_demo and os.path.basename(f) == "demo.py": + if not include_demo and basename == "demo.py": continue model_name = f[len(PY_PACKAGE_RELATIVE_MODELS_ROOT) :].split("/")[1] @@ -115,17 +122,17 @@ def get_changed_files_in_package() -> Iterable[str]: os.makedirs("build/model-zoo/", exist_ok=True) changed_files_path = "build/changed-qaihm-files.txt" if not on_github(): - run( - f"git diff $(git merge-base --fork-point origin/main) --name-only > {changed_files_path}" - ) + run(f"git diff origin/main --name-only > {changed_files_path}") if os.path.exists(changed_files_path): with open(changed_files_path, "r") as f: - return [ + changed_files = [ file for file in f.read().split("\n") if file.startswith(PY_PACKAGE_RELATIVE_SRC_ROOT) and file.endswith(".py") ] + # Weed out duplicates + return list(set(changed_files)) return [] diff --git a/scripts/tasks/release.py 
b/scripts/tasks/release.py index 00289045..86ede149 100644 --- a/scripts/tasks/release.py +++ b/scripts/tasks/release.py @@ -156,6 +156,7 @@ def __init__(self): "git reset origin/main", # this checks out main "symbolically" (no on-disk source tree changes) "git add -u", # Remove any deleted files from the index "git add -f *", + "git add -f .", # https://stackoverflow.com/questions/26042390/ """git commit -m "$QAIHM_TAG Signed-off-by: $QAIHM_REPO_GH_SIGN_OFF_NAME <$QAIHM_REPO_GH_EMAIL>" """, diff --git a/scripts/tasks/test.py b/scripts/tasks/test.py index 84246a4c..324de146 100644 --- a/scripts/tasks/test.py +++ b/scripts/tasks/test.py @@ -5,6 +5,7 @@ from __future__ import annotations import os +from pathlib import Path from tempfile import TemporaryDirectory from typing import Iterable, Optional @@ -18,6 +19,7 @@ from .util import can_support_aimet, model_needs_aimet from .venv import ( CreateVenvTask, + RunCommandsWithVenvTask, SyncLocalQAIHMVenvTask, SyncModelRequirementsVenvTask, SyncModelVenvTask, @@ -189,7 +191,10 @@ def __init__( use_shared_cache: bool = False, # Use the global QAIHM cache rather than a temporary one for tests. export_func: str = "compile", skip_standard_unit_test: bool = False, + test_trace: bool = True, ): + if len(models_for_testing) == 0 and len(models_to_test_export) == 0: + return super().__init__("All Per-Model Tests (Skipped)", []) tasks = [] # Whether or not export tests will be run asynchronously @@ -214,7 +219,52 @@ def __init__( SyncLocalQAIHMVenvTask(base_test_venv, ["dev"], include_aimet=False) ) - print(f"Tests to be run for directories: {models_for_testing}") + print(f"Tests to be run for models: {models_for_testing}") + if not venv_for_each_model: + non_global_models = [] + global_models = [] + for model_name in models_for_testing: + yaml_path = Path(PY_PACKAGE_MODELS_ROOT) / model_name / "code-gen.yaml" + global_incompatible = False + if yaml_path.exists(): + with open(yaml_path, "r") as f: + if "global_requirements_incompatible" in f.read(): + global_incompatible = True + if global_incompatible: + non_global_models.append(model_name) + else: + global_models.append(model_name) + + if len(global_models) > 0: + globals_path = Path(PY_PACKAGE_SRC_ROOT) / "global_requirements.txt" + tasks.append( + RunCommandsWithVenvTask( + group_name="Install Global Requirements", + venv=base_test_venv, + commands=[f'pip install -r "{globals_path}"'], + ) + ) + + trace_tag = " or trace" if test_trace else "" + for model_name in sorted(global_models): + files_to_test = [] + model_dir = Path(PY_PACKAGE_MODELS_ROOT) / model_name + files_to_test.append(str(model_dir / "test.py")) + if model_name in models_to_test_export: + generated_test_path = str(model_dir / "test_generated.py") + if os.path.exists(generated_test_path): + files_to_test.append(generated_test_path) + tasks.append( + PyTestTask( + group_name=f"Test model: {model_name}", + venv=base_test_venv, + report_name=f"changed-models-{model_name}", + files_or_dirs=" ".join(files_to_test), + parallel=False, + extra_args=f'-s -m "unmarked or {export_func}{trace_tag}"', + ) + ) + models_for_testing = non_global_models for model_name in models_for_testing: # Run standard test suite for this model. 
tasks.append( @@ -222,7 +272,7 @@ def __init__( model_name, python_executable, model_name in models_to_test_export, - venv=None if venv_for_each_model else base_test_venv, + venv=None, use_shared_cache=use_shared_cache, export_func=export_func, skip_standard_unit_test=skip_standard_unit_test, diff --git a/scripts/tasks/venv.py b/scripts/tasks/venv.py index d3634f5d..b1c08eb0 100644 --- a/scripts/tasks/venv.py +++ b/scripts/tasks/venv.py @@ -12,7 +12,6 @@ PY_PACKAGE_INSTALL_ROOT, PY_PACKAGE_MODELS_ROOT, PY_PACKAGE_SRC_ROOT, - QAI_HUB_LATEST_PATH, REPO_ROOT, ) from .task import CompositeTask, RunCommandsTask, RunCommandsWithVenvTask @@ -29,6 +28,8 @@ def __init__(self, venv_path: str, python_executable: str) -> None: def is_package_installed(package_name: str, venv_path: str | None = None) -> bool: if venv_path is not None: + if not os.path.exists(venv_path): + return False command = f'. {venv_path}/bin/activate && python -c "import {package_name}"' else: command = f'python -c "import {package_name}"' @@ -51,7 +52,17 @@ def __init__( ) -> None: tasks = [] - # Install AIMET first to avoid installing two versions of torch (one from AIMET, one from QAIHM). + extras_str = f"[{','.join(extras)}]" if extras else "" + tasks.append( + RunCommandsWithVenvTask( + group_name=f"Install QAIHM{extras_str}", + venv=venv_path, + commands=[ + f'pip install -e "{PY_PACKAGE_INSTALL_ROOT}{extras_str}" -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.13/index.html', + ], + ) + ) + if include_aimet: if can_support_aimet(): if is_package_installed("aimet_torch", venv_path): @@ -84,33 +95,6 @@ def __init__( ) ) - qai_hub_wheel_url = os.environ.get("QAI_HUB_WHEEL_URL", None) - if not is_package_installed("qai_hub", venv_path): - if qai_hub_wheel_url is None: - if os.path.exists(QAI_HUB_LATEST_PATH): - qai_hub_wheel_url = QAI_HUB_LATEST_PATH - - if qai_hub_wheel_url: - # Install local QAI Hub wheel if it exists, instead of pulling it from PyPi. - tasks.append( - RunCommandsWithVenvTask( - group_name="Install QAI Hub (Pre-Release)", - venv=venv_path, - commands=[f'pip install "{qai_hub_wheel_url}"'], - ) - ) - - extras_str = f"[{','.join(extras)}]" if extras else "" - tasks.append( - RunCommandsWithVenvTask( - group_name=f"Install QAIHM{extras_str}", - venv=venv_path, - commands=[ - f'pip install -e "{PY_PACKAGE_INSTALL_ROOT}{extras_str}" -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.13/index.html' - ], - ) - ) - super().__init__( f"Create Local QAIHM{extras_str} Virtual Environment at {venv_path}", [task for task in tasks], diff --git a/scripts/util/common.sh b/scripts/util/common.sh index 0c7ac1d2..088fbaee 100644 --- a/scripts/util/common.sh +++ b/scripts/util/common.sh @@ -1,3 +1,7 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- # Common utilities # shellcheck disable=SC2034 # various definitions appear unused in this included source. diff --git a/scripts/util/env_create.sh b/scripts/util/env_create.sh index b315026e..71b85de8 100755 --- a/scripts/util/env_create.sh +++ b/scripts/util/env_create.sh @@ -1,3 +1,7 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- # shellcheck source=/dev/null # we are statically sourcing a script. # This can be sourced and hence does not specify an interpreter. diff --git a/scripts/util/env_sync.sh b/scripts/util/env_sync.sh index 4c6fb60d..a0165e11 100644 --- a/scripts/util/env_sync.sh +++ b/scripts/util/env_sync.sh @@ -1,3 +1,7 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- # This should be sourced and hence does not specify an interpreter. REPO_ROOT=$(git rev-parse --show-toplevel) diff --git a/scripts/util/github.sh b/scripts/util/github.sh index 619c2baa..5b7cee29 100644 --- a/scripts/util/github.sh +++ b/scripts/util/github.sh @@ -1,3 +1,7 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- REPO_ROOT=$(git rev-parse --show-toplevel) . "${REPO_ROOT}/scripts/util/common.sh" diff --git a/scripts/util/pytest_with_coverage.sh b/scripts/util/pytest_with_coverage.sh index 7863f986..7b141a57 100755 --- a/scripts/util/pytest_with_coverage.sh +++ b/scripts/util/pytest_with_coverage.sh @@ -1,5 +1,9 @@ #!/usr/bin/env bash +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- REPO_ROOT=$(git rev-parse --show-toplevel) # Load helpers diff --git a/scripts/util/run_mypy.sh b/scripts/util/run_mypy.sh index 5c4d8f98..416eaec9 100755 --- a/scripts/util/run_mypy.sh +++ b/scripts/util/run_mypy.sh @@ -1,4 +1,8 @@ #!/usr/bin/env bash +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- # shellcheck source=/dev/null REPO_ROOT=$(git rev-parse --show-toplevel) @@ -18,5 +22,5 @@ paths=(qai_hub_models) for path in "${paths[@]}"; do pathToCheck="${path}" echo "Running mypy on ${pathToCheck}" - mypy --warn-unused-configs --config-file="${REPO_ROOT}/mypy.ini" "${pathToCheck}" + mypy --ignore-missing-imports --warn-unused-configs --config-file="${REPO_ROOT}/mypy.ini" "${pathToCheck}" done diff --git a/setup.py b/setup.py index 989a9ca0..a40bd1ae 100644 --- a/setup.py +++ b/setup.py @@ -68,5 +68,5 @@ def get_data_files() -> List[str]: include_package_data=True, install_requires=[line.strip() for line in open(requirements_path).readlines()], extras_require=extras_require, - license="MIT", + license="BSD-3", )