diff --git a/.github/ISSUE_TEMPLATE/compute_issue_template b/.github/ISSUE_TEMPLATE/compute_issue_template
new file mode 100644
index 00000000..802d541d
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/compute_issue_template
@@ -0,0 +1,33 @@
+---
+name: Compute Bring Your Own Model - Bug report
+about: Create a report to help us improve
+title: "[BUG] Compute BYOM Issue: "
+labels: 'compute'
+assignees: ''
+
+---
+
+**Describe the issue**
+Please describe the issue you're hitting, such as performance, accuracy, or other model issues encountered while bringing your own model to Qualcomm AI Hub for deployment to compute devices.
+
+**To Reproduce**
+Steps to reproduce the behavior:
+1. Go to '...'
+2. Click on '....'
+3. Scroll down to '....'
+4. See error
+
+**Expected behavior**
+A clear and concise description of what you expected to happen.
+
+**Stack trace**
+If applicable, add the stack trace and/or screenshots to help explain your problem.
+
+**Host configuration:**
+ - OS and version: [e.g. Linux, Windows, macOS]
+ - Browser: [e.g. Chrome, Safari]
+ - QAI-Hub-Models version:
+ - QAI-Hub client version:
+
+**Additional context**
+Add any other context about the problem here.
diff --git a/.github/ISSUE_TEMPLATE/iot_issue_template.md b/.github/ISSUE_TEMPLATE/iot_issue_template.md
new file mode 100644
index 00000000..52735148
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/iot_issue_template.md
@@ -0,0 +1,33 @@
+---
+name: IoT Bring Your Own Model - Model Issue
+about: Create a report to help us improve
+title: "[BUG] IoT BYOM Issue: "
+labels: 'iot'
+assignees: ''
+
+---
+
+**Describe the issue**
+Please describe the issue you're hitting, such as performance, accuracy, or other model issues encountered while bringing your own model to Qualcomm AI Hub for deployment to IoT devices.
+
+**To Reproduce**
+Steps to reproduce the behavior:
+1. Go to '...'
+2. Click on '....'
+3. Scroll down to '....'
+4. See error
+
+**Expected behavior**
+A clear and concise description of what you expected to happen.
+
+**Stack trace**
+If applicable, add the stack trace and/or screenshots to help explain your problem.
+
+**Host configuration:**
+ - OS and version: [e.g. Linux, Windows, macOS]
+ - Browser: [e.g. Chrome, Safari]
+ - QAI-Hub-Models version:
+ - QAI-Hub client version:
+
+**Additional context**
+Add any other context about the problem here.
diff --git a/.github/ISSUE_TEMPLATE/mobile_issue_template.md b/.github/ISSUE_TEMPLATE/mobile_issue_template.md
new file mode 100644
index 00000000..5177cb85
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/mobile_issue_template.md
@@ -0,0 +1,33 @@
+---
+name: Mobile Bring Your Own Model - Model Issue
+about: Create a report to help us improve
+title: "[BUG] Mobile BYOM Issue: "
+labels: 'mobile'
+assignees: ''
+
+---
+
+**Describe the issue**
+Please describe the issue you're hitting, such as performance, accuracy, or other model issues encountered while bringing your own model to Qualcomm AI Hub for deployment to mobile devices.
+
+**To Reproduce**
+Steps to reproduce the behavior:
+1. Go to '...'
+2. Click on '....'
+3. Scroll down to '....'
+4. See error
+
+**Expected behavior**
+A clear and concise description of what you expected to happen.
+
+**Stack trace**
+If applicable, add the stack trace and/or screenshots to help explain your problem.
+
+**Host configuration:**
+ - OS and version: [e.g. Linux, Windows, macOS]
+ - Browser: [e.g. Chrome, Safari]
+ - QAI-Hub-Models version:
+ - QAI-Hub client version:
+
+**Additional context**
+Add any other context about the problem here.
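The host-configuration section in each template asks reporters for package versions. A minimal, hypothetical snippet for collecting those fields from Python is below; the `qai_hub_models._version` import mirrors the `qai_hub_models/_version.py` file touched later in this diff, while the `qai-hub` distribution name passed to `importlib.metadata` is an assumption about how the client package is published.

```python
# Hypothetical helper for filling in the "Host configuration" fields above.
# The distribution name "qai-hub" is an assumption, not confirmed by this diff.
import platform
from importlib.metadata import PackageNotFoundError, version

from qai_hub_models._version import __version__ as qai_hub_models_version

print("OS and version:", platform.platform())
print("QAI-Hub-Models version:", qai_hub_models_version)
try:
    print("QAI-Hub client version:", version("qai-hub"))
except PackageNotFoundError:
    print("QAI-Hub client version: not installed")
```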
diff --git a/README.md b/README.md index ce140fb0..2cac033b 100644 --- a/README.md +++ b/README.md @@ -286,6 +286,7 @@ Qualcomm® AI Hub Models is licensed under BSD-3. See the [LICENSE file](../LICE | [MobileNet-v3-Large-Quantized](https://aihub.qualcomm.com/models/mobilenet_v3_large_quantized) | [qai_hub_models.models.mobilenet_v3_large_quantized](qai_hub_models/models/mobilenet_v3_large_quantized/README.md) | ✔️ | ✔️ | ✔️ | [MobileNet-v3-Small](https://aihub.qualcomm.com/models/mobilenet_v3_small) | [qai_hub_models.models.mobilenet_v3_small](qai_hub_models/models/mobilenet_v3_small/README.md) | ✔️ | ✔️ | ✔️ | [RegNet](https://aihub.qualcomm.com/models/regnet) | [qai_hub_models.models.regnet](qai_hub_models/models/regnet/README.md) | ✔️ | ✔️ | ✔️ +| [RegNetQuantized](https://aihub.qualcomm.com/models/regnet_quantized) | [qai_hub_models.models.regnet_quantized](qai_hub_models/models/regnet_quantized/README.md) | ✔️ | ✔️ | ✔️ | [ResNeXt101](https://aihub.qualcomm.com/models/resnext101) | [qai_hub_models.models.resnext101](qai_hub_models/models/resnext101/README.md) | ✔️ | ✔️ | ✔️ | [ResNeXt101Quantized](https://aihub.qualcomm.com/models/resnext101_quantized) | [qai_hub_models.models.resnext101_quantized](qai_hub_models/models/resnext101_quantized/README.md) | ✔️ | ✔️ | ✔️ | [ResNeXt50](https://aihub.qualcomm.com/models/resnext50) | [qai_hub_models.models.resnext50](qai_hub_models/models/resnext50/README.md) | ✔️ | ✔️ | ✔️ @@ -295,6 +296,7 @@ Qualcomm® AI Hub Models is licensed under BSD-3. See the [LICENSE file](../LICE | [ResNet18](https://aihub.qualcomm.com/models/resnet18) | [qai_hub_models.models.resnet18](qai_hub_models/models/resnet18/README.md) | ✔️ | ✔️ | ✔️ | [ResNet18Quantized](https://aihub.qualcomm.com/models/resnet18_quantized) | [qai_hub_models.models.resnet18_quantized](qai_hub_models/models/resnet18_quantized/README.md) | ✔️ | ✔️ | ✔️ | [ResNet50](https://aihub.qualcomm.com/models/resnet50) | [qai_hub_models.models.resnet50](qai_hub_models/models/resnet50/README.md) | ✔️ | ✔️ | ✔️ +| [ResNet50Quantized](https://aihub.qualcomm.com/models/resnet50_quantized) | [qai_hub_models.models.resnet50_quantized](qai_hub_models/models/resnet50_quantized/README.md) | ✔️ | ✔️ | ✔️ | [Shufflenet-v2](https://aihub.qualcomm.com/models/shufflenet_v2) | [qai_hub_models.models.shufflenet_v2](qai_hub_models/models/shufflenet_v2/README.md) | ✔️ | ✔️ | ✔️ | [Shufflenet-v2Quantized](https://aihub.qualcomm.com/models/shufflenet_v2_quantized) | [qai_hub_models.models.shufflenet_v2_quantized](qai_hub_models/models/shufflenet_v2_quantized/README.md) | ✔️ | ✔️ | ✔️ | [SqueezeNet-1_1](https://aihub.qualcomm.com/models/squeezenet1_1) | [qai_hub_models.models.squeezenet1_1](qai_hub_models/models/squeezenet1_1/README.md) | ✔️ | ✔️ | ✔️ @@ -368,13 +370,16 @@ Qualcomm® AI Hub Models is licensed under BSD-3. 
See the [LICENSE file](../LICE | | | | | | **Pose Estimation** | [HRNetPose](https://aihub.qualcomm.com/models/hrnet_pose) | [qai_hub_models.models.hrnet_pose](qai_hub_models/models/hrnet_pose/README.md) | ✔️ | ✔️ | ✔️ +| [HRNetPoseQuantized](https://aihub.qualcomm.com/models/hrnet_pose_quantized) | [qai_hub_models.models.hrnet_pose_quantized](qai_hub_models/models/hrnet_pose_quantized/README.md) | ✔️ | ✔️ | ✔️ | [LiteHRNet](https://aihub.qualcomm.com/models/litehrnet) | [qai_hub_models.models.litehrnet](qai_hub_models/models/litehrnet/README.md) | ✔️ | ✔️ | ✔️ | [MediaPipe-Pose-Estimation](https://aihub.qualcomm.com/models/mediapipe_pose) | [qai_hub_models.models.mediapipe_pose](qai_hub_models/models/mediapipe_pose/README.md) | ✔️ | ✔️ | ✔️ | [OpenPose](https://aihub.qualcomm.com/models/openpose) | [qai_hub_models.models.openpose](qai_hub_models/models/openpose/README.md) | ✔️ | ✔️ | ✔️ | [Posenet-Mobilenet](https://aihub.qualcomm.com/models/posenet_mobilenet) | [qai_hub_models.models.posenet_mobilenet](qai_hub_models/models/posenet_mobilenet/README.md) | ✔️ | ✔️ | ✔️ +| [Posenet-Mobilenet-Quantized](https://aihub.qualcomm.com/models/posenet_mobilenet_quantized) | [qai_hub_models.models.posenet_mobilenet_quantized](qai_hub_models/models/posenet_mobilenet_quantized/README.md) | ✔️ | ✔️ | ✔️ | | | | | | **Depth Estimation** | [Midas-V2](https://aihub.qualcomm.com/models/midas) | [qai_hub_models.models.midas](qai_hub_models/models/midas/README.md) | ✔️ | ✔️ | ✔️ +| [Midas-V2-Quantized](https://aihub.qualcomm.com/models/midas_quantized) | [qai_hub_models.models.midas_quantized](qai_hub_models/models/midas_quantized/README.md) | ✔️ | ✔️ | ✔️ ### Audio diff --git a/qai_hub_models/_version.py b/qai_hub_models/_version.py index 49a2e97c..d5abcbec 100644 --- a/qai_hub_models/_version.py +++ b/qai_hub_models/_version.py @@ -2,4 +2,4 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- -__version__ = "0.8.0" +__version__ = "0.9.0" diff --git a/qai_hub_models/evaluators/base_evaluators.py b/qai_hub_models/evaluators/base_evaluators.py index d933680d..51235aee 100644 --- a/qai_hub_models/evaluators/base_evaluators.py +++ b/qai_hub_models/evaluators/base_evaluators.py @@ -135,7 +135,6 @@ def _for_each_batch( The input, output, and (if provided) ground_truth will be passed to this function after each inference. 
""" torch_device = torch.device(device) - model.eval() model.to(torch_device) total_samples = 0 num_samples = num_samples or len(data) diff --git a/qai_hub_models/global_requirements.txt b/qai_hub_models/global_requirements.txt index af284942..b3dc1e52 100644 --- a/qai_hub_models/global_requirements.txt +++ b/qai_hub_models/global_requirements.txt @@ -9,6 +9,7 @@ Deprecated==1.2.11 PySoundFile; sys_platform == 'win32' aimet-torch==1.31.2; sys_platform == "linux" albumentations==0.5.2 +audio2numpy==0.1.2 basicsr==1.4.2 boto3==1.34.119 botocore==1.34.119 @@ -41,6 +42,7 @@ pytorch-lightning==1.6.0 rapidfuzz==3.8.1 regex==2023.10.3 ruamel-yaml==0.18.6 +samplerate==0.2.1 schema==0.7.5 scikit-image==0.21.0 scikit-learn==1.1.3 diff --git a/qai_hub_models/models/_shared/cityscapes_segmentation/app.py b/qai_hub_models/models/_shared/cityscapes_segmentation/app.py index 03325642..e9413727 100644 --- a/qai_hub_models/models/_shared/cityscapes_segmentation/app.py +++ b/qai_hub_models/models/_shared/cityscapes_segmentation/app.py @@ -119,8 +119,7 @@ def predict(self, image: Image, raw_output: bool = False) -> Image | np.ndarray: ) input_tensor = preprocess_cityscapes_image(resized_image) - with torch.no_grad(): - small_res_output = self.model(input_tensor) + small_res_output = self.model(input_tensor) output = F.interpolate( small_res_output, diff --git a/qai_hub_models/models/_shared/convnext_tiny_quantized/model.py b/qai_hub_models/models/_shared/convnext_tiny_quantized/model.py index c098f281..c6f8927f 100644 --- a/qai_hub_models/models/_shared/convnext_tiny_quantized/model.py +++ b/qai_hub_models/models/_shared/convnext_tiny_quantized/model.py @@ -122,5 +122,4 @@ def from_pretrained( aimet_encodings = cls._default_aimet_encodings() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() return cls(sim) diff --git a/qai_hub_models/models/_shared/deeplab/app.py b/qai_hub_models/models/_shared/deeplab/app.py index 2a8b929a..85bcf519 100644 --- a/qai_hub_models/models/_shared/deeplab/app.py +++ b/qai_hub_models/models/_shared/deeplab/app.py @@ -64,9 +64,8 @@ def predict(self, image: Image, raw_output: bool = False) -> Image | np.ndarray: """ input_tensor = preprocess_image(image) - with torch.no_grad(): - output = self.model(input_tensor) - output = output[0] + output = self.model(input_tensor) + output = output[0] predictions = output.argmax(0).byte().cpu().numpy() if raw_output: diff --git a/qai_hub_models/models/_shared/detr/app.py b/qai_hub_models/models/_shared/detr/app.py index 3797f4aa..d14885b4 100644 --- a/qai_hub_models/models/_shared/detr/app.py +++ b/qai_hub_models/models/_shared/detr/app.py @@ -81,8 +81,7 @@ def predict( ) image_array = normalize_image_torchvision(preprocess_PIL_image(image)) - with torch.no_grad(): - outputs = self.model(image_array) + outputs = self.model(image_array) target_sizes = torch.tensor(image.size[::-1]).unsqueeze(0) out_logits, out_bbox = outputs[0], outputs[1] diff --git a/qai_hub_models/models/_shared/detr/model.py b/qai_hub_models/models/_shared/detr/model.py index a92a0a91..c5f812a5 100644 --- a/qai_hub_models/models/_shared/detr/model.py +++ b/qai_hub_models/models/_shared/detr/model.py @@ -27,7 +27,6 @@ def __init__(self, model: nn.Module) -> None: @classmethod def from_pretrained(cls, ckpt_name: str): model = DetrForObjectDetection.from_pretrained(ckpt_name) - model.eval() return cls(model) def forward(self, image: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: diff --git a/qai_hub_models/models/_shared/fastsam/model.py 
b/qai_hub_models/models/_shared/fastsam/model.py index 4342fb72..2ed257d7 100644 --- a/qai_hub_models/models/_shared/fastsam/model.py +++ b/qai_hub_models/models/_shared/fastsam/model.py @@ -22,7 +22,6 @@ def __init__(self, model: nn.Module) -> None: @classmethod def from_pretrained(cls, ckpt_name: str): model = FastSAM(ckpt_name).model - model.eval() return cls(model) def forward(self, image: torch.Tensor): diff --git a/qai_hub_models/models/_shared/ffnet/model.py b/qai_hub_models/models/_shared/ffnet/model.py index e1fb853a..ba54b038 100644 --- a/qai_hub_models/models/_shared/ffnet/model.py +++ b/qai_hub_models/models/_shared/ffnet/model.py @@ -69,7 +69,6 @@ class FFNet(CityscapesSegmentor): @classmethod def from_pretrained(cls: Type[FFNetType], variant_name: str) -> FFNetType: model = _load_ffnet_source_model(variant_name) - model.eval() return cls(model) @@ -121,7 +120,7 @@ def _load_ffnet_source_model(variant_name) -> torch.nn.Module: from models.model_registry import model_entrypoint - model = model_entrypoint(variant_name)().eval() + model = model_entrypoint(variant_name)() return model diff --git a/qai_hub_models/models/_shared/ffnet_quantized/model.py b/qai_hub_models/models/_shared/ffnet_quantized/model.py index 9a7e6522..d9a29dd5 100644 --- a/qai_hub_models/models/_shared/ffnet_quantized/model.py +++ b/qai_hub_models/models/_shared/ffnet_quantized/model.py @@ -78,5 +78,4 @@ def from_pretrained( aimet_encodings = cls.default_aimet_encodings() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() return cls(sim) diff --git a/qai_hub_models/models/_shared/imagenet_classifier/app.py b/qai_hub_models/models/_shared/imagenet_classifier/app.py index bee0e780..2762b89f 100644 --- a/qai_hub_models/models/_shared/imagenet_classifier/app.py +++ b/qai_hub_models/models/_shared/imagenet_classifier/app.py @@ -75,6 +75,5 @@ def predict(self, image: Image) -> torch.Tensor: to a different Imagenet1K class. """ input_tensor = preprocess_image(image, not self.normalization_in_network) - with torch.no_grad(): - output = self.model(input_tensor) + output = self.model(input_tensor) return torch.softmax(output[0], dim=0) diff --git a/qai_hub_models/models/_shared/imagenet_classifier/model.py b/qai_hub_models/models/_shared/imagenet_classifier/model.py index ac4e1b4d..878a6673 100644 --- a/qai_hub_models/models/_shared/imagenet_classifier/model.py +++ b/qai_hub_models/models/_shared/imagenet_classifier/model.py @@ -55,7 +55,6 @@ def __init__( self.normalize_input = normalize_input self.transform_input = transform_input self.net = net - self.eval() # Type annotation on image_tensor causes aimet onnx export failure def forward(self, image_tensor): diff --git a/qai_hub_models/models/_shared/llama/__init__.py b/qai_hub_models/models/_shared/llama/__init__.py new file mode 100644 index 00000000..21a22b31 --- /dev/null +++ b/qai_hub_models/models/_shared/llama/__init__.py @@ -0,0 +1,4 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- diff --git a/qai_hub_models/models/llama_v2_7b_chat_quantized/app.py b/qai_hub_models/models/_shared/llama/app.py similarity index 59% rename from qai_hub_models/models/llama_v2_7b_chat_quantized/app.py rename to qai_hub_models/models/_shared/llama/app.py index f27b7d29..65a95e64 100644 --- a/qai_hub_models/models/llama_v2_7b_chat_quantized/app.py +++ b/qai_hub_models/models/_shared/llama/app.py @@ -2,28 +2,18 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- +from __future__ import annotations + import gc -from typing import Any, Callable, List, Tuple +from abc import abstractmethod +from typing import Any, Callable, Dict, List, Set, Tuple import qai_hub as hub import torch -from qai_hub_models.models.llama_v2_7b_chat_quantized.model import ( - NUM_SPLITS, - Llama2_PromptProcessor_1_Quantized, - Llama2_PromptProcessor_2_Quantized, - Llama2_PromptProcessor_3_Quantized, - Llama2_PromptProcessor_4_Quantized, - Llama2_TokenGenerator_1_Quantized, - Llama2_TokenGenerator_2_Quantized, - Llama2_TokenGenerator_3_Quantized, - Llama2_TokenGenerator_4_Quantized, - get_input_prompt_with_tags, - get_past_keyval_with_shift, - prepare_combined_attention_mask, -) -from qai_hub_models.models.llama_v2_7b_chat_quantized.modeling_llama import ( +from qai_hub_models.models._shared.llama.model import ( RopeEmbedding, + get_past_keyval_with_shift, ) from qai_hub_models.utils.base_model import CollectionModel from qai_hub_models.utils.inference import ExecutableModelProtocol, HubModel @@ -35,36 +25,22 @@ def _get_tokens_from_logits(output: torch.Tensor): return torch.multinomial(probs, num_samples=1).squeeze(1) -def _get_model_class(split_part: int, is_token_generator: bool = False): - if split_part < 1 or split_part > 4: - raise RuntimeError( - "Incorrect index provided to request Model split class." - f" Must be within (1-4), provided ({split_part})." 
- ) - - if is_token_generator: - return [ - Llama2_TokenGenerator_1_Quantized, - Llama2_TokenGenerator_2_Quantized, - Llama2_TokenGenerator_3_Quantized, - Llama2_TokenGenerator_4_Quantized, - ][split_part - 1] - return [ - Llama2_PromptProcessor_1_Quantized, - Llama2_PromptProcessor_2_Quantized, - Llama2_PromptProcessor_3_Quantized, - Llama2_PromptProcessor_4_Quantized, - ][split_part - 1] - - -class Llama2ModelPipelineBase(ExecutableModelProtocol): +class LlamaModelPipelineBase(ExecutableModelProtocol): """ Llama Pipeline to execute model splits one after another """ - def __init__(self, num_splits: int, is_token_generator: bool = False): + def __init__( + self, + num_splits: int, + num_past_key_val_heads: int, + model_split_map: Dict[int, Tuple[int, int]], + is_token_generator: bool = False, + ): self.num_splits = num_splits self.is_token_generator = is_token_generator + self.num_past_key_val_heads = num_past_key_val_heads + self.model_split_map = model_split_map self.model_type = "TokenGenerator" if is_token_generator else "PromptProcessor" def __call__( @@ -94,10 +70,11 @@ def forward( del model gc.collect() input_ids = out[0] - past_key_values.extend(list(out[1:])) + for each in out[1:]: + past_key_values.extend(list(torch.split(each, 1, dim=1))) # Return logits + past_key_values - return (out[0],) + tuple(past_key_values) + return tuple((out[0], *past_key_values)) def forward_tg( self, @@ -108,13 +85,18 @@ def forward_tg( *past_key_values, ): past_key_values_new = [] - n = 512 + start_past_key_offset = 0 for i in range(1, self.num_splits + 1): with suppress_warnings(): model = self.load_model_part(i) print(f"Running {self.model_type} {i}/{self.num_splits}") - split_offset = n * (i - 1) - past_values = past_key_values[split_offset : split_offset + n] + layer_start, layer_end = self.model_split_map[i] + num_of_key_vals = ( + self.num_past_key_val_heads * 2 * (layer_end - layer_start) + ) + + end_past_key_offset = start_past_key_offset + num_of_key_vals + past_values = past_key_values[start_past_key_offset:end_past_key_offset] out = model( input_ids, attention_mask, @@ -131,7 +113,7 @@ def forward_tg( for j, new_cache_j in enumerate(out[1:]): # Construct new past entries by concatenating old and new - past_j = past_key_values[split_offset + j] + past_j = past_key_values[start_past_key_offset + j] # Concatenation is not always along the same dimension if new_cache_j.shape[3] == 1: @@ -151,12 +133,17 @@ def forward_tg( dim=dim, ) ) + start_past_key_offset = end_past_key_offset # Return logits + past_key_values - return (out[0],) + tuple(past_key_values_new) + return tuple((out[0], *past_key_values_new)) + @abstractmethod + def load_model_part(self, model_part: int): + pass -class HubLlama2ModelPipeline(Llama2ModelPipelineBase): + +class HubLlamaModelPipeline(LlamaModelPipelineBase): """ Pipeline wrapper for HubModels """ @@ -166,21 +153,29 @@ def __init__( hub_model_ids: List[str], hub_device: hub.Device, inference_options: str, + get_model_class: Callable, + num_past_key_val_heads: int, + model_split_map: Dict[int, Tuple[int, int]], is_token_generator: bool = False, ): - super().__init__(len(hub_model_ids), is_token_generator=is_token_generator) + super().__init__( + len(hub_model_ids), + num_past_key_val_heads, + model_split_map, + is_token_generator=is_token_generator, + ) self.models = [] for i, model_id in enumerate(hub_model_ids): hub_model = HubModel( hub.get_model(model_id), - input_names=_get_model_class( + input_names=get_model_class( i + 1, is_token_generator=is_token_generator 
) .get_input_spec() .keys(), device=hub_device, inference_options=inference_options, - output_names=_get_model_class( + output_names=get_model_class( i + 1, is_token_generator=is_token_generator ).get_output_names(), ) @@ -190,95 +185,122 @@ def load_model_part(self, model_part: int): model_index = model_part - 1 if model_index < 0 or model_index > len(self.models): raise RuntimeError( - f"HubLlama2ModelPipeline does not have requested model_part {model_part}." + f"HubLlamaModelPipeline does not have requested model_part {model_part}." ) - return self.models[model_index] -class Llama2ModelPipeline(Llama2ModelPipelineBase): +class LlamaModelPipeline(LlamaModelPipelineBase): """ Pipeline wrapper for PyTorch base model """ def __init__( - self, prompt_processor: CollectionModel, is_token_generator: bool = False + self, + models: CollectionModel, + num_splits: int, + num_past_key_val_heads: int, + model_split_map: Dict[int, Tuple[int, int]], + is_token_generator: bool = False, ): - self.prompt_processor = prompt_processor + self.models = models + self.num_splits = num_splits self.model_type = "TokenGenerator" if is_token_generator else "PromptProcessor" - super().__init__(NUM_SPLITS, is_token_generator=is_token_generator) + super().__init__( + num_splits, + num_past_key_val_heads=num_past_key_val_heads, + model_split_map=model_split_map, + is_token_generator=is_token_generator, + ) def load_model_part(self, model_part: int): - if model_part < 1 or model_part > NUM_SPLITS: + if model_part < 1 or model_part > self.num_splits: raise RuntimeError( - f"ModelLlama2ModelPipeline does not have requested model_part {model_part}." + f"ModelLlamaModelPipeline does not have requested model_part {model_part}." ) - return self.prompt_processor.load_model_part( - f"Llama2_{self.model_type}_{model_part}_Quantized" - ) + return self.models.load_model_part(f"{self.model_type}_{model_part}_Quantized") class ChatApp: + """ + This class is a demonstration of how one can use a Llama model to build a basic ChatApp. + This app uses two models: + * Prompt Processor + - Processes user input prompt to generate first token and KV-cache + * Token Generator + - Generates output tokens one at a time + - Uses KV-cache to speed up token generation + """ + def __init__( - self, prompt_processor: Callable, token_generator: Callable, tokenizer: Any + self, + prompt_processor: Callable, + token_generator: Callable, + get_input_prompt_with_tags: Callable, + prepare_combined_attention_mask: Callable, + tokenizer: Any, + end_tokens: Set[str], + num_past_key_val_heads: int, ): + """ + Base ChatApp that generates one response for a given input prompt.
+ + prompt_processor: Prompt Processor collection model + token_generator: Token Generator collection model + get_input_prompt_with_tags: Function to wrap input prompt with appropriate tags + prepare_combined_attention_mask: Function to combine and build attention mask, + tokenizer: Tokenizer to use, + end_tokens: Set of end tokens to convey end of token generation, + num_past_key_val_heads: Number of heads in past-key value, + """ self.prompt_processor = prompt_processor self.token_generator = token_generator + self.get_input_prompt_with_tags = get_input_prompt_with_tags + self.prepare_combined_attention_mask = prepare_combined_attention_mask self.tokenizer = tokenizer + self.end_tokens = end_tokens + self.num_past_key_val_heads = num_past_key_val_heads def generate_output_prompt( self, input_prompt: str, max_seq_len: int, max_output_tokens: int ): - input_prompt_processed = get_input_prompt_with_tags( + input_prompt_processed = self.get_input_prompt_with_tags( user_input_prompt=input_prompt ) - input_tokens = self.tokenizer(input_prompt_processed, return_tensors="pt") - token_size = input_tokens["input_ids"].shape[-1] - padding_size = max_seq_len - token_size - - input_ids = torch.cat( - ( - torch.Tensor([self.tokenizer.unk_token_id] * padding_size).reshape( - 1, padding_size - ), - input_tokens["input_ids"], - ), - dim=-1, - ).type(torch.int32) - attention_mask = torch.cat( - ( - torch.Tensor([0] * padding_size).reshape(1, padding_size), - input_tokens["attention_mask"], - ), - dim=-1, - ).type(torch.int32) - cm_attention_masks = prepare_combined_attention_mask( - attention_mask=attention_mask + + input_tokens = self.tokenizer( + input_prompt_processed, + return_tensors="pt", + padding="max_length", + max_length=max_seq_len, ) + input_ids = input_tokens["input_ids"].type(torch.long) + num_tokens = torch.sum(input_tokens["attention_mask"]).item() + padding_size = max_seq_len - num_tokens + position_ids = [0] * (padding_size) + list(range(0, num_tokens)) position_ids = ( - torch.cat( - ( - torch.zeros( - padding_size, - ), - torch.arange(token_size), - ) - ) - .reshape(1, max_seq_len) - .type(torch.int32) + torch.Tensor(position_ids).type(torch.long).reshape(1, max_seq_len) + ) + attention_mask = input_tokens["attention_mask"].type(torch.float32) + cm_attention_masks = self.prepare_combined_attention_mask( + attention_mask=attention_mask, + input_shape=input_tokens["attention_mask"].shape, ) - position_ids = ( torch.Tensor(position_ids).type(torch.long).reshape(1, max_seq_len) ) position_ids_cos, position_ids_sin = RopeEmbedding( max_length=max_seq_len ).get_embedding(position_ids) + + # Process input prompt output = self.prompt_processor( input_ids, cm_attention_masks, position_ids_cos, position_ids_sin ) output_token = _get_tokens_from_logits(output) - past_key_values = get_past_keyval_with_shift(output[1:]).values() + past_key_values = get_past_keyval_with_shift( + output[1:], num_of_past_key_heads=self.num_past_key_val_heads + ).values() output_prompt = self.tokenizer.decode(output_token) print() print(f"Text generated by Prompt Processor: {output_prompt}") @@ -286,10 +308,8 @@ def generate_output_prompt( # Collect output prompt to summarize later hub_tokens = output_token - num_of_tokens_processed = token_size + 1 + num_of_tokens_processed = num_tokens + 1 - # TODO: Revisiting demo and app to refactor like a chat-bot - # This is just a place-holder to show how both models work together for _ in range(max_output_tokens - 1): # TODO: check if previous generated token is EOS if 
num_of_tokens_processed >= max_seq_len: @@ -300,7 +320,7 @@ def generate_output_prompt( attention_mask = torch.cat( (attention_mask[:, 1:], torch.Tensor([[1]])), dim=-1 ) - cm_attention_masks = prepare_combined_attention_mask( + cm_attention_masks = self.prepare_combined_attention_mask( attention_mask=attention_mask, input_shape=(1, 1), past_key_values_length=max_seq_len - 1, @@ -311,6 +331,8 @@ def generate_output_prompt( position_ids_cos, position_ids_sin = RopeEmbedding( max_length=max_seq_len ).get_embedding(position_ids) + + # Generate output token output = self.token_generator( input_ids, cm_attention_masks, @@ -322,6 +344,11 @@ def generate_output_prompt( del input_ids output_token = _get_tokens_from_logits(output) output_prompt = self.tokenizer.decode(output_token) + + # Assistant generating end of token + if output_prompt in self.end_tokens: + break + past_key_values = output[1:] hub_tokens = torch.cat((hub_tokens, output_token), dim=-1) print() @@ -333,4 +360,3 @@ def generate_output_prompt( print("-------- Response Summary --------") print(f"Prompt: {input_prompt}") print(f"Response: {self.tokenizer.decode(hub_tokens)}") - return output_prompt diff --git a/qai_hub_models/models/_shared/llama/demo.py b/qai_hub_models/models/_shared/llama/demo.py new file mode 100644 index 00000000..d9c4fb09 --- /dev/null +++ b/qai_hub_models/models/_shared/llama/demo.py @@ -0,0 +1,185 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from __future__ import annotations + +from typing import Any, Callable, Dict, List, Set, Tuple, Type + +import qai_hub as hub + +from qai_hub_models.models._shared.llama.app import ChatApp as App +from qai_hub_models.models._shared.llama.app import ( + HubLlamaModelPipeline, + LlamaModelPipeline, +) +from qai_hub_models.models._shared.llama.model import DEFAULT_INPUT_SEQ_LEN +from qai_hub_models.utils.args import ( + get_model_cli_parser, + get_on_device_demo_parser, + validate_on_device_demo_args, +) +from qai_hub_models.utils.base_model import BaseModel, TargetRuntime +from qai_hub_models.utils.huggingface import has_model_access + +# Max output tokens to generate +# You can override this with cli argument. +# Keeping this short as on-device demo takes time to converge. +MAX_OUTPUT_TOKENS = 10 +DEFAULT_DEVICE = "Samsung Galaxy S24 (Family)" +DEFAULT_USER_PROMPT = "Hi! What is 2+3?" 
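The ChatApp flow above hinges on a fixed-length KV cache: after the prompt processor runs, `get_past_keyval_with_shift` drops the oldest sequence position from every cached key (`[:, :, :, 1:]`) and value (`[:, :, 1:, :]`), and the token generator then appends the key/value produced for the newest token so the cache length stays constant across autoregressive steps. The sketch below illustrates that bookkeeping only; the shapes are made up and it is not the repository's actual cache-handling code.

```python
# Illustrative sketch (not the repo's implementation) of the fixed-length
# KV-cache update between token-generation steps. Shape convention follows
# get_past_keyval_with_shift: keys are (batch, heads, head_dim, seq) and
# values are (batch, heads, seq, head_dim). All sizes here are invented.
import torch

batch, heads, head_dim, seq = 1, 32, 128, 1023
key_cache = torch.zeros(batch, heads, head_dim, seq)
value_cache = torch.zeros(batch, heads, seq, head_dim)


def shift_and_append(key_cache, value_cache, new_key, new_value):
    # Drop the oldest cached position along the sequence axis...
    key_cache = key_cache[:, :, :, 1:]
    value_cache = value_cache[:, :, 1:, :]
    # ...then append the key/value computed for the newest token, keeping
    # the overall cache length constant.
    key_cache = torch.cat((key_cache, new_key), dim=3)
    value_cache = torch.cat((value_cache, new_value), dim=2)
    return key_cache, value_cache


new_key = torch.randn(batch, heads, head_dim, 1)    # key for the new token
new_value = torch.randn(batch, heads, 1, head_dim)  # value for the new token
key_cache, value_cache = shift_and_append(key_cache, value_cache, new_key, new_value)
assert key_cache.shape == (batch, heads, head_dim, seq)
assert value_cache.shape == (batch, heads, seq, head_dim)
```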
+ + +def llama_chat_demo( + model_cls: Type[BaseModel], + model_id: str, + get_model_class: Callable, + get_input_prompt_with_tags: Callable, + prepare_combined_attention_mask: Callable, + tokenizer: Any, + num_splits: int, + num_key_val_heads: int, + model_split_map: Dict[int, Tuple[int, int]], + end_tokens: Set[str], + hf_repo_name: str, + hf_repo_url: str, + default_prompt: str = DEFAULT_USER_PROMPT, + is_test: bool = False, + available_target_runtimes: List[TargetRuntime] = [TargetRuntime.QNN], +): + """ + Shared Chat Demo App to generate output for provided input prompt + model_cls: Model base class (either Prompt Processor or Token Generator) + model_id: Model ID from hub, + get_model_class: Function to get initialize model class, + get_input_prompt_with_tags: Function to wrap input prompt with appropriate tags, + prepare_combined_attention_mask: Function to combine attention mask, + tokenizer: Tokenizer to encode-decode prompt, + num_splits: Number of model splits, + num_key_val_heads: Number of heads in past key-value cache, + model_split_map: Map for split number to decoder layer ranges, + end_tokens: Set of end tokens to use for end of output generation, + hf_repo_name: HF repo name, + hf_repo_url: HF repo url, + default_prompt: Default prompt to set, + is_test: If test, no options required, + available_target_runtimes: Default availble runtime in options, + """ + # Demo parameters + parser = get_model_cli_parser(model_cls) + parser = get_on_device_demo_parser( + parser, + add_output_dir=True, + available_target_runtimes=available_target_runtimes, + default_device=DEFAULT_DEVICE, + ) + parser.add_argument( + "--prompt", + type=str, + default=default_prompt, + help="input prompt.", + ) + parser.add_argument( + "--prompt-processor-input-seq-len", + type=int, + default=DEFAULT_INPUT_SEQ_LEN, + help="input sequence length for prompt-processor. This must be less than `max_position_embeddings` set for model.", + ) + parser.add_argument( + "--max-output-tokens", + type=int, + default=MAX_OUTPUT_TOKENS, + help="max output tokens to generate.", + ) + args = parser.parse_args([] if is_test else None) + validate_on_device_demo_args(args, model_id) + + if not is_test: + print(f"\n{'-' * 85}") + print(f"** Generating response via {model_id} **") + print() + print("Prompt:", args.prompt) + print("Max number of output tokens to generate:", args.max_output_tokens) + print("Please pass `--max-output-tokens ` to generate longer responses.") + print() + print( + """NOTE: Each token generation takes around 15 mins on-device: + 1. Model is divided into multiple parts to fit into device constraints + 2. Each model requires separate execution on-device via AI Hub + 3. Due to autoregressive nature, we cannot run step 2 in parallel + 4. Device procurement is subject to device availability and might take longer to run demo on-device + +Alternative: + 1. Run demo on host (with PyTorch) to verify e2e result for longer responses + 2. Run demo on-device for shorter responses (--max-output-tokens 10 or 20) + 3. [Optional] Can run demo on-device to generate long sentence (takes longer) + +We are actively working on to improve UX and reduce turn-around time for these models. 
+""" + ) + print(f"{'-' * 85}\n") + + if not args.on_device: + prompt_processor = LlamaModelPipeline( + model_cls.from_pretrained(), + num_splits=num_splits, + num_past_key_val_heads=num_key_val_heads, + model_split_map=model_split_map, + ) + token_generator = LlamaModelPipeline( + model_cls.from_pretrained(), + num_splits=num_splits, + num_past_key_val_heads=num_key_val_heads, + model_split_map=model_split_map, + is_token_generator=True, + ) + else: + hub_model_ids = args.hub_model_id.split(",") + # First four models are Prompt Processor + # Last four models are Token Generator + if len(hub_model_ids) != num_splits * 2: + model_id_lists = ",".join( + [f"" for i in range(1, num_splits * 2 + 1)] + ) + raise RuntimeError( + "Please provide comma separated hub-model-ids for Llama Prompt Processor and Token Generator," + f" e.g. --hub-model-id {model_id_lists}.\n" + "Specify model-ids for four Prompt Processor models first, then Token Generator models.\n" + "If you run export.py it will print out command to run on-device demo with ordered model-ids." + ) + + hub_device = hub.Device(args.device) + prompt_processor = HubLlamaModelPipeline( + hub_model_ids[:num_splits], + hub_device=hub_device, + inference_options=args.inference_options, + get_model_class=get_model_class, + num_past_key_val_heads=num_key_val_heads, + model_split_map=model_split_map, + ) + token_generator = HubLlamaModelPipeline( + hub_model_ids[num_splits:], + hub_device=hub_device, + inference_options=args.inference_options, + get_model_class=get_model_class, + num_past_key_val_heads=num_key_val_heads, + model_split_map=model_split_map, + is_token_generator=True, + ) + + has_model_access(hf_repo_name, hf_repo_url) + + app = App( + prompt_processor, + token_generator, + get_input_prompt_with_tags=get_input_prompt_with_tags, + prepare_combined_attention_mask=prepare_combined_attention_mask, + tokenizer=tokenizer, + end_tokens=end_tokens, + num_past_key_val_heads=num_key_val_heads, + ) + app.generate_output_prompt( + args.prompt, + max_seq_len=args.prompt_processor_input_seq_len, + max_output_tokens=args.max_output_tokens, + ) diff --git a/qai_hub_models/models/_shared/llama/model.py b/qai_hub_models/models/_shared/llama/model.py new file mode 100644 index 00000000..d7e70ed1 --- /dev/null +++ b/qai_hub_models/models/_shared/llama/model.py @@ -0,0 +1,261 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from __future__ import annotations + +import os +import pickle +from typing import List, Optional + +import torch +from qai_hub.client import Device + +from qai_hub_models.models.common import ( + SampleInputsType, + SourceModelFormat, + TargetRuntime, +) +from qai_hub_models.utils.aimet.aimet_dummy_model import AimetEncodingLoaderMixin +from qai_hub_models.utils.asset_loaders import ASSET_CONFIG, CachedWebModelAsset +from qai_hub_models.utils.base_model import BaseModel, TargetRuntime +from qai_hub_models.utils.input_spec import InputSpec + +DEFAULT_INPUT_SEQ_LEN = 1024 + + +def get_hidden_layer_range_from_split(split_part: int, model_split_map: dict): + hidden_layers_start, hidden_layers_end = model_split_map[split_part] + return hidden_layers_start, hidden_layers_end + + +def get_past_key_names( + start: int = 0, end: int = 8, num_of_past_key_heads=32, suffix="" +): + past_key_val_name = [] + for i in range(start, end): + cache_names = [ + f"past_key_{i}_h{j}{suffix}" for j in range(num_of_past_key_heads) + ] + [f"past_value_{i}_h{j}{suffix}" for j in range(num_of_past_key_heads)] + past_key_val_name.extend(cache_names) + return past_key_val_name + + +def save_input_cached_data( + data: dict, + split_part: int, + data_dir: str, + model_name: str, + model_id: str, + model_asset_version: str, + model_type: str = "pp", + input_seq_len: int = DEFAULT_INPUT_SEQ_LEN, +): + data_path = ( + f"{data_dir}/{input_seq_len}/{model_name}_{split_part}_{model_type}_inputs.pkl" + ) + + inputs_pkl_path = ASSET_CONFIG.get_local_store_model_path( + model_id, + model_asset_version, + f"{data_path}", + ) + + # if already exists, no need to re-serialize. + if os.path.exists(inputs_pkl_path): + return + + os.makedirs(os.path.dirname(inputs_pkl_path), exist_ok=True) + with open(f"{inputs_pkl_path}", "wb") as f: + pickle.dump(data, f, pickle.HIGHEST_PROTOCOL) + + +def load_input_cached_data( + split_part: int, + data_dir: str, + model_name: str, + model_id: str, + model_asset_version: str, + model_type: str = "pp", + input_seq_len: int = DEFAULT_INPUT_SEQ_LEN, +): + data_path = ( + f"{data_dir}/{input_seq_len}/{model_name}_{split_part}_{model_type}_inputs.pkl" + ) + try: + + # Load local data path if already generated + inputs_pkl_path = ASSET_CONFIG.get_local_store_model_path( + model_id, + model_asset_version, + f"{data_path}", + ) + + # If local data path not found, fetch from server if available + if not os.path.exists(inputs_pkl_path): + inputs_pkl_path = CachedWebModelAsset.from_asset_store( + model_id, + model_asset_version, + data_path, + ).fetch() + + with open(f"{inputs_pkl_path}", "rb") as f: + return pickle.load(f) + except Exception: + # Delete intermediate data file if error occurs + if os.path.exists(inputs_pkl_path): + os.remove(inputs_pkl_path) + print( + f"Unable to load cached data for {data_path}, creating data using PyTorch models." 
+ ) + # Unable to load cached data, return None + return None + + +def get_past_keyval_with_shift( + past_key_vals: List[torch.Tensor], num_of_past_key_heads: int = 32 +): + """ + Clip past key value to feed next iteration + """ + tg_inputs = {} + total_key_val = num_of_past_key_heads * 2 + for i in range(0, len(past_key_vals), total_key_val): + l_num = i // total_key_val + for j, key in enumerate(past_key_vals[i : i + num_of_past_key_heads]): + tg_inputs[f"past_key_{l_num}_h{j}"] = key[:, :, :, 1:].detach() + + for j, val in enumerate( + past_key_vals[i + num_of_past_key_heads : i + total_key_val] + ): + tg_inputs[f"past_value_{l_num}_h{j}"] = val[:, :, 1:, :].detach() + + return tg_inputs + + +def make_torch_compatible_past_key_values( + decode_layers, past_key_val_per_layer, *past_values_flattened +): + past_key_values = [] + total_past_entries = len(past_values_flattened) + + # past values consists of + # 1. k decode/hidden layers + # 2. each decode layer has 2 entries: key and value + # 3. each key-value entry is has layer + if total_past_entries != decode_layers * past_key_val_per_layer * 2: + raise RuntimeError( + "Incorrect number of past key-values provided for model." + f"Expecting {decode_layers * past_key_val_per_layer * 2}, got {total_past_entries}." + ) + + for i in range(0, decode_layers * 2, 2): + keys = past_values_flattened[ + i * past_key_val_per_layer : (i + 1) * past_key_val_per_layer + ] + values = past_values_flattened[ + (i + 1) * past_key_val_per_layer : (i + 2) * past_key_val_per_layer + ] + + past_key_values.append((keys, values)) + return tuple(past_key_values) + + +class RopeEmbedding: + """ + Compute Rotary Position Embedding + Ref: https://arxiv.org/pdf/2104.09864 + + Compute RopeEmbedding outside model to simplify model quantization + """ + + def __init__(self, head_dim: int = 128, max_length: int = 1024): + """ + head_dim: dimension size of head + max_length: max sequence length to expect + """ + self.max_length = max_length + self.cos, self.sin = self.precompute_freqs_cis(head_dim, max_length * 2) + + def precompute_freqs_cis(self, dim: int, end: int, theta: float = 10000.0): + """ + Precompute embeeding matrix + """ + freqs = 1.0 / (theta ** (torch.arange(0, dim, 2)[: (dim // 2)].float() / dim)) + t = torch.arange(end) + freqs = torch.outer(t, freqs).float() + freqs_cis = torch.polar(torch.ones_like(freqs), freqs) + freqs_cis = freqs_cis[0 : self.max_length] + freqs_real = torch.view_as_real(freqs_cis) + freqs_real = freqs_real.unsqueeze(0).unsqueeze(0) + + freqs_cos = freqs_real[:, :, :, :, 0] # extract even elements + freqs_sin = freqs_real[:, :, :, :, 1] # extract odd elements + return freqs_cos, freqs_sin + + def get_embedding(self, position_ids: torch.Tensor): + """ + position_ids: [batch_size, sequence_length] + return [batch_size, 1, sequence_length, head_sim//2][2] + """ + cos = self.cos[0, 0, :, :] # [seq_len, dim] + sin = self.sin[0, 0, :, :] # [seq_len, dim] + cos = cos[position_ids].unsqueeze(1) + sin = sin[position_ids].unsqueeze(1) + return cos, sin + + +class Llama_QuantizedMixin(AimetEncodingLoaderMixin, BaseModel): + def __init__(self, model, encoding_path, is_token_generator=False): + AimetEncodingLoaderMixin.__init__(self, model, encoding_path) + BaseModel.__init__(self) + self.model = model + self.split_part = 1 + self.is_token_generator = is_token_generator + + def get_hub_compile_options( + self, + target_runtime: TargetRuntime, + other_compile_options: str = "", + device: Optional[Device] = None, + ) -> str: + if target_runtime != 
TargetRuntime.QNN: + raise RuntimeError( + f"Unsupported target_runtime provided: {target_runtime}." + " Only QNN runtime is supported for Llama for now." + ) + + return " --target_runtime qnn_context_binary --quantize_full_type w8a16 --quantize_io" + + @staticmethod + def get_output_names( + start: int = 0, + end: int = 8, + past_key_val_heads: int = 32, + output_name: str = "", + ): + # Clipped hidden layers are named same as first part for all parts + # Eventually, each split should have respective names. + # layer_start, layer_end = get_hidden_layer_range_from_split(split_part=split_part, model_split_map=model_split_map) + layer_range = end - start + output_list = [ + output_name if output_name else f"layers_{layer_range - 1}_add_out_0" + ] + output_list += get_past_key_names( + 0, layer_range, num_of_past_key_heads=past_key_val_heads, suffix="_out" + ) + return output_list + + def sample_inputs(self, input_spec: Optional[InputSpec] = None) -> SampleInputsType: + data = self.get_calibration_data(input_spec=input_spec) + for key, val in data.items(): + data[key] = [val.detach().numpy()] + return data + + def preferred_hub_source_model_format( + self, target_runtime: TargetRuntime + ) -> SourceModelFormat: + """ + Source model format preferred for conversion on AI Hub. + """ + return SourceModelFormat.ONNX diff --git a/qai_hub_models/models/_shared/video_classifier/app.py b/qai_hub_models/models/_shared/video_classifier/app.py index 8c92fe2e..13ed6312 100644 --- a/qai_hub_models/models/_shared/video_classifier/app.py +++ b/qai_hub_models/models/_shared/video_classifier/app.py @@ -153,7 +153,7 @@ class KineticsClassifierApp: """ def __init__(self, model: KineticsClassifier): - self.model = model.eval() + self.model = model def predict(self, path: str | Path) -> List[str]: """ diff --git a/qai_hub_models/models/_shared/whisper/app.py b/qai_hub_models/models/_shared/whisper/app.py index 27deac3d..fb3f9978 100644 --- a/qai_hub_models/models/_shared/whisper/app.py +++ b/qai_hub_models/models/_shared/whisper/app.py @@ -7,21 +7,22 @@ from typing import List, Tuple import numpy as np +import samplerate import torch import whisper # type: ignore from scipy import special as scipy_special # type: ignore -from qai_hub_models.models._shared.whisper.model import Whisper +from qai_hub_models.models._shared.whisper.model import ( + CHUNK_LENGTH, + HOP_LENGTH, + MEL_FILTER_PATH, + N_FFT, + N_MELS, + SAMPLE_RATE, + Whisper, +) from qai_hub_models.utils.model_adapters import TorchNumpyAdapter -# hard-coded audio hyperparameters -SAMPLE_RATE = 16000 -N_FFT = 400 -N_MELS = 80 -HOP_LENGTH = 160 -CHUNK_LENGTH = 30 -N_SAMPLES = CHUNK_LENGTH * SAMPLE_RATE # 480000 samples in a 30-second chunk - class WhisperApp: """ @@ -30,7 +31,15 @@ class WhisperApp: OpenAI Whisper. 
""" - def __init__(self, whisper: Whisper): + def __init__( + self, + whisper: Whisper, + mel_filter: np.ndarray | None = None, + sample_rate: int = SAMPLE_RATE, + max_audio_seconds: int = CHUNK_LENGTH, + n_fft: int = N_FFT, + hop_length: int = HOP_LENGTH, + ): decoder = whisper.decoder.to("cpu") encoder = whisper.encoder.to("cpu") self.num_decoder_blocks = whisper.num_decoder_blocks @@ -38,13 +47,25 @@ def __init__(self, whisper: Whisper): self.attention_dim = whisper.attention_dim self.mean_decode_len = whisper.mean_decode_len + self.mel_filter = mel_filter + if not self.mel_filter: + MEL_FILTER_PATH.fetch() + with np.load(MEL_FILTER_PATH.path()) as f: + self.mel_filter = f[f"mel_{N_MELS}"] + + self.hop_length = hop_length + self.sample_rate = sample_rate + self.max_audio_seconds = max_audio_seconds + self.n_fft = n_fft + self.max_audio_samples = self.max_audio_seconds * self.sample_rate + # Wraps torch Module so it takes np ndarray as input and outputs if isinstance(encoder, torch.nn.Module): self.encoder = TorchNumpyAdapter(encoder) else: self.encoder = encoder if isinstance(decoder, torch.nn.Module): - self.decoder = TorchNumpyAdapter(decoder.eval()) + self.decoder = TorchNumpyAdapter(decoder) else: self.decoder = decoder @@ -52,18 +73,57 @@ def predict(self, *args, **kwargs): # See transcribe. return self.transcribe(*args, **kwargs) - def transcribe(self, mel_input: np.ndarray) -> str: + def transcribe( + self, audio: np.ndarray | str, audio_sample_rate: int | None = None + ) -> str: + """ + Transcribe the provided audio to text. + + Parameters + ---------- + audio: numpy array | str + Path to audio file if a string. + Raw audio array of shape (# of samples) if a numpy array. + + audio_sample_rate: int | None + The sample rate of the provided audio, in samples / second. + If audio is a numpy array, this must be provided. + If audio is a file and audio_sample_rate is None, this is ignored and the sample rate will be derived from the audio file. + + Returns + ------- + List of audio arrays, chunked into N arrays of model_chunk_seconds seconds. """ - Transcribe an audio to text. + if isinstance(audio, str): + import audio2numpy as a2n # import here, as this requires ffmpeg to be installed on host machine + + audio, audio_sample_rate = a2n.audio_from_file(audio) + else: + assert audio_sample_rate is not None + + return " ".join( + self._transcribe_single_chunk(x) + for x in chunk_and_resample_audio(audio, audio_sample_rate) + ) + + def _transcribe_single_chunk(self, audio: np.ndarray) -> str: + """ + Transcribe an audio chunk to text. Parameters: - - mel_input: of shape (1, 80, 3000). Mel spectrogram of 30s audio. + audio: numpy array + A numpy array of audio of shape (number of samples). + The sample rate of this audio must be self.sample_rate. + The maximum length of this audio must be self.max_audio_samples. Returns: - transcribed texts """ + mel_input = log_mel_spectrogram( + self.mel_filter, audio, self.max_audio_samples, self.n_fft, self.hop_length + ) k_cache_cross, v_cache_cross = self.encoder(mel_input) # Start decoding # coreml only takes float tensors @@ -307,31 +367,13 @@ def apply_timestamp_rules( return logits, logprobs -def load_audio(mel_filter: np.ndarray, audio_path: str) -> np.ndarray: - """ - Load audio to a mel spectrogram. 
- """ - with np.load(audio_path) as f: - audio_np = f["audio"] - # Pad 30-seconds of silence to the input audio, for slicing - input_feature = log_mel_spectrogram(mel_filter, audio_np, pad_to_length=N_SAMPLES) - # input_feature has fixed shape [1, 80, 3000]. 80 is - # spectrogram feature dim, 3000 is due to Whisper only takes - # 30 seconds input represented as 10ms spectrogram segments - assert input_feature.shape == (1, 80, 3000) - return input_feature - - -def load_mel_filter(mel_filter_path: str) -> np.ndarray: - with np.load(mel_filter_path) as f: - return f["mel_80"] - - # Adopted from https://github.com/openai/whisper/blob/main/whisper/audio.py def log_mel_spectrogram( mel_filter: np.ndarray, audio_np: np.ndarray, pad_to_length: int, + n_fft: int, + hop_length: int, ) -> np.ndarray: """ Compute the log-Mel spectrogram of @@ -356,8 +398,8 @@ def log_mel_spectrogram( padding = pad_to_length - len(audio) if padding > 0: audio = torch.nn.functional.pad(audio, (0, padding)) - window = torch.hann_window(N_FFT) - stft = torch.stft(audio, N_FFT, HOP_LENGTH, window=window, return_complex=True) + window = torch.hann_window(n_fft) + stft = torch.stft(audio, n_fft, hop_length, window=window, return_complex=True) magnitudes = stft[..., :-1].abs() ** 2 mel_spec = torch.from_numpy(mel_filter) @ magnitudes @@ -366,3 +408,53 @@ def log_mel_spectrogram( log_spec = torch.maximum(log_spec, log_spec.max() - 8.0) log_spec = (log_spec + 4.0) / 4.0 return log_spec.unsqueeze(0).detach().float().numpy() + + +def chunk_and_resample_audio( + audio: np.ndarray, + audio_sample_rate: int, + model_sample_rate=SAMPLE_RATE, + model_chunk_seconds=CHUNK_LENGTH, +) -> List[np.ndarray]: + """ + Parameters + ---------- + audio: str + Raw audio numpy array of shape [# of samples] + + audio_sample_rate: int + Sample rate of audio array, in samples / sec. + + model_sample_rate: int + Sample rate (samples / sec) required to run Whisper. The audio file + will be resampled to use this rate. + + model_chunk_seconds: int + Split the audio in to N sequences of this many seconds. + The final split may be shorter than this many seconds. + + Returns + ------- + List of audio arrays, chunked into N arrays of model_chunk_seconds seconds. + """ + if audio_sample_rate != model_sample_rate: + audio = samplerate.resample(audio, model_sample_rate / audio_sample_rate) + audio_sample_rate = model_sample_rate + + number_of_full_length_audio_chunks = ( + audio.shape[0] // audio_sample_rate // model_chunk_seconds + ) + last_sample_in_full_length_audio_chunks = ( + audio_sample_rate * number_of_full_length_audio_chunks * model_chunk_seconds + ) + + if number_of_full_length_audio_chunks == 0: + return [audio] + + return [ + *np.array_split( + audio[:last_sample_in_full_length_audio_chunks], + number_of_full_length_audio_chunks, + ), + audio[last_sample_in_full_length_audio_chunks:], + ] diff --git a/qai_hub_models/models/_shared/whisper/demo.py b/qai_hub_models/models/_shared/whisper/demo.py index bd9a4fa8..232f3750 100644 --- a/qai_hub_models/models/_shared/whisper/demo.py +++ b/qai_hub_models/models/_shared/whisper/demo.py @@ -2,19 +2,18 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
# SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- -from typing import Type +from typing import Tuple, Type -from qai_hub_models.models._shared.whisper.app import ( - WhisperApp, - load_audio, - load_mel_filter, -) +import numpy as np + +from qai_hub_models.models._shared.whisper.app import WhisperApp from qai_hub_models.models._shared.whisper.model import ( - MEL_FILTER_PATH, MODEL_ASSET_VERSION, MODEL_ID, + SAMPLE_RATE, Whisper, ) +from qai_hub_models.utils.args import get_model_cli_parser from qai_hub_models.utils.asset_loaders import CachedWebModelAsset TEST_AUDIO_PATH = CachedWebModelAsset.from_asset_store( @@ -22,19 +21,31 @@ ) -def whisper_demo(model_cls: Type[Whisper]): - # For other model sizes, see https://github.com/openai/whisper/blob/main/whisper/__init__.py#L17 - app = WhisperApp(model_cls.from_pretrained()) +def load_demo_audio() -> Tuple[np.ndarray, int]: TEST_AUDIO_PATH.fetch() - MEL_FILTER_PATH.fetch() + with np.load(TEST_AUDIO_PATH.path()) as f: + return f["audio"], SAMPLE_RATE - # Load audio into mel spectrogram - mel_filter_path = MEL_FILTER_PATH.path() - mel_filter = load_mel_filter(mel_filter_path) - audio_path = TEST_AUDIO_PATH.path() - mel_input = load_audio(mel_filter, audio_path) +def whisper_demo(model_cls: Type[Whisper], is_test: bool = False): + parser = get_model_cli_parser(model_cls) + parser.add_argument( + "--audio_file", + type=str, + default=None, + help="Audio file path or URL", + ) + args = parser.parse_args([] if is_test else None) + + # For other model sizes, see https://github.com/openai/whisper/blob/main/whisper/__init__.py#L17 + app = WhisperApp(model_cls.from_pretrained()) + + # Load default audio if file not provided + audio = args.audio_file + audio_sample_rate = None + if not audio: + audio, audio_sample_rate = load_demo_audio() # Perform transcription - transcription = app.transcribe(mel_input) + transcription = app.transcribe(audio, audio_sample_rate) print("Transcription:", transcription) diff --git a/qai_hub_models/models/_shared/whisper/model.py b/qai_hub_models/models/_shared/whisper/model.py index 1ea0acb4..cdc94c98 100644 --- a/qai_hub_models/models/_shared/whisper/model.py +++ b/qai_hub_models/models/_shared/whisper/model.py @@ -13,20 +13,40 @@ from qai_hub_models.utils.base_model import BaseModel, CollectionModel, TargetRuntime from qai_hub_models.utils.input_spec import InputSpec +MODEL_ID = "whisper_asr_shared" +MODEL_ASSET_VERSION = 1 + +# 20ms sample rate +SAMPLE_RATE = 16000 + +# Length of the Hann window signal used when applying a FFT to the audio. +N_FFT = 400 + +# Number of audio samples between adjacent STFT columns when applying FFT to the audio. +HOP_LENGTH = 160 + +# Audio chunk length in seconds +CHUNK_LENGTH = 30 + +# Samples per chunk +N_SAMPLES = CHUNK_LENGTH * SAMPLE_RATE # 480000 20ms samples in a 30-second chunk + # The official default max decoded length is 448. 
We use mean decoded length 224 for benchmarking purpose MEAN_DECODE_LEN = 224 -# The number of 20ms audio contexts in 30 seconds of audio -AUDIO_EMB_LEN = 1500 +# MEL filter to be applied to audio after applying FFT +MEL_FILTER_PATH = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, "openai_assets/mel_filters.npz" +) # The number of Mel features per audio context N_MELS = 80 -MODEL_ID = "whisper_asr_shared" -MODEL_ASSET_VERSION = 1 -MEL_FILTER_PATH = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, "openai_assets/mel_filters.npz" -) +# Audio embedding length +AUDIO_EMB_LEN = int(N_SAMPLES / N_MELS / 4) + +# Audio length per MEL feature +MELS_AUDIO_LEN = AUDIO_EMB_LEN * 2 class Whisper(CollectionModel): @@ -111,7 +131,7 @@ def get_input_spec() -> InputSpec: Returns the input specification (name -> (shape, type). This can be used to submit profiling job on Qualcomm AI Hub. """ - return dict(audio=((1, N_MELS, AUDIO_EMB_LEN * 2), "float32")) + return dict(audio=((1, N_MELS, MELS_AUDIO_LEN), "float32")) @classmethod def from_pretrained(cls): diff --git a/qai_hub_models/models/_shared/whisper/test_utils.py b/qai_hub_models/models/_shared/whisper/test_utils.py index 9b4fb089..f3bef8b1 100644 --- a/qai_hub_models/models/_shared/whisper/test_utils.py +++ b/qai_hub_models/models/_shared/whisper/test_utils.py @@ -2,30 +2,26 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- +from typing import Tuple + import numpy as np import torch import whisper -from qai_hub_models.models._shared.whisper.app import ( - WhisperApp, - load_audio, - load_mel_filter, -) -from qai_hub_models.models._shared.whisper.demo import TEST_AUDIO_PATH -from qai_hub_models.models._shared.whisper.model import ( - MEAN_DECODE_LEN, - MEL_FILTER_PATH, - Whisper, - WhisperDecoderInf, - WhisperEncoderInf, -) +from qai_hub_models.models._shared.whisper.app import WhisperApp, log_mel_spectrogram +from qai_hub_models.models._shared.whisper.demo import load_demo_audio +from qai_hub_models.models._shared.whisper.model import MEAN_DECODE_LEN, Whisper -def load_mel_input() -> np.ndarray: - mel_filter_path = MEL_FILTER_PATH.fetch() - mel_filter = load_mel_filter(mel_filter_path) - audio_path = TEST_AUDIO_PATH.fetch() - return load_audio(mel_filter, audio_path) +def load_sample_audio_input(app: WhisperApp) -> Tuple[np.ndarray, np.ndarray, int]: + audio, sample_rate = load_demo_audio() + return ( + audio, + log_mel_spectrogram( + app.mel_filter, audio, app.max_audio_samples, app.n_fft, app.hop_length + ), + sample_rate, + ) def run_test_wrapper_numerics(whisper_version): @@ -35,20 +31,23 @@ def run_test_wrapper_numerics(whisper_version): processing) that matches with the original model's. 
""" + app = WhisperApp(Whisper.from_source_model(whisper.load_model(whisper_version))) + + # Load inputs + _, mel_input, _ = load_sample_audio_input(app) + # OpenAI - mel_input = load_mel_input() with torch.no_grad(): + model = whisper.load_model(whisper_version) mel_input = torch.from_numpy(mel_input) - model = whisper.load_model("tiny.en") audio_features = model.encoder(mel_input) tokens = torch.LongTensor([[50257]]) logits_orig = model.decoder(tokens, audio_features).detach().numpy() # QAIHM - encoder = WhisperEncoderInf(model) - decoder = WhisperDecoderInf(model.decoder) - + encoder = app.encoder.base_model + decoder = app.decoder.base_model k_cache_cross, v_cache_cross = encoder(mel_input) sample_len = MEAN_DECODE_LEN @@ -86,7 +85,8 @@ def run_test_transcribe(whisper_version): Test that WhisperApp produces end to end transcription results that matches with the original model """ - mel_input = load_mel_input() + app = WhisperApp(Whisper.from_source_model(whisper.load_model(whisper_version))) + audio, mel_input, sample_rate = load_sample_audio_input(app) # Run inference with OpenAI whisper with torch.no_grad(): @@ -97,8 +97,6 @@ def run_test_transcribe(whisper_version): results = model.decode(torch.from_numpy(mel_input).float(), options) text_orig = results[0].text - app = WhisperApp(Whisper.from_source_model(model)) - # Perform transcription - transcription = app.transcribe(mel_input) + transcription = app.transcribe(audio, sample_rate) assert transcription == text_orig diff --git a/qai_hub_models/models/_shared/yolo/app.py b/qai_hub_models/models/_shared/yolo/app.py index aea01975..12e28009 100644 --- a/qai_hub_models/models/_shared/yolo/app.py +++ b/qai_hub_models/models/_shared/yolo/app.py @@ -126,18 +126,15 @@ class scores per batch multiplied by confidence: List element shape is [num_pred self.check_image_size(NCHW_fp32_torch_frames) # Run prediction - with torch.no_grad(): - if self.model_includes_postprocessing: - pred_boxes, pred_scores, pred_class_idx = self.model( - NCHW_fp32_torch_frames - ) - else: - model_output = self.model(NCHW_fp32_torch_frames) - if isinstance(model_output, torch.Tensor): - model_output = (model_output,) - pred_boxes, pred_scores, pred_class_idx = self.pre_nms_postprocess( - *model_output - ) + if self.model_includes_postprocessing: + pred_boxes, pred_scores, pred_class_idx = self.model(NCHW_fp32_torch_frames) + else: + model_output = self.model(NCHW_fp32_torch_frames) + if isinstance(model_output, torch.Tensor): + model_output = (model_output,) + pred_boxes, pred_scores, pred_class_idx = self.pre_nms_postprocess( + *model_output + ) # Non Maximum Suppression on each batch pred_boxes, pred_scores, pred_class_idx = batched_nms( diff --git a/qai_hub_models/models/aotgan/export.py b/qai_hub_models/models/aotgan/export.py index ab8656d4..010d04b4 100644 --- a/qai_hub_models/models/aotgan/export.py +++ b/qai_hub_models/models/aotgan/export.py @@ -38,7 +38,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,14 +117,13 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image,mask" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if 
target_runtime != TargetRuntime.ONNX else "" ) @@ -170,7 +169,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image,mask", sample_inputs, target_runtime ) @@ -190,7 +189,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -212,7 +211,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) @@ -227,7 +226,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_ort=False) + parser = export_parser(model_cls=Model, supports_onnx=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/aotgan/model.py b/qai_hub_models/models/aotgan/model.py index d5d33563..f08c7afd 100644 --- a/qai_hub_models/models/aotgan/model.py +++ b/qai_hub_models/models/aotgan/model.py @@ -14,6 +14,7 @@ CachedWebModelAsset, SourceAsRoot, load_image, + wipe_sys_modules, ) from qai_hub_models.utils.base_model import BaseModel from qai_hub_models.utils.input_spec import InputSpec @@ -66,6 +67,10 @@ def from_pretrained(cls, ckpt_name: str = DEFAULT_WEIGHTS): MODEL_ASSET_VERSION, source_repo_patches=AOTGAN_SOURCE_PATCHES, ): + import src + + wipe_sys_modules(src) + from src.model.aotgan import InpaintGenerator # AOT-GAN InpaintGenerator uses ArgParser to diff --git a/qai_hub_models/models/aotgan/perf.yaml b/qai_hub_models/models/aotgan/perf.yaml index 98f6395e..44744ef1 100644 --- a/qai_hub_models/models/aotgan/perf.yaml +++ b/qai_hub_models/models/aotgan/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: AOT-GAN performance_metrics: - torchscript_onnx_tflite: - inference_time: 164177.0 - throughput: 6.0909871662900406 + inference_time: 152887.0 + throughput: 6.540778483455101 estimated_peak_memory_range: - min: 3293184 - max: 6670400 + min: 16384 + max: 10328880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: j1gle2z8p + job_id: jmg986lmp job_status: Passed torchscript_onnx_qnn: - inference_time: 165278.0 - throughput: 6.050412033059452 + inference_time: 152765.0 + throughput: 6.546002029260629 estimated_peak_memory_range: - min: 4321280 - max: 32279608 + min: 4263936 + max: 23819360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +65,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 275 - job_id: jwgoen0xp + job_id: jqp48zl2g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +74,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 
8 Gen 2 - timestamp: '2024-06-08T22:15:16Z' + timestamp: '2024-06-22T22:16:41Z' - torchscript_onnx_tflite: - inference_time: 120342.0 - throughput: 8.309650828472188 + inference_time: 112110.0 + throughput: 8.919810900008919 estimated_peak_memory_range: - min: 2510848 - max: 224329120 + min: 2555904 + max: 193280688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -86,14 +88,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jw56qzj0g + job_id: jnp13r4n5 job_status: Passed torchscript_onnx_qnn: - inference_time: 121373.0 - throughput: 8.2390647013751 + inference_time: 112558.0 + throughput: 8.884308534266777 estimated_peak_memory_range: - min: 0 - max: 141486816 + min: 4222976 + max: 114497360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +103,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 275 - job_id: j1pvzrojg + job_id: j0pxmwk8g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +112,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:15:17Z' + timestamp: '2024-06-22T22:16:42Z' - torchscript_onnx_tflite: - inference_time: 164129.0 - throughput: 6.092768493075568 + inference_time: 152465.0 + throughput: 6.558882366444758 estimated_peak_memory_range: - min: 12288 - max: 2291528 + min: 3207168 + max: 6406512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -124,14 +126,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: j1p3q13l5 + job_id: jvgd0jx6p job_status: Passed torchscript_onnx_qnn: - inference_time: 164665.0 - throughput: 6.072935960890292 + inference_time: 152929.0 + throughput: 6.53898214203977 estimated_peak_memory_range: - min: 4337664 - max: 28704480 + min: 4329472 + max: 24015168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -139,7 +141,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 275 - job_id: jlpe4w115 + job_id: jegnxj6j5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -148,10 +150,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:15:19Z' + timestamp: '2024-06-22T22:16:45Z' + - torchscript_onnx_tflite: + inference_time: 152823.0 + throughput: 6.543517664225934 + estimated_peak_memory_range: + min: 3313664 + max: 6413032 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 235 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 235 + job_id: jz576qyng + job_status: Passed + torchscript_onnx_qnn: + inference_time: 153171.0 + throughput: 6.528650984847001 + estimated_peak_memory_range: + min: 3194880 + max: 22733104 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 275 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 275 + job_id: jopr9zvkp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:16:46Z' - torchscript_onnx_qnn: - inference_time: 145570.0 - throughput: 6.869547296833138 + inference_time: 102536.0 + throughput: 9.752672232191621 estimated_peak_memory_range: min: 4202496 max: 4202496 @@ -162,7 +202,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 275 - job_id: j7gjk2mx5 + job_id: jo5m4jn75 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -171,4 +211,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:15:18Z' 
+ timestamp: '2024-06-22T22:16:44Z' diff --git a/qai_hub_models/models/common.py b/qai_hub_models/models/common.py index fc75c06c..c76a0f25 100644 --- a/qai_hub_models/models/common.py +++ b/qai_hub_models/models/common.py @@ -11,16 +11,18 @@ class TargetRuntime(Enum): TFLITE = 0 QNN = 1 - ORT = 2 - PRECOMPILED_ORT = 3 + ONNX = 2 + PRECOMPILED_QNN_ONNX = 3 def __str__(self): return self.name.lower() @property def long_name(self): - if "precompiled" not in self.name.lower(): + if self.name.lower() in {"tflite", "qnn"}: return f"torchscript_onnx_{self.name.lower()}" + elif self.name.lower() == "onnx": + return f"torchscript_{self.name.lower()}" return f"{self.name.lower()}" diff --git a/qai_hub_models/models/controlnet_quantized/export.py b/qai_hub_models/models/controlnet_quantized/export.py index fedb6349..022f7297 100644 --- a/qai_hub_models/models/controlnet_quantized/export.py +++ b/qai_hub_models/models/controlnet_quantized/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, components: Optional[List[str]] = None, skip_profiling: bool = False, diff --git a/qai_hub_models/models/convnext_tiny/export.py b/qai_hub_models/models/convnext_tiny/export.py index 3e52ca90..5b67cefd 100644 --- a/qai_hub_models/models/convnext_tiny/export.py +++ b/qai_hub_models/models/convnext_tiny/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/convnext_tiny/perf.yaml b/qai_hub_models/models/convnext_tiny/perf.yaml index 70048eca..e16e930b 100644 --- a/qai_hub_models/models/convnext_tiny/perf.yaml +++ b/qai_hub_models/models/convnext_tiny/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: ConvNext-Tiny performance_metrics: - torchscript_onnx_tflite: - inference_time: 5717.0 - throughput: 174.91691446562882 + inference_time: 5590.0 + throughput: 178.89087656529517 estimated_peak_memory_range: - min: 45056 - max: 
2631376 + min: 28672 + max: 2570872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 328 - job_id: jvgd7qleg + job_id: jogkdjvvp job_status: Passed torchscript_onnx_qnn: - inference_time: 3769.0 - throughput: 265.32236667551075 + inference_time: 3772.0 + throughput: 265.11134676564154 estimated_peak_memory_range: - min: 81920 - max: 202159384 + min: 86016 + max: 202074888 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 223 - job_id: j0pxe6215 + job_id: j1p38ynm5 job_status: Passed - torchscript_onnx_ort: - inference_time: 16427.0 - throughput: 60.875388080599016 + torchscript_onnx: + inference_time: 16307.0 + throughput: 61.32335806708775 estimated_peak_memory_range: - min: 110592 - max: 152489568 + min: 12288 + max: 154338792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jep239n4g + job_id: jygzw124g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:16:04Z' + timestamp: '2024-06-22T22:17:35Z' - torchscript_onnx_tflite: - inference_time: 3988.0 - throughput: 250.75225677031094 + inference_time: 3967.0 + throughput: 252.07965717166624 estimated_peak_memory_range: min: 16384 - max: 212477920 + max: 216083312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 328 - job_id: jz57vl3l5 + job_id: jn5qwj0e5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2732.0 - throughput: 366.03221083455344 + inference_time: 2744.0 + throughput: 364.4314868804665 estimated_peak_memory_range: - min: 741376 - max: 87297136 + min: 618496 + max: 80298128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 223 - job_id: jo5mv6yw5 + job_id: jwgomjz15 job_status: Passed - torchscript_onnx_ort: - inference_time: 11884.0 - throughput: 84.14675193537529 + torchscript_onnx: + inference_time: 11827.0 + throughput: 84.5522955948254 estimated_peak_memory_range: - min: 139571200 - max: 200346752 + min: 643072 + max: 54405632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jqpyvj07p + job_id: jz5wxjw4p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:16:05Z' + timestamp: '2024-06-22T22:17:36Z' - torchscript_onnx_tflite: - inference_time: 5701.0 - throughput: 175.40782318891422 + inference_time: 5622.0 + throughput: 177.87264318747776 estimated_peak_memory_range: - min: 49152 - max: 2985728 + min: 53248 + max: 2296200 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 328 - job_id: jqp4jd0vp + job_id: j1gl7j425 job_status: Passed torchscript_onnx_qnn: - inference_time: 3779.0 - throughput: 264.6202699126753 + inference_time: 3788.0 + throughput: 263.99155227032736 estimated_peak_memory_range: - min: 94208 - max: 182002576 + min: 86016 + max: 160277224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 223 - job_id: jopr12j9g + job_id: j7gj1jd1g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:16:03Z' + timestamp: '2024-06-22T22:17:32Z' + - torchscript_onnx_tflite: + inference_time: 5592.0 + throughput: 178.826895565093 + estimated_peak_memory_range: + min: 28672 + max: 2290704 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 328 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 328 + job_id: jw56vk2np + job_status: Passed + torchscript_onnx_qnn: + inference_time: 3785.0 + throughput: 264.2007926023778 + estimated_peak_memory_range: + min: 32768 + max: 202380832 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 223 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 223 + job_id: jlpe2jo8p + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:17:34Z' - torchscript_onnx_qnn: - inference_time: 3907.0 - throughput: 255.9508574353724 + inference_time: 3768.0 + throughput: 265.3927813163482 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 223 - job_id: jegnrm8r5 + job_id: j1pv4jqzp job_status: Passed - torchscript_onnx_ort: - inference_time: 16908.0 - throughput: 59.143600662408325 + torchscript_onnx: + inference_time: 16939.0 + throughput: 59.03536218194699 estimated_peak_memory_range: - min: 294563840 - max: 294563840 + min: 294608896 + max: 294608896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: j2p0e2765 + job_id: jmg9860mp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:16:06Z' + timestamp: '2024-06-22T22:17:37Z' diff --git a/qai_hub_models/models/convnext_tiny_w8a16_quantized/export.py b/qai_hub_models/models/convnext_tiny_w8a16_quantized/export.py index 39153360..e5aac917 100644 --- a/qai_hub_models/models/convnext_tiny_w8a16_quantized/export.py +++ b/qai_hub_models/models/convnext_tiny_w8a16_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: 
target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -227,7 +227,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_tflite=False, supports_ort=False) + parser = export_parser(model_cls=Model, supports_tflite=False, supports_onnx=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/convnext_tiny_w8a16_quantized/perf.yaml b/qai_hub_models/models/convnext_tiny_w8a16_quantized/perf.yaml index 431c0b4f..08823d30 100644 --- a/qai_hub_models/models/convnext_tiny_w8a16_quantized/perf.yaml +++ b/qai_hub_models/models/convnext_tiny_w8a16_quantized/perf.yaml @@ -11,6 +11,8 @@ aggregated: - QCS8250 (Proxy) - QCS8550 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -30,6 +32,8 @@ aggregated: supported_chipsets: - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -39,11 +43,11 @@ models: - name: ConvNext-Tiny-w8a16-Quantized performance_metrics: - torchscript_onnx_qnn: - inference_time: 3272.0 - throughput: 305.6234718826406 + inference_time: 3120.0 + throughput: 320.5128205128205 estimated_peak_memory_range: - min: 323584 - max: 8383168 + min: 16384 + max: 118146192 primary_compute_unit: NPU precision: int8 layer_info: @@ -51,7 +55,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jogkrqm25 + job_id: jvgd0jn6p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -60,13 +64,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:16:49Z' + timestamp: '2024-06-22T22:18:20Z' - torchscript_onnx_qnn: - inference_time: 2286.0 - throughput: 437.4453193350831 + inference_time: 2222.0 + throughput: 450.04500450045003 estimated_peak_memory_range: - min: 0 - max: 90112528 + min: 315392 + max: 80248880 primary_compute_unit: NPU precision: int8 layer_info: @@ -74,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jn5q9ro4p + job_id: jz5wxjwzp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -83,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:16:50Z' + timestamp: '2024-06-22T22:18:21Z' - torchscript_onnx_qnn: - inference_time: 3255.0 - throughput: 307.21966205837174 + inference_time: 3133.0 + throughput: 319.1828917969997 estimated_peak_memory_range: - min: 16384 - max: 11232112 + min: 28672 + max: 97061744 primary_compute_unit: NPU precision: int8 layer_info: @@ -97,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jw56qzl0g + job_id: jnp13r2k5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -106,13 +110,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:16:52Z' + timestamp: '2024-06-22T22:18:23Z' + - torchscript_onnx_qnn: + inference_time: 3120.0 + throughput: 320.5128205128205 + estimated_peak_memory_range: + min: 192512 + max: 129273744 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 215 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 215 + job_id: jvgd0jnkp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:18:25Z' - torchscript_onnx_qnn: - inference_time: 
3567.0 - throughput: 280.3476310625175 + inference_time: 3257.0 + throughput: 307.0310101320233 estimated_peak_memory_range: - min: 221184 - max: 221184 + min: 331776 + max: 331776 primary_compute_unit: NPU precision: int8 layer_info: @@ -120,7 +147,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: j1gle2r8p + job_id: jmg9860qp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -129,4 +156,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:16:51Z' + timestamp: '2024-06-22T22:18:22Z' diff --git a/qai_hub_models/models/convnext_tiny_w8a8_quantized/export.py b/qai_hub_models/models/convnext_tiny_w8a8_quantized/export.py index 54c73379..66fc1288 100644 --- a/qai_hub_models/models/convnext_tiny_w8a8_quantized/export.py +++ b/qai_hub_models/models/convnext_tiny_w8a8_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -227,7 +227,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_tflite=False, supports_ort=False) + parser = export_parser(model_cls=Model, supports_tflite=False, supports_onnx=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/convnext_tiny_w8a8_quantized/perf.yaml b/qai_hub_models/models/convnext_tiny_w8a8_quantized/perf.yaml index cc741e4a..d1248c52 100644 --- a/qai_hub_models/models/convnext_tiny_w8a8_quantized/perf.yaml +++ b/qai_hub_models/models/convnext_tiny_w8a8_quantized/perf.yaml @@ -11,6 +11,8 @@ aggregated: - QCS8250 (Proxy) - QCS8550 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -30,6 +32,8 @@ aggregated: supported_chipsets: - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -39,11 +43,11 @@ models: - name: ConvNext-Tiny-w8a8-Quantized performance_metrics: - torchscript_onnx_qnn: - inference_time: 1723.0 - throughput: 580.3830528148578 + inference_time: 1711.0 + throughput: 584.4535359438925 estimated_peak_memory_range: - min: 12288 - max: 127334120 + min: 16384 + max: 126900200 primary_compute_unit: NPU precision: int8 layer_info: @@ -51,7 +55,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jygzvjkkp + job_id: jqpyn9rrg job_status: Passed 
reference_device_info: name: Samsung Galaxy S23 @@ -60,13 +64,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:17:38Z' + timestamp: '2024-06-22T22:19:13Z' - torchscript_onnx_qnn: - inference_time: 1207.0 - throughput: 828.5004142502071 + inference_time: 1194.0 + throughput: 837.5209380234506 estimated_peak_memory_range: - min: 12288 - max: 87553664 + min: 0 + max: 78843680 primary_compute_unit: NPU precision: int8 layer_info: @@ -74,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jz5wmq66g + job_id: j2p0kn325 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -83,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:17:39Z' + timestamp: '2024-06-22T22:19:15Z' - torchscript_onnx_qnn: - inference_time: 1724.0 - throughput: 580.046403712297 + inference_time: 1730.0 + throughput: 578.0346820809249 estimated_peak_memory_range: - min: 20480 - max: 10474536 + min: 167936 + max: 9009184 primary_compute_unit: NPU precision: int8 layer_info: @@ -97,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jnp1qez2g + job_id: jogkdj7yp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -106,13 +110,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:17:41Z' + timestamp: '2024-06-22T22:19:17Z' + - torchscript_onnx_qnn: + inference_time: 1732.0 + throughput: 577.3672055427252 + estimated_peak_memory_range: + min: 12288 + max: 116157904 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 215 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 215 + job_id: jn5qwje75 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:19:18Z' - torchscript_onnx_qnn: - inference_time: 1917.0 - throughput: 521.6484089723526 + inference_time: 1814.0 + throughput: 551.2679162072767 estimated_peak_memory_range: - min: 503808 - max: 503808 + min: 442368 + max: 442368 primary_compute_unit: NPU precision: int8 layer_info: @@ -120,7 +147,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jmg99wnlg + job_id: j1p88l0zp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -129,4 +156,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:17:40Z' + timestamp: '2024-06-22T22:19:16Z' diff --git a/qai_hub_models/models/ddrnet23_slim/app.py b/qai_hub_models/models/ddrnet23_slim/app.py index 50ecac58..ee216629 100644 --- a/qai_hub_models/models/ddrnet23_slim/app.py +++ b/qai_hub_models/models/ddrnet23_slim/app.py @@ -77,9 +77,8 @@ def segment_image( input_transform = normalize_image_transform() NCHW_fp32_torch_frames = input_transform(NCHW_fp32_torch_frames) - with torch.no_grad(): - # pred_mask is 8x downsampled - pred_masks = self.model(NCHW_fp32_torch_frames) + # pred_mask is 8x downsampled + pred_masks = self.model(NCHW_fp32_torch_frames) # Upsample pred mask to original image size # Need to upsample in the probability space, not in class labels diff --git a/qai_hub_models/models/ddrnet23_slim/export.py b/qai_hub_models/models/ddrnet23_slim/export.py index 6f64655c..4d06a440 100644 --- a/qai_hub_models/models/ddrnet23_slim/export.py +++ b/qai_hub_models/models/ddrnet23_slim/export.py @@ -38,7 +38,7 @@ def 
export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,13 +117,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -169,7 +168,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -187,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -209,7 +208,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) @@ -225,7 +224,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/ddrnet23_slim/model.py b/qai_hub_models/models/ddrnet23_slim/model.py index b9e0d9c8..5fbcac67 100644 --- a/qai_hub_models/models/ddrnet23_slim/model.py +++ b/qai_hub_models/models/ddrnet23_slim/model.py @@ -74,7 +74,7 @@ def from_pretrained(cls, checkpoint_path: str | None = None): model_dict.update(pretrained_dict) ddrnetslim_model.load_state_dict(model_dict) - ddrnetslim_model.to(torch.device("cpu")).eval() + ddrnetslim_model.to(torch.device("cpu")) return cls(ddrnetslim_model) diff --git a/qai_hub_models/models/ddrnet23_slim/perf.yaml b/qai_hub_models/models/ddrnet23_slim/perf.yaml index 0bb7c379..3c1376d8 100644 --- a/qai_hub_models/models/ddrnet23_slim/perf.yaml +++ b/qai_hub_models/models/ddrnet23_slim/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: DDRNet23-Slim performance_metrics: - torchscript_onnx_tflite: - inference_time: 6650.0 - throughput: 150.37593984962405 + inference_time: 5158.0 + throughput: 193.87359441644048 estimated_peak_memory_range: - min: 57344 - max: 27662296 + min: 1024000 + max: 2977832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 131 - job_id: jo5mv3xw5 + job_id: jlpe2je7p job_status: Passed 
- torchscript_onnx_ort: - inference_time: 9735.0 - throughput: 102.7221366204417 + torchscript_onnx: + inference_time: 9639.0 + throughput: 103.74520178441747 estimated_peak_memory_range: - min: 12599296 - max: 48937112 + min: 11792384 + max: 39932640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +65,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 155 - job_id: jep23ly4g + job_id: jo5m4jqy5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +74,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:18:08Z' + timestamp: '2024-06-22T22:19:54Z' - torchscript_onnx_tflite: - inference_time: 4742.0 - throughput: 210.88148460565162 + inference_time: 3672.0 + throughput: 272.33115468409585 estimated_peak_memory_range: - min: 16384 - max: 73234384 + min: 32768 + max: 70885712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -86,14 +88,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 131 - job_id: jegnr3vr5 + job_id: jygzw1ozg job_status: Passed - torchscript_onnx_ort: - inference_time: 6012.0 - throughput: 166.333998669328 + torchscript_onnx: + inference_time: 6290.0 + throughput: 158.9825119236884 estimated_peak_memory_range: - min: 524288 - max: 42757008 + min: 11943936 + max: 50404560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +103,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 155 - job_id: jqpyv637p + job_id: jegnxjmv5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +112,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:18:09Z' + timestamp: '2024-06-22T22:19:55Z' - torchscript_onnx_tflite: - inference_time: 6672.0 - throughput: 149.8800959232614 + inference_time: 5176.0 + throughput: 193.19938176197837 estimated_peak_memory_range: - min: 991232 - max: 15704000 + min: 1015808 + max: 16060832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -124,7 +126,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 131 - job_id: jopr1e39g + job_id: jz5wxj2zp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,10 +135,33 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:18:06Z' - - torchscript_onnx_ort: - inference_time: 9609.0 - throughput: 104.06910188365075 + timestamp: '2024-06-22T22:19:46Z' + - torchscript_onnx_tflite: + inference_time: 5150.0 + throughput: 194.1747572815534 + estimated_peak_memory_range: + min: 1003520 + max: 2689176 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 131 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 131 + job_id: jmg986jqp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:19:47Z' + - torchscript_onnx: + inference_time: 9690.0 + throughput: 103.19917440660474 estimated_peak_memory_range: min: 9854976 max: 9854976 @@ -147,7 +172,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 155 - job_id: j2p0el065 + job_id: jopr9z2vp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +181,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:18:10Z' + timestamp: '2024-06-22T22:19:56Z' diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet/export.py 
b/qai_hub_models/models/deeplabv3_plus_mobilenet/export.py index e1a7394c..bcb43a12 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet/export.py +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet/export.py @@ -38,7 +38,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,13 +117,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -169,7 +168,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -187,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -209,7 +208,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet/model.py b/qai_hub_models/models/deeplabv3_plus_mobilenet/model.py index 29b2cacb..35adb487 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet/model.py +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet/model.py @@ -29,7 +29,6 @@ def from_pretrained(cls, normalize_input: bool = True) -> DeepLabV3PlusMobilenet ).fetch() checkpoint = torch.load(dst, map_location=torch.device("cpu")) model.load_state_dict(checkpoint["state_dict"]) - model.eval() return cls(model, normalize_input) diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet/perf.yaml b/qai_hub_models/models/deeplabv3_plus_mobilenet/perf.yaml index dc3430eb..a9ad61d7 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet/perf.yaml +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: DeepLabV3-Plus-MobileNet performance_metrics: - torchscript_onnx_tflite: - inference_time: 13047.0 - throughput: 76.64597225415804 + inference_time: 13087.0 + throughput: 76.41170627340108 estimated_peak_memory_range: - min: 21032960 - max: 22679264 + min: 19292160 + max: 21253792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 98 - job_id: jogkr3x25 + job_id: 
jqpyn9jrg job_status: Passed torchscript_onnx_qnn: - inference_time: 12852.0 - throughput: 77.8089013383131 + inference_time: 12849.0 + throughput: 77.82706825433885 estimated_peak_memory_range: - min: 4210688 - max: 20359032 + min: 3178496 + max: 18335232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 124 - job_id: jw56qn40g + job_id: jn5qwjr75 job_status: Passed - torchscript_onnx_ort: - inference_time: 17763.0 - throughput: 56.296796712267074 + torchscript_onnx: + inference_time: 18989.0 + throughput: 52.66206751277055 estimated_peak_memory_range: - min: 40357888 - max: 70272240 + min: 38780928 + max: 77536960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j7gjkenx5 + job_id: j1pv4jr7p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:18:39Z' + timestamp: '2024-06-22T22:20:29Z' - torchscript_onnx_tflite: - inference_time: 9612.0 - throughput: 104.03662089055348 + inference_time: 9601.0 + throughput: 104.15581710238517 estimated_peak_memory_range: - min: 32768 - max: 69905408 + min: 36864 + max: 74388512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 98 - job_id: jn5q93q4p + job_id: j2p0kn225 job_status: Passed torchscript_onnx_qnn: - inference_time: 9482.0 - throughput: 105.4629824931449 + inference_time: 9463.0 + throughput: 105.67473317129874 estimated_peak_memory_range: - min: 3174400 - max: 58616848 + min: 3194880 + max: 52667232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 124 - job_id: j1p3qe0l5 + job_id: j1gl7j2e5 job_status: Passed - torchscript_onnx_ort: - inference_time: 13976.0 - throughput: 71.55123068116771 + torchscript_onnx: + inference_time: 14165.0 + throughput: 70.5965407695023 estimated_peak_memory_range: - min: 53886976 - max: 88707568 + min: 50692096 + max: 83221760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jlpe4km15 + job_id: j7gj1j27g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:18:40Z' + timestamp: '2024-06-22T22:20:30Z' - torchscript_onnx_tflite: - inference_time: 13150.0 - throughput: 76.04562737642586 + inference_time: 13215.0 + throughput: 75.67158531971245 estimated_peak_memory_range: - min: 22147072 - max: 24149720 + min: 22130688 + max: 24019416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 98 - job_id: j1gle3m8p + job_id: j1p88lmzp job_status: Passed torchscript_onnx_qnn: - inference_time: 12879.0 - throughput: 77.64577995185962 + inference_time: 12920.0 + throughput: 77.39938080495357 estimated_peak_memory_range: - min: 3198976 - max: 19885424 + min: 3182592 + max: 18873576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 124 - job_id: j1pvzvkjg + job_id: j1p38y1x5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: 
os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:18:38Z' + timestamp: '2024-06-22T22:20:26Z' + - torchscript_onnx_tflite: + inference_time: 13135.0 + throughput: 76.13247049866769 + estimated_peak_memory_range: + min: 22151168 + max: 38548096 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 98 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 98 + job_id: jogkdjqyp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 12927.0 + throughput: 77.35746886361878 + estimated_peak_memory_range: + min: 3198976 + max: 17945496 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 124 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 124 + job_id: jwgomjn45 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:20:27Z' - torchscript_onnx_qnn: - inference_time: 16510.0 - throughput: 60.56935190793458 + inference_time: 12858.0 + throughput: 77.77259293824856 estimated_peak_memory_range: min: 3170304 max: 3170304 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 124 - job_id: jwgoe36xp + job_id: jw56vkzvp job_status: Passed - torchscript_onnx_ort: - inference_time: 16653.0 - throughput: 60.04924037710923 + torchscript_onnx: + inference_time: 16624.0 + throughput: 60.15399422521656 estimated_peak_memory_range: - min: 105144320 - max: 105144320 + min: 104112128 + max: 104112128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jygzvrdkp + job_id: jlpe2jw7p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:18:41Z' + timestamp: '2024-06-22T22:20:31Z' diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/export.py b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/export.py index 88702832..9aa372f3 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/export.py +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -127,7 +127,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -177,7 +177,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -195,7 +195,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, 
exist_ok=True) @@ -217,7 +217,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/model.py b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/model.py index 88e47bf8..5965fb3d 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/model.py +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/model.py @@ -79,6 +79,5 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() final_model = cls(sim) return final_model diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/perf.yaml b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/perf.yaml index d4f699a6..368425e4 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/perf.yaml +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: DeepLabV3-Plus-MobileNet-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 3596.0 - throughput: 278.08676307007784 + inference_time: 3613.0 + throughput: 276.7783005812344 estimated_peak_memory_range: - min: 16384 - max: 1830768 + min: 24576 + max: 2019400 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 102 - job_id: jmg99wllg + job_id: jz5wxj3zp job_status: Passed torchscript_onnx_qnn: - inference_time: 5322.0 - throughput: 187.89928598271325 + inference_time: 5334.0 + throughput: 187.47656542932134 estimated_peak_memory_range: - min: 806912 - max: 7295144 + min: 811008 + max: 7181872 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 100 - job_id: jnp1qe48g - job_status: Passed - torchscript_onnx_ort: - inference_time: 16175.0 - throughput: 61.82380216383308 - estimated_peak_memory_range: - min: 42803200 - max: 54255496 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 120 - layers_on_gpu: 0 - layers_on_cpu: 51 - total_layers: 171 - job_id: j0pxeyk35 + job_id: j0pxmw6jg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:19:29Z' + timestamp: '2024-06-22T22:21:16Z' - torchscript_onnx_tflite: - inference_time: 2668.0 - throughput: 374.8125937031484 + inference_time: 2688.0 + throughput: 372.0238095238095 estimated_peak_memory_range: min: 12288 - max: 60104416 + max: 61340080 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 102 - job_id: jnp1qe42g + job_id: jmg986yqp job_status: Passed torchscript_onnx_qnn: - inference_time: 3937.0 - throughput: 254.00050800101602 + inference_time: 3901.0 + throughput: 256.3445270443476 estimated_peak_memory_range: min: 802816 - max: 61474288 + max: 57510912 
primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 100 - job_id: jvgd7oxrg - job_status: Passed - torchscript_onnx_ort: - inference_time: 12210.0 - throughput: 81.9000819000819 - estimated_peak_memory_range: - min: 33013760 - max: 87227648 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 120 - layers_on_gpu: 0 - layers_on_cpu: 51 - total_layers: 171 - job_id: jo5mv3nd5 + job_id: jo5m4j6y5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +120,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:19:30Z' + timestamp: '2024-06-22T22:21:18Z' - torchscript_onnx_tflite: inference_time: 3596.0 throughput: 278.08676307007784 estimated_peak_memory_range: min: 12288 - max: 8750088 + max: 9579064 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 102 - job_id: jvgd7oxeg + job_id: jnp13rwk5 job_status: Passed torchscript_onnx_qnn: - inference_time: 5333.0 - throughput: 187.51171948246764 + inference_time: 5340.0 + throughput: 187.26591760299627 estimated_peak_memory_range: - min: 20480 - max: 12661968 + min: 0 + max: 156467336 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 100 - job_id: jqp4jvl8p + job_id: jopr9zevp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +158,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:19:28Z' + timestamp: '2024-06-22T22:21:20Z' - torchscript_onnx_tflite: - inference_time: 14989.0 - throughput: 66.71559143371806 + inference_time: 3607.0 + throughput: 277.2387025228722 estimated_peak_memory_range: - min: 12288 - max: 39155056 + min: 16384 + max: 2009088 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,7 +172,45 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 102 - job_id: jz5wmqe3g + job_id: jvgd0jqkp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 5336.0 + throughput: 187.4062968515742 + estimated_peak_memory_range: + min: 24576 + max: 11405976 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 100 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 100 + job_id: jep2j2lx5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:21:21Z' + - torchscript_onnx_tflite: + inference_time: 14963.0 + throughput: 66.83151774376796 + estimated_peak_memory_range: + min: 40960 + max: 41957392 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 102 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 102 + job_id: jz576qlqg job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -207,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-08T22:19:23Z' + timestamp: '2024-06-22T22:21:14Z' - torchscript_onnx_tflite: - inference_time: 126163.0 - throughput: 7.926254131559966 + inference_time: 121839.0 + throughput: 8.20755258989322 estimated_peak_memory_range: - min: 11575296 - max: 14463800 + min: 11517952 + max: 28744328 primary_compute_unit: NPU precision: int8 layer_info: @@ -221,7 +233,7 @@ models: layers_on_gpu: 3 layers_on_cpu: 0 total_layers: 102 - job_id: jmg99wlwg + 
job_id: jqp48zdqg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -230,13 +242,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-08T22:19:24Z' + timestamp: '2024-06-22T22:21:15Z' - torchscript_onnx_qnn: - inference_time: 5241.0 - throughput: 190.80328181644725 + inference_time: 4512.0 + throughput: 221.63120567375887 estimated_peak_memory_range: - min: 798720 - max: 798720 + min: 815104 + max: 815104 primary_compute_unit: NPU precision: int8 layer_info: @@ -244,22 +256,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 100 - job_id: jz57vxyv5 - job_status: Passed - torchscript_onnx_ort: - inference_time: 22921.0 - throughput: 43.628113956633655 - estimated_peak_memory_range: - min: 59097088 - max: 59097088 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 120 - layers_on_gpu: 0 - layers_on_cpu: 51 - total_layers: 171 - job_id: jegnr36k5 + job_id: jegnxj3v5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -268,4 +265,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:19:31Z' + timestamp: '2024-06-22T22:21:19Z' diff --git a/qai_hub_models/models/deeplabv3_resnet50/export.py b/qai_hub_models/models/deeplabv3_resnet50/export.py index 1f3fda9b..f9d79a7f 100644 --- a/qai_hub_models/models/deeplabv3_resnet50/export.py +++ b/qai_hub_models/models/deeplabv3_resnet50/export.py @@ -38,7 +38,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,14 +117,13 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0,output_1" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -170,7 +169,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -210,7 +209,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0,output_1", inference_result, target_runtime ) @@ -228,8 +227,8 @@ def main(): parser = export_parser( model_cls=Model, supports_qnn=False, - supports_ort=False, - supports_precompiled_ort=False, + supports_onnx=False, + supports_precompiled_qnn_onnx=False, ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/deeplabv3_resnet50/model.py b/qai_hub_models/models/deeplabv3_resnet50/model.py index 
9dc8cdb7..d43dff14 100644 --- a/qai_hub_models/models/deeplabv3_resnet50/model.py +++ b/qai_hub_models/models/deeplabv3_resnet50/model.py @@ -22,7 +22,7 @@ class DeepLabV3_ResNet50(DeepLabV3Model): @classmethod def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> DeepLabV3_ResNet50: - model = tv_models.segmentation.deeplabv3_resnet50(weights=weights).eval() + model = tv_models.segmentation.deeplabv3_resnet50(weights=weights) return cls(model) def get_hub_compile_options( diff --git a/qai_hub_models/models/deeplabv3_resnet50/perf.yaml b/qai_hub_models/models/deeplabv3_resnet50/perf.yaml index a21c01a6..f21df1a7 100644 --- a/qai_hub_models/models/deeplabv3_resnet50/perf.yaml +++ b/qai_hub_models/models/deeplabv3_resnet50/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: DeepLabV3-ResNet50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 292980.0 - throughput: 3.413202266366305 + inference_time: 294244.0 + throughput: 3.39853998722149 estimated_peak_memory_range: - min: 2162688 - max: 149701296 + min: 12288 + max: 149248280 primary_compute_unit: GPU precision: fp16 layer_info: @@ -48,7 +50,7 @@ models: layers_on_gpu: 95 layers_on_cpu: 0 total_layers: 95 - job_id: jep23lkrg + job_id: jw56vknvp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -57,13 +59,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:19:58Z' + timestamp: '2024-06-22T22:21:55Z' - torchscript_onnx_tflite: - inference_time: 223885.0 - throughput: 4.466578823949796 + inference_time: 211646.0 + throughput: 4.72487077478431 estimated_peak_memory_range: - min: 65536 - max: 32739680 + min: 21852160 + max: 55397216 primary_compute_unit: GPU precision: fp16 layer_info: @@ -71,7 +73,7 @@ models: layers_on_gpu: 95 layers_on_cpu: 0 total_layers: 95 - job_id: jqpyv618p + job_id: j1p38yex5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -80,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:19:59Z' + timestamp: '2024-06-22T22:21:56Z' - torchscript_onnx_tflite: - inference_time: 291243.0 - throughput: 3.4335589181542563 + inference_time: 290539.0 + throughput: 3.4418787150778383 estimated_peak_memory_range: - min: 5476352 - max: 182706000 + min: 2146304 + max: 149777592 primary_compute_unit: GPU precision: fp16 layer_info: @@ -94,7 +96,7 @@ models: layers_on_gpu: 95 layers_on_cpu: 0 total_layers: 95 - job_id: j2p0elz95 + job_id: jwgomj345 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -103,4 +105,27 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:20:00Z' + timestamp: '2024-06-22T22:21:57Z' + - torchscript_onnx_tflite: + inference_time: 291805.0 + throughput: 3.4269460770034783 + estimated_peak_memory_range: + min: 2203648 + max: 149323832 + primary_compute_unit: GPU + precision: fp16 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 95 + layers_on_cpu: 0 + total_layers: 95 + job_id: j1pv4jv7p + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: 
'2024-06-22T22:21:58Z' diff --git a/qai_hub_models/models/densenet121/export.py b/qai_hub_models/models/densenet121/export.py index 9f0c3592..43f99d5b 100644 --- a/qai_hub_models/models/densenet121/export.py +++ b/qai_hub_models/models/densenet121/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/densenet121/perf.yaml b/qai_hub_models/models/densenet121/perf.yaml index 4cfd87c1..8544b0cd 100644 --- a/qai_hub_models/models/densenet121/perf.yaml +++ b/qai_hub_models/models/densenet121/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: DenseNet-121 performance_metrics: - torchscript_onnx_tflite: - inference_time: 1946.0 - throughput: 513.874614594039 + inference_time: 1943.0 + throughput: 514.668039114771 estimated_peak_memory_range: - min: 20480 - max: 2555328 + min: 45056 + max: 2879152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 312 - job_id: jygzvr2op + job_id: jqp48zvqg job_status: Passed torchscript_onnx_qnn: - inference_time: 1998.0 - throughput: 500.5005005005005 + inference_time: 1986.0 + throughput: 503.5246727089627 estimated_peak_memory_range: - min: 647168 - max: 7884416 + min: 643072 + max: 41424160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: jnp1qe28g + job_id: jopr9zyvp job_status: Passed - torchscript_onnx_ort: - inference_time: 1954.0 - throughput: 511.77072671443193 + torchscript_onnx: + inference_time: 1989.0 + throughput: 502.76520864756156 estimated_peak_memory_range: - min: 16384 - max: 41751336 + min: 12288 + max: 56458408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: j0pxey935 + job_id: jogkdjyyp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: 
Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:20:34Z' + timestamp: '2024-06-22T22:22:38Z' - torchscript_onnx_tflite: - inference_time: 1318.0 - throughput: 758.7253414264036 + inference_time: 1314.0 + throughput: 761.03500761035 estimated_peak_memory_range: min: 12288 - max: 96529440 + max: 100921632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 312 - job_id: jz5wmqw3g + job_id: j0pxmwyjg job_status: Passed torchscript_onnx_qnn: - inference_time: 1329.0 - throughput: 752.4454477050414 + inference_time: 1321.0 + throughput: 757.002271006813 estimated_peak_memory_range: min: 618496 - max: 158201904 + max: 147109104 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: jvgd7onrg + job_id: jep2j2mx5 job_status: Passed - torchscript_onnx_ort: - inference_time: 1326.0 - throughput: 754.1478129713424 + torchscript_onnx: + inference_time: 1320.0 + throughput: 757.5757575757576 estimated_peak_memory_range: - min: 618496 - max: 52734944 + min: 0 + max: 38797472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: jo5mv3ed5 + job_id: jn5qwj275 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:20:35Z' + timestamp: '2024-06-22T22:22:39Z' - torchscript_onnx_tflite: - inference_time: 1932.0 - throughput: 517.5983436853002 + inference_time: 1921.0 + throughput: 520.5622071837585 estimated_peak_memory_range: - min: 24576 - max: 2118480 + min: 12288 + max: 2417360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 312 - job_id: jmg99w0wg + job_id: jo5m4j3y5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1991.0 - throughput: 502.26017076845807 + inference_time: 1981.0 + throughput: 504.79555779909134 estimated_peak_memory_range: - min: 16384 - max: 39662792 + min: 24576 + max: 40999864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: jqp4jvn8p + job_id: j2p0knr25 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:20:33Z' + timestamp: '2024-06-22T22:22:36Z' + - torchscript_onnx_tflite: + inference_time: 1936.0 + throughput: 516.5289256198347 + estimated_peak_memory_range: + min: 12288 + max: 1911888 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 312 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 312 + job_id: jegnxjev5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1980.0 + throughput: 505.050505050505 + estimated_peak_memory_range: + min: 20480 + max: 30511600 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 372 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 372 + job_id: j1p88l7zp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:22:37Z' - torchscript_onnx_qnn: - inference_time: 2224.0 - throughput: 449.64028776978415 + 
inference_time: 1994.0 + throughput: 501.5045135406219 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: jz57vx2v5 + job_id: jqpyn9drg job_status: Passed - torchscript_onnx_ort: - inference_time: 2023.0 - throughput: 494.3153732081068 + torchscript_onnx: + inference_time: 2032.0 + throughput: 492.12598425196853 estimated_peak_memory_range: - min: 647168 - max: 647168 + min: 692224 + max: 692224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: jegnr30k5 + job_id: j1gl7jke5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:20:36Z' + timestamp: '2024-06-22T22:22:41Z' diff --git a/qai_hub_models/models/detr_resnet101/export.py b/qai_hub_models/models/detr_resnet101/export.py index 8456a642..07919010 100644 --- a/qai_hub_models/models/detr_resnet101/export.py +++ b/qai_hub_models/models/detr_resnet101/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -186,7 +185,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -216,7 +215,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/detr_resnet101/perf.yaml b/qai_hub_models/models/detr_resnet101/perf.yaml index ab111618..405618e2 100644 --- a/qai_hub_models/models/detr_resnet101/perf.yaml +++ b/qai_hub_models/models/detr_resnet101/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,26 +38,26 @@ models: - name: DETR-ResNet101 performance_metrics: - torchscript_onnx_tflite: - inference_time: 24522.0 - throughput: 40.779708017290595 + 
inference_time: 23559.0 + throughput: 42.44662337111083 estimated_peak_memory_range: - min: 405504 - max: 3620824 + min: 397312 + max: 7286104 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 839 + layers_on_npu: 856 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 839 - job_id: jep23lxrg + total_layers: 856 + job_id: j1p38ymx5 job_status: Passed - torchscript_onnx_ort: - inference_time: 22510.0 - throughput: 44.4247001332741 + torchscript_onnx: + inference_time: 22403.0 + throughput: 44.63687898942106 estimated_peak_memory_range: - min: 53248 - max: 301197496 + min: 540672 + max: 282693624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +65,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: jw56qn26g + job_id: jvgd0jvkp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,28 +74,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:21:16Z' + timestamp: '2024-06-22T22:23:24Z' - torchscript_onnx_tflite: - inference_time: 17593.0 - throughput: 56.840788950150625 + inference_time: 16655.0 + throughput: 60.04202942059442 estimated_peak_memory_range: - min: 385024 - max: 284374432 + min: 401408 + max: 300844176 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 839 + layers_on_npu: 856 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 839 - job_id: jqpyv6z8p + total_layers: 856 + job_id: jwgomjv45 job_status: Passed - torchscript_onnx_ort: - inference_time: 15841.0 - throughput: 63.127327820213374 + torchscript_onnx: + inference_time: 15968.0 + throughput: 62.62525050100201 estimated_peak_memory_range: - min: 38055936 - max: 153822592 + min: 4907008 + max: 91770592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +103,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: j1p3qen35 + job_id: jz5wxj9jp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,21 +112,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:21:17Z' + timestamp: '2024-06-22T22:23:25Z' - torchscript_onnx_tflite: - inference_time: 24627.0 - throughput: 40.60583911966541 + inference_time: 23444.0 + throughput: 42.65483705852244 estimated_peak_memory_range: - min: 413696 - max: 3309184 + min: 430080 + max: 4002928 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 839 + layers_on_npu: 856 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 839 - job_id: j2p0el495 + total_layers: 856 + job_id: j1pv4jw7p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +135,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:21:11Z' - - torchscript_onnx_ort: - inference_time: 22958.0 - throughput: 43.557801202195314 + timestamp: '2024-06-22T22:23:15Z' + - torchscript_onnx_tflite: + inference_time: 23418.0 + throughput: 42.70219489281749 + estimated_peak_memory_range: + min: 401408 + max: 3285856 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 856 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 856 + job_id: j7gj1jl7g + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:23:16Z' + - torchscript_onnx: + inference_time: 22866.0 + throughput: 43.733053441791306 estimated_peak_memory_range: - min: 100909056 - max: 100909056 
+ min: 106487808 + max: 106487808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +172,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: jwgoe3zqp + job_id: jmg9864vp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +181,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:21:18Z' + timestamp: '2024-06-22T22:23:26Z' diff --git a/qai_hub_models/models/detr_resnet101_dc5/export.py b/qai_hub_models/models/detr_resnet101_dc5/export.py index 260f917d..36780ea0 100644 --- a/qai_hub_models/models/detr_resnet101_dc5/export.py +++ b/qai_hub_models/models/detr_resnet101_dc5/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -186,7 +185,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -216,7 +215,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/detr_resnet101_dc5/perf.yaml b/qai_hub_models/models/detr_resnet101_dc5/perf.yaml index 6760c8c6..d9839baa 100644 --- a/qai_hub_models/models/detr_resnet101_dc5/perf.yaml +++ b/qai_hub_models/models/detr_resnet101_dc5/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,26 +38,26 @@ models: - name: DETR-ResNet101-DC5 performance_metrics: - torchscript_onnx_tflite: - inference_time: 139662.0 - throughput: 7.160143775687016 + inference_time: 139854.0 + throughput: 7.150313898780157 estimated_peak_memory_range: - min: 1216512 - max: 4184536 + min: 135168 + max: 3555384 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 840 + layers_on_npu: 857 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 840 - job_id: j7gjkezv5 + total_layers: 857 + job_id: jvgd0jvlp job_status: Passed - torchscript_onnx_ort: - inference_time: 125062.0 - 
throughput: 7.996033967152292 + torchscript_onnx: + inference_time: 134388.0 + throughput: 7.4411405780278 estimated_peak_memory_range: - min: 2994176 - max: 315584184 + min: 3080192 + max: 305207456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +65,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: jz57vx0v5 + job_id: j2p0kn1e5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,28 +74,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:21:59Z' + timestamp: '2024-06-22T22:24:10Z' - torchscript_onnx_tflite: - inference_time: 106500.0 - throughput: 9.389671361502348 + inference_time: 107736.0 + throughput: 9.281948466622113 estimated_peak_memory_range: - min: 991232 - max: 494886848 + min: 339968 + max: 509199680 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 840 + layers_on_npu: 857 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 840 - job_id: jlpe4keo5 + total_layers: 857 + job_id: jz576qjrg job_status: Passed - torchscript_onnx_ort: - inference_time: 96040.0 - throughput: 10.412328196584756 + torchscript_onnx: + inference_time: 95702.0 + throughput: 10.44910242210194 estimated_peak_memory_range: - min: 4145152 - max: 167656240 + min: 4952064 + max: 150418448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +103,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: jqp4jvk8p + job_id: j1p88l38p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,21 +112,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:22:00Z' + timestamp: '2024-06-22T22:24:11Z' - torchscript_onnx_tflite: - inference_time: 139388.0 - throughput: 7.174218727580566 + inference_time: 138735.0 + throughput: 7.2079864489854755 estimated_peak_memory_range: - min: 1548288 - max: 4377008 + min: 1204224 + max: 3962280 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 840 + layers_on_npu: 857 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 840 - job_id: jygzvroop + total_layers: 857 + job_id: jqp48zxlg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +135,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:21:54Z' - - torchscript_onnx_ort: - inference_time: 124053.0 - throughput: 8.061070671406576 + timestamp: '2024-06-22T22:24:01Z' + - torchscript_onnx_tflite: + inference_time: 146672.0 + throughput: 6.81793389331297 + estimated_peak_memory_range: + min: 16384 + max: 3270192 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 857 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 857 + job_id: j0pxmw79g + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:24:03Z' + - torchscript_onnx: + inference_time: 124132.0 + throughput: 8.05594045048819 estimated_peak_memory_range: - min: 73572352 - max: 73572352 + min: 77479936 + max: 77479936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +172,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: j0pxeyn35 + job_id: jogkdjlop job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +181,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:22:01Z' + 
timestamp: '2024-06-22T22:24:12Z' diff --git a/qai_hub_models/models/detr_resnet50/export.py b/qai_hub_models/models/detr_resnet50/export.py index aae80346..78f4d4e0 100644 --- a/qai_hub_models/models/detr_resnet50/export.py +++ b/qai_hub_models/models/detr_resnet50/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -186,7 +185,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -216,7 +215,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/detr_resnet50/perf.yaml b/qai_hub_models/models/detr_resnet50/perf.yaml index fff85076..05978ec6 100644 --- a/qai_hub_models/models/detr_resnet50/perf.yaml +++ b/qai_hub_models/models/detr_resnet50/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,26 +38,26 @@ models: - name: DETR-ResNet50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 21615.0 - throughput: 46.26416840157298 + inference_time: 19708.0 + throughput: 50.74081591231987 estimated_peak_memory_range: - min: 2134016 - max: 5200288 + min: 413696 + max: 3185024 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 771 + layers_on_npu: 788 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 771 - job_id: jegnr3lk5 + total_layers: 788 + job_id: j1gl7j0l5 job_status: Passed - torchscript_onnx_ort: - inference_time: 16643.0 - throughput: 60.08532115604158 + torchscript_onnx: + inference_time: 16775.0 + throughput: 59.61251862891207 estimated_peak_memory_range: - min: 1540096 - max: 211446576 + min: 1146880 + max: 209226928 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +65,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 737 - job_id: j1gle36jp + job_id: jmg9863vp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,28 +74,28 @@ models: 
os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:22:37Z' + timestamp: '2024-06-22T22:24:51Z' - torchscript_onnx_tflite: - inference_time: 15132.0 - throughput: 66.08511763150939 + inference_time: 13806.0 + throughput: 72.43227582210633 estimated_peak_memory_range: - min: 36864 - max: 231347824 + min: 385024 + max: 245987728 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 771 + layers_on_npu: 788 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 771 - job_id: jep23l0rg + total_layers: 788 + job_id: jw56vk37p job_status: Passed - torchscript_onnx_ort: - inference_time: 11694.0 - throughput: 85.51393877201984 + torchscript_onnx: + inference_time: 11436.0 + throughput: 87.44316194473592 estimated_peak_memory_range: - min: 2809856 - max: 97007056 + min: 3637248 + max: 83196560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +103,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 737 - job_id: jw56qne6g + job_id: jnp13rdl5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,21 +112,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:22:38Z' + timestamp: '2024-06-22T22:24:52Z' - torchscript_onnx_tflite: - inference_time: 21665.0 - throughput: 46.157396722824835 + inference_time: 19684.0 + throughput: 50.80268238162975 estimated_peak_memory_range: - min: 438272 - max: 4576272 + min: 385024 + max: 3607136 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 771 + layers_on_npu: 788 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 771 - job_id: jqpyv6r8p + total_layers: 788 + job_id: j1p38y4z5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +135,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:22:32Z' - - torchscript_onnx_ort: - inference_time: 16944.0 - throughput: 59.01794145420208 + timestamp: '2024-06-22T22:24:42Z' + - torchscript_onnx_tflite: + inference_time: 19665.0 + throughput: 50.851767098906684 + estimated_peak_memory_range: + min: 16384 + max: 2576216 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 788 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 788 + job_id: jwgomj1d5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:24:44Z' + - torchscript_onnx: + inference_time: 17051.0 + throughput: 58.64758665180928 estimated_peak_memory_range: - min: 116158464 - max: 116158464 + min: 122880 + max: 122880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +172,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 737 - job_id: j1p3qev35 + job_id: jvgd0jrlp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +181,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:22:39Z' + timestamp: '2024-06-22T22:24:53Z' diff --git a/qai_hub_models/models/detr_resnet50_dc5/export.py b/qai_hub_models/models/detr_resnet50_dc5/export.py index af83c17f..292942fb 100644 --- a/qai_hub_models/models/detr_resnet50_dc5/export.py +++ b/qai_hub_models/models/detr_resnet50_dc5/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, 
skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -186,7 +185,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -216,7 +215,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/detr_resnet50_dc5/perf.yaml b/qai_hub_models/models/detr_resnet50_dc5/perf.yaml index b9e42be2..6292defe 100644 --- a/qai_hub_models/models/detr_resnet50_dc5/perf.yaml +++ b/qai_hub_models/models/detr_resnet50_dc5/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,26 +38,26 @@ models: - name: DETR-ResNet50-DC5 performance_metrics: - torchscript_onnx_tflite: - inference_time: 133335.0 - throughput: 7.49990625117186 + inference_time: 142484.0 + throughput: 7.018331882878077 estimated_peak_memory_range: - min: 135168 - max: 3805824 + min: 16384 + max: 111187464 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 772 + layers_on_npu: 789 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 772 - job_id: jz5wmq33g + total_layers: 789 + job_id: jqp48zjlg job_status: Passed - torchscript_onnx_ort: - inference_time: 117630.0 - throughput: 8.501232678738416 + torchscript_onnx: + inference_time: 126992.0 + throughput: 7.874511780269623 estimated_peak_memory_range: - min: 2134016 - max: 232241232 + min: 2805760 + max: 227776696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +65,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 737 - job_id: jo5mv36d5 + job_id: jogkdjrop job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,28 +74,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:23:14Z' + timestamp: '2024-06-22T22:25:34Z' - torchscript_onnx_tflite: - inference_time: 102075.0 - throughput: 9.796718099436688 + inference_time: 102232.0 + throughput: 9.78167305735973 estimated_peak_memory_range: - min: 163840 - max: 444293712 + min: 1269760 + max: 456777392 primary_compute_unit: NPU precision: fp16 layer_info: - 
layers_on_npu: 772 + layers_on_npu: 789 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 772 - job_id: jmg99wywg + total_layers: 789 + job_id: j0pxmwe9g job_status: Passed - torchscript_onnx_ort: - inference_time: 90172.0 - throughput: 11.089917047420485 + torchscript_onnx: + inference_time: 90670.0 + throughput: 11.029006286533583 estimated_peak_memory_range: - min: 6778880 - max: 152435808 + min: 6930432 + max: 133973776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +103,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 737 - job_id: jegnr3mk5 + job_id: jn5qwj9m5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,21 +112,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:23:15Z' + timestamp: '2024-06-22T22:25:35Z' - torchscript_onnx_tflite: - inference_time: 132335.0 - throughput: 7.556579891940908 + inference_time: 131821.0 + throughput: 7.586044712147533 estimated_peak_memory_range: - min: 1204224 - max: 4586176 + min: 1179648 + max: 108134792 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 772 + layers_on_npu: 789 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 772 - job_id: jnp1qew8g + total_layers: 789 + job_id: jo5m4jvq5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +135,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:23:09Z' - - torchscript_onnx_ort: - inference_time: 116939.0 - throughput: 8.551467004164564 + timestamp: '2024-06-22T22:25:25Z' + - torchscript_onnx_tflite: + inference_time: 131445.0 + throughput: 7.607744684088402 + estimated_peak_memory_range: + min: 1212416 + max: 4305656 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 789 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 789 + job_id: jegnxjrm5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:25:26Z' + - torchscript_onnx: + inference_time: 116855.0 + throughput: 8.557614137178554 estimated_peak_memory_range: - min: 22482944 - max: 22482944 + min: 136323072 + max: 136323072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +172,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 737 - job_id: jopr1e20g + job_id: j1gl7jel5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +181,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:23:16Z' + timestamp: '2024-06-22T22:25:36Z' diff --git a/qai_hub_models/models/efficientnet_b0/export.py b/qai_hub_models/models/efficientnet_b0/export.py index d4d7827c..872b7a3c 100644 --- a/qai_hub_models/models/efficientnet_b0/export.py +++ b/qai_hub_models/models/efficientnet_b0/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != 
TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/efficientnet_b0/perf.yaml b/qai_hub_models/models/efficientnet_b0/perf.yaml index 6383102d..6c34af8a 100644 --- a/qai_hub_models/models/efficientnet_b0/perf.yaml +++ b/qai_hub_models/models/efficientnet_b0/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: EfficientNet-B0 performance_metrics: - torchscript_onnx_tflite: - inference_time: 1626.0 - throughput: 615.0061500615006 + inference_time: 1607.0 + throughput: 622.2775357809583 estimated_peak_memory_range: - min: 16384 - max: 1985056 + min: 12288 + max: 2260744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jqpyv6j8p + job_id: j1p38yqz5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1678.0 - throughput: 595.9475566150179 + inference_time: 1668.0 + throughput: 599.5203836930456 estimated_peak_memory_range: - min: 16384 - max: 315561544 + min: 622592 + max: 78658688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: jogkr3qw5 + job_id: jlpe2j40p job_status: Passed - torchscript_onnx_ort: - inference_time: 1623.0 - throughput: 616.1429451632779 + torchscript_onnx: + inference_time: 1644.0 + throughput: 608.272506082725 estimated_peak_memory_range: - min: 16384 - max: 80982248 + min: 12288 + max: 58718544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j1p3qe135 + job_id: jvgd0j7lp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:23:45Z' + timestamp: '2024-06-22T22:26:06Z' - torchscript_onnx_tflite: - inference_time: 1142.0 - throughput: 875.6567425569177 + inference_time: 1143.0 + throughput: 874.8906386701663 estimated_peak_memory_range: min: 16384 - max: 72610976 + max: 76018192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j2p0el295 + job_id: jwgomjed5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1186.0 - throughput: 843.1703204047218 + inference_time: 1179.0 + throughput: 848.1764206955047 estimated_peak_memory_range: min: 618496 - max: 72353488 + max: 60351408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: jn5q93rnp + job_id: jygzw1v6g job_status: Passed - torchscript_onnx_ort: - inference_time: 1173.0 - throughput: 852.5149190110827 + torchscript_onnx: + inference_time: 1177.0 + throughput: 849.6176720475786 estimated_peak_memory_range: min: 618496 - max: 36882944 + max: 28149584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jwgoe3nqp + job_id: jz576q6rg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:23:46Z' + timestamp: '2024-06-22T22:26:07Z' - torchscript_onnx_tflite: - inference_time: 1631.0 - throughput: 613.1207847946046 + inference_time: 1618.0 + throughput: 618.0469715698393 estimated_peak_memory_range: - min: 16384 - max: 2841808 + min: 12288 + max: 1906624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j1p8wzmkp + job_id: j1pv4jzmp job_status: Passed torchscript_onnx_qnn: - inference_time: 1683.0 - throughput: 594.1770647653001 + inference_time: 1666.0 + throughput: 600.2400960384153 estimated_peak_memory_range: - min: 622592 - max: 88821056 + min: 618496 + max: 88931072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: jw56qnz6g + job_id: jmg9869vp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:23:44Z' + timestamp: '2024-06-22T22:26:04Z' + - torchscript_onnx_tflite: + inference_time: 1624.0 + throughput: 615.7635467980296 + estimated_peak_memory_range: + min: 28672 + max: 2761664 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 245 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 245 + job_id: j7gj1jk8g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1685.0 + throughput: 593.4718100890208 + estimated_peak_memory_range: + min: 622592 + max: 308210976 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 243 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 243 + job_id: jnp13rql5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:26:05Z' - torchscript_onnx_qnn: - inference_time: 1838.0 - throughput: 544.069640914037 + inference_time: 1760.0 + throughput: 568.1818181818181 estimated_peak_memory_range: - min: 1310720 - max: 1310720 + min: 602112 + max: 602112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: j1gle32jp + job_id: jz5wxjmjp job_status: Passed - torchscript_onnx_ort: - inference_time: 1641.0 - throughput: 609.3845216331505 + torchscript_onnx: + inference_time: 1690.0 + throughput: 591.7159763313609 estimated_peak_memory_range: - min: 32149504 - max: 32149504 + min: 31383552 + max: 31383552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j1pvzvvkg + job_id: jqp48z8lg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 
@@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:23:47Z' + timestamp: '2024-06-22T22:26:08Z' diff --git a/qai_hub_models/models/esrgan/export.py b/qai_hub_models/models/esrgan/export.py index 47c6b95d..2043234d 100644 --- a/qai_hub_models/models/esrgan/export.py +++ b/qai_hub_models/models/esrgan/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -186,7 +185,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -208,7 +207,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/esrgan/perf.yaml b/qai_hub_models/models/esrgan/perf.yaml index e35a93e5..055e2306 100644 --- a/qai_hub_models/models/esrgan/perf.yaml +++ b/qai_hub_models/models/esrgan/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: ESRGAN performance_metrics: - torchscript_onnx_tflite: - inference_time: 66520.0 - throughput: 15.033072760072159 + inference_time: 68462.0 + throughput: 14.606643101282463 estimated_peak_memory_range: - min: 4288512 - max: 7346848 + min: 4292608 + max: 12423592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1024 - job_id: jlpe4kko5 + job_id: jo5m4j4q5 job_status: Passed torchscript_onnx_qnn: - inference_time: 67593.0 - throughput: 14.794431376030062 + inference_time: 65477.0 + throughput: 15.272538448615544 estimated_peak_memory_range: - min: 73728 - max: 104762776 + min: 102400 + max: 106544352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: jmg99wwwg + job_id: jqpyn9n4g job_status: Passed - torchscript_onnx_ort: - inference_time: 
68322.0 - throughput: 14.636573870788325 + torchscript_onnx: + inference_time: 69101.0 + throughput: 14.47157059955717 estimated_peak_memory_range: - min: 6356992 - max: 154422496 + min: 6369280 + max: 152418736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jmg99ww8g + job_id: j1gl7j7l5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:24:28Z' + timestamp: '2024-06-22T22:26:52Z' - torchscript_onnx_tflite: - inference_time: 56935.0 - throughput: 17.56388864494599 + inference_time: 52385.0 + throughput: 19.08943399828195 estimated_peak_memory_range: - min: 86016 - max: 583340176 + min: 3260416 + max: 610205168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1024 - job_id: jygzvrrop + job_id: jegnxjxm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 50707.0 - throughput: 19.72114303745045 + inference_time: 49752.0 + throughput: 20.099694484643834 estimated_peak_memory_range: - min: 73728 - max: 260404000 + min: 90112 + max: 225116176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: jnp1qee8g + job_id: j2p0knke5 job_status: Passed - torchscript_onnx_ort: - inference_time: 51557.0 - throughput: 19.396008301491552 + torchscript_onnx: + inference_time: 52240.0 + throughput: 19.142419601837673 estimated_peak_memory_range: - min: 5955584 - max: 196150816 + min: 6713344 + max: 170523312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jnp1qee7g + job_id: jw56vkv7p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:24:29Z' + timestamp: '2024-06-22T22:26:53Z' - torchscript_onnx_tflite: - inference_time: 65283.0 - throughput: 15.31792350229003 + inference_time: 68152.0 + throughput: 14.673083695269398 estimated_peak_memory_range: - min: 1536000 - max: 4290816 + min: 24576 + max: 53347056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1024 - job_id: jz5wmqq3g + job_id: jopr9z9ep job_status: Passed torchscript_onnx_qnn: - inference_time: 65436.0 - throughput: 15.282107708295127 + inference_time: 68483.0 + throughput: 14.602164040710834 estimated_peak_memory_range: - min: 2744320 - max: 60284768 + min: 143360 + max: 109872728 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: jz5wmqqmg + job_id: jogkdjdop job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:24:27Z' + timestamp: '2024-06-22T22:26:50Z' + - torchscript_onnx_tflite: + inference_time: 75507.0 + throughput: 13.243805210112969 + estimated_peak_memory_range: + min: 3284992 + max: 6621224 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 1024 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 1024 + job_id: jep2j2jm5 + job_status: Passed + torchscript_onnx_qnn: + 
inference_time: 66483.0 + throughput: 15.041439164899298 + estimated_peak_memory_range: + min: 159744 + max: 102750576 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 1026 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 1026 + job_id: jn5qwjwm5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:26:51Z' - torchscript_onnx_qnn: - inference_time: 73135.0 - throughput: 13.67334381623026 + inference_time: 65392.0 + throughput: 15.292390506483974 estimated_peak_memory_range: - min: 221184 - max: 221184 + min: 204800 + max: 204800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: jvgd7oorg + job_id: j1p88l88p job_status: Passed - torchscript_onnx_ort: - inference_time: 65785.0 - throughput: 15.20103367028958 + torchscript_onnx: + inference_time: 65763.0 + throughput: 15.206118942262366 estimated_peak_memory_range: - min: 208896 - max: 208896 + min: 274432 + max: 274432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jvgd7oozg + job_id: j1p38y8z5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:24:30Z' + timestamp: '2024-06-22T22:26:54Z' diff --git a/qai_hub_models/models/facebook_denoiser/app.py b/qai_hub_models/models/facebook_denoiser/app.py index 537fc024..26948493 100644 --- a/qai_hub_models/models/facebook_denoiser/app.py +++ b/qai_hub_models/models/facebook_denoiser/app.py @@ -60,27 +60,26 @@ def denoise_audio( Returns: Predicted audio. See `raw_output` parameter above for type of return value. 
""" - with torch.no_grad(): - all_inputs_are_paths = True - - noisy_audios = [] - for audio in input_audio: - if isinstance(audio, str) or isinstance(audio, Path): - audio, sample_rate = torchaudio.load(audio) - assert sample_rate == self.sample_rate - else: - all_inputs_are_paths = False - if isinstance(audio, np.ndarray): - audio = torch.from_numpy(audio) - noisy_audios.append(audio) - - estimates = [] - for noisy in noisy_audios: - out = self.denoiser(noisy) - out = out / max(out.abs().max().item(), 1) # Normalize - if all_inputs_are_paths: - # We don't run files in batches, take the first batch output - out = out[:, 0] - estimates.append(out) - - return estimates + all_inputs_are_paths = True + + noisy_audios = [] + for audio in input_audio: + if isinstance(audio, str) or isinstance(audio, Path): + audio, sample_rate = torchaudio.load(audio) + assert sample_rate == self.sample_rate + else: + all_inputs_are_paths = False + if isinstance(audio, np.ndarray): + audio = torch.from_numpy(audio) + noisy_audios.append(audio) + + estimates = [] + for noisy in noisy_audios: + out = self.denoiser(noisy) + out = out / max(out.abs().max().item(), 1) # Normalize + if all_inputs_are_paths: + # We don't run files in batches, take the first batch output + out = out[:, 0] + estimates.append(out) + + return estimates diff --git a/qai_hub_models/models/facebook_denoiser/export.py b/qai_hub_models/models/facebook_denoiser/export.py index 11ecd12d..18d0f13c 100644 --- a/qai_hub_models/models/facebook_denoiser/export.py +++ b/qai_hub_models/models/facebook_denoiser/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -115,7 +115,6 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. 
Compile the model to an on-device asset @@ -172,7 +171,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -202,7 +201,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/facebook_denoiser/perf.yaml b/qai_hub_models/models/facebook_denoiser/perf.yaml index 912fe7c1..ad4e6e61 100644 --- a/qai_hub_models/models/facebook_denoiser/perf.yaml +++ b/qai_hub_models/models/facebook_denoiser/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: Facebook-Denoiser performance_metrics: - torchscript_onnx_tflite: - inference_time: 762754.0 - throughput: 1.3110386835073955 + inference_time: 764677.0 + throughput: 1.307741700090365 estimated_peak_memory_range: - min: 271872000 - max: 745165216 + min: 277712896 + max: 349060880 primary_compute_unit: CPU precision: fp32 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 205 total_layers: 205 - job_id: jqp4jvv1p + job_id: j1pv4j4mp job_status: Passed - torchscript_onnx_ort: - inference_time: 14425872.0 - throughput: 0.06931989969133236 + torchscript_onnx: + inference_time: 14500590.0 + throughput: 0.06896271117244195 estimated_peak_memory_range: - min: 73728 - max: 97772968 + min: 720896 + max: 75991544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +65,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 178 - job_id: jegnr33q5 + job_id: jz5wxjxjp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +74,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:24:56Z' + timestamp: '2024-06-22T22:27:22Z' - torchscript_onnx_tflite: - inference_time: 700116.0 - throughput: 1.4283347331013718 + inference_time: 778927.0 + throughput: 1.2838173538726991 estimated_peak_memory_range: - min: 418246656 - max: 442262688 + min: 484712448 + max: 504708688 primary_compute_unit: CPU precision: fp32 layer_info: @@ -86,14 +88,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 205 total_layers: 205 - job_id: j0pxeyyl5 + job_id: j7gj1j18g job_status: Passed - torchscript_onnx_ort: - inference_time: 10632015.0 - throughput: 0.09405554826625057 + torchscript_onnx: + inference_time: 10659565.0 + throughput: 0.09381245857593626 estimated_peak_memory_range: - min: 16744448 - max: 226752096 + min: 19398656 + max: 230929712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +103,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 178 - job_id: jopr1ee7g + job_id: jmg9868vp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +112,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: 
'2024-06-08T22:24:57Z' + timestamp: '2024-06-22T22:27:23Z' - torchscript_onnx_tflite: - inference_time: 733772.0 - throughput: 1.3628211488037156 + inference_time: 699741.0 + throughput: 1.4291001956438167 estimated_peak_memory_range: - min: 89939968 - max: 463947896 + min: 365686784 + max: 437035544 primary_compute_unit: CPU precision: fp32 layer_info: @@ -124,7 +126,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 205 total_layers: 205 - job_id: jo5mv3395 + job_id: jlpe2j20p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +135,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:24:54Z' - - torchscript_onnx_ort: - inference_time: 15555145.0 - throughput: 0.06428741101416927 + timestamp: '2024-06-22T22:27:18Z' + - torchscript_onnx_tflite: + inference_time: 750714.0 + throughput: 1.3320652072560255 + estimated_peak_memory_range: + min: 461258752 + max: 464483536 + primary_compute_unit: CPU + precision: fp32 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 205 + total_layers: 205 + job_id: jygzw1w6g + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:27:19Z' + - torchscript_onnx: + inference_time: 15624962.0 + throughput: 0.06400015564837853 estimated_peak_memory_range: - min: 450560 - max: 450560 + min: 446464 + max: 446464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +172,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 178 - job_id: jep23llqg + job_id: jnp13r3l5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +181,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:24:58Z' + timestamp: '2024-06-22T22:27:24Z' diff --git a/qai_hub_models/models/fastsam_s/export.py b/qai_hub_models/models/fastsam_s/export.py index aad089b0..a27b1cb6 100644 --- a/qai_hub_models/models/fastsam_s/export.py +++ b/qai_hub_models/models/fastsam_s/export.py @@ -38,7 +38,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,7 +117,6 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace( model.to("cpu"), make_torch_inputs(input_spec), check_trace=False ) @@ -126,7 +125,7 @@ def export_model( channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_1,output_2,output_3,output_5" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -172,7 +171,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -190,7 +189,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -212,7 +211,7 @@ def export_model( # Convert outputs from channel last to 
channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_1,output_2,output_3,output_5", inference_result, target_runtime ) @@ -228,7 +227,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/fastsam_s/perf.yaml b/qai_hub_models/models/fastsam_s/perf.yaml index 708e66d8..7b83f3f9 100644 --- a/qai_hub_models/models/fastsam_s/perf.yaml +++ b/qai_hub_models/models/fastsam_s/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -35,27 +37,12 @@ aggregated: models: - name: FastSam-S performance_metrics: - - torchscript_onnx_tflite: - inference_time: 8700.0 - throughput: 114.94252873563218 + - torchscript_onnx: + inference_time: 10777.0 + throughput: 92.79020135473694 estimated_peak_memory_range: - min: 8429568 - max: 39456112 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 288 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 288 - job_id: j2p0elln5 - job_status: Passed - torchscript_onnx_ort: - inference_time: 10893.0 - throughput: 91.80207472688883 - estimated_peak_memory_range: - min: 26902528 - max: 83130600 + min: 19591168 + max: 74568352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +50,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jwgoe3vkp + job_id: j1p88l68p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,28 +59,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:25:27Z' - - torchscript_onnx_tflite: - inference_time: 6426.0 - throughput: 155.6178026766262 - estimated_peak_memory_range: - min: 6594560 - max: 79404896 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 288 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 288 - job_id: j1p8wzzop - job_status: Passed - torchscript_onnx_ort: - inference_time: 7507.0 - throughput: 133.20900492873318 + timestamp: '2024-06-22T22:27:57Z' + - torchscript_onnx: + inference_time: 7538.0 + throughput: 132.66118333775538 estimated_peak_memory_range: - min: 27897856 - max: 69661040 + min: 22614016 + max: 55712784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: j1pvzvwrg + job_id: jogkdjoop job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,36 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:25:28Z' - - torchscript_onnx_tflite: - inference_time: 8693.0 - throughput: 115.03508570113885 - estimated_peak_memory_range: - min: 3923968 - max: 21721296 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 288 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 288 - job_id: jogkr33n5 - job_status: Passed - reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot - 
os_name: Android - manufacturer: Qualcomm - chipset: Qcs8550 - timestamp: '2024-06-08T22:25:21Z' - - torchscript_onnx_ort: - inference_time: 10798.0 - throughput: 92.60974254491572 + timestamp: '2024-06-22T22:27:58Z' + - torchscript_onnx: + inference_time: 10922.0 + throughput: 91.55832265152902 estimated_peak_memory_range: - min: 72966144 - max: 72966144 + min: 68141056 + max: 68141056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +96,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: j7gjkele5 + job_id: jn5qwjzm5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +105,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:25:29Z' + timestamp: '2024-06-22T22:27:59Z' diff --git a/qai_hub_models/models/fastsam_x/export.py b/qai_hub_models/models/fastsam_x/export.py index fb2a5872..80ca97d9 100644 --- a/qai_hub_models/models/fastsam_x/export.py +++ b/qai_hub_models/models/fastsam_x/export.py @@ -38,7 +38,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,7 +117,6 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace( model.to("cpu"), make_torch_inputs(input_spec), check_trace=False ) @@ -126,7 +125,7 @@ def export_model( channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_1,output_2,output_3,output_5" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -172,7 +171,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -190,7 +189,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -212,7 +211,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_1,output_2,output_3,output_5", inference_result, target_runtime ) @@ -228,7 +227,10 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + model_cls=Model, + supports_tflite=False, + supports_qnn=False, + supports_precompiled_qnn_onnx=False, ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/fastsam_x/perf.yaml b/qai_hub_models/models/fastsam_x/perf.yaml index 51138c0a..4627470a 100644 --- a/qai_hub_models/models/fastsam_x/perf.yaml +++ b/qai_hub_models/models/fastsam_x/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - 
Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -35,27 +37,12 @@ aggregated: models: - name: FastSam-X performance_metrics: - - torchscript_onnx_tflite: - inference_time: 53656.0 - throughput: 18.637244669748025 + - torchscript_onnx: + inference_time: 54458.0 + throughput: 18.362774982555365 estimated_peak_memory_range: - min: 9220096 - max: 14211840 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 420 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 420 - job_id: jygzvr7xp - job_status: Passed - torchscript_onnx_ort: - inference_time: 51625.0 - throughput: 19.37046004842615 - estimated_peak_memory_range: - min: 25325568 - max: 343683192 + min: 15622144 + max: 336071040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +50,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 421 - job_id: j0pxey1l5 + job_id: jnp13r7l5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,28 +59,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:26:04Z' - - torchscript_onnx_tflite: - inference_time: 36229.0 - throughput: 27.602197134891938 - estimated_peak_memory_range: - min: 8450048 - max: 144127216 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 420 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 420 - job_id: jz5wmq9mg - job_status: Passed - torchscript_onnx_ort: - inference_time: 37119.0 - throughput: 26.94038093698645 + timestamp: '2024-06-22T22:28:37Z' + - torchscript_onnx: + inference_time: 37262.0 + throughput: 26.83699210992432 estimated_peak_memory_range: - min: 29941760 - max: 95002704 + min: 30011392 + max: 85984848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 421 - job_id: jo5mv3z95 + job_id: jvgd0j8lp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,36 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:26:05Z' - - torchscript_onnx_tflite: - inference_time: 49800.0 - throughput: 20.080321285140563 - estimated_peak_memory_range: - min: 9379840 - max: 47006488 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 420 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 420 - job_id: jmg99w48g - job_status: Passed - reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot - os_name: Android - manufacturer: Qualcomm - chipset: Qcs8550 - timestamp: '2024-06-08T22:25:58Z' - - torchscript_onnx_ort: - inference_time: 49559.0 - throughput: 20.177969692689523 + timestamp: '2024-06-22T22:28:38Z' + - torchscript_onnx: + inference_time: 49517.0 + throughput: 20.1950845164287 estimated_peak_memory_range: - min: 30785536 - max: 30785536 + min: 36007936 + max: 36007936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +96,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 421 - job_id: jegnr3eq5 + job_id: jz5wxj16p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +105,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:26:06Z' + timestamp: '2024-06-22T22:28:39Z' diff --git a/qai_hub_models/models/fcn_resnet50/app.py b/qai_hub_models/models/fcn_resnet50/app.py index 08d0329a..2f8fae77 100644 --- a/qai_hub_models/models/fcn_resnet50/app.py +++ b/qai_hub_models/models/fcn_resnet50/app.py @@ -65,9 +65,8 
@@ def predict(self, image: Image, raw_output: bool = False) -> Image | np.ndarray: """ input_tensor = preprocess_image(image) - with torch.no_grad(): - output = self.model(input_tensor) - output = output[0] + output = self.model(input_tensor) + output = output[0] predictions = output.argmax(0).byte().cpu().numpy() if raw_output: diff --git a/qai_hub_models/models/fcn_resnet50/export.py b/qai_hub_models/models/fcn_resnet50/export.py index cc121898..c5b241c3 100644 --- a/qai_hub_models/models/fcn_resnet50/export.py +++ b/qai_hub_models/models/fcn_resnet50/export.py @@ -38,7 +38,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,13 +117,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -169,7 +168,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -187,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -209,7 +208,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/fcn_resnet50/model.py b/qai_hub_models/models/fcn_resnet50/model.py index e336cecf..bf95d3a4 100644 --- a/qai_hub_models/models/fcn_resnet50/model.py +++ b/qai_hub_models/models/fcn_resnet50/model.py @@ -31,7 +31,7 @@ def __init__( @classmethod def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> FCN_ResNet50: - model = tv_models.segmentation.fcn_resnet50(weights=weights).eval() + model = tv_models.segmentation.fcn_resnet50(weights=weights) model.aux_classifier = None return cls(model) diff --git a/qai_hub_models/models/fcn_resnet50/perf.yaml b/qai_hub_models/models/fcn_resnet50/perf.yaml index 8d3c9539..cf9278eb 100644 --- a/qai_hub_models/models/fcn_resnet50/perf.yaml +++ b/qai_hub_models/models/fcn_resnet50/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: FCN-ResNet50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 41432.0 - throughput: 24.135933577910794 + inference_time: 42095.0 + throughput: 
23.75579047392802 estimated_peak_memory_range: - min: 22097920 - max: 25129176 + min: 22130688 + max: 24721192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jep23lmqg + job_id: jnp13rv25 job_status: Passed torchscript_onnx_qnn: - inference_time: 42249.0 - throughput: 23.669199270988663 + inference_time: 42393.0 + throughput: 23.58880003774208 estimated_peak_memory_range: - min: 3497984 - max: 21232048 + min: 3244032 + max: 20959008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: j1p8wz7op + job_id: j0pxmwd1g job_status: Passed - torchscript_onnx_ort: - inference_time: 43347.0 - throughput: 23.069647265093316 + torchscript_onnx: + inference_time: 43645.0 + throughput: 22.912131973880168 estimated_peak_memory_range: - min: 44056576 - max: 204120472 + min: 44417024 + max: 203720776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jw56qn3yg + job_id: jqpyn977g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:26:38Z' + timestamp: '2024-06-22T22:29:16Z' - torchscript_onnx_tflite: - inference_time: 31357.0 - throughput: 31.890805880664605 + inference_time: 30857.0 + throughput: 32.407557442395564 estimated_peak_memory_range: - min: 49152 - max: 137281408 + min: 21643264 + max: 161762880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jqpyv6dlp + job_id: jvgd0jzep job_status: Passed torchscript_onnx_qnn: - inference_time: 31599.0 - throughput: 31.64657109402196 + inference_time: 31702.0 + throughput: 31.54375118289067 estimated_peak_memory_range: - min: 3162112 - max: 80794592 + min: 2564096 + max: 75592160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jogkr3yn5 + job_id: jo5m4jdw5 job_status: Passed - torchscript_onnx_ort: - inference_time: 32324.0 - throughput: 30.936765251825268 + torchscript_onnx: + inference_time: 31938.0 + throughput: 31.31066441229883 estimated_peak_memory_range: - min: 43311104 - max: 107423312 + min: 41369600 + max: 109154816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: j1p3qe4n5 + job_id: j2p0knv65 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:26:39Z' + timestamp: '2024-06-22T22:29:17Z' - torchscript_onnx_tflite: - inference_time: 41734.0 - throughput: 23.9612785738247 + inference_time: 42583.0 + throughput: 23.483549773383746 estimated_peak_memory_range: min: 22106112 - max: 24857096 + max: 24637056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: j2p0elrn5 + job_id: jz576q7lg job_status: Passed torchscript_onnx_qnn: - inference_time: 42169.0 - throughput: 23.714102776921436 + inference_time: 42400.0 + throughput: 23.58490566037736 estimated_peak_memory_range: min: 3166208 - max: 19865232 + max: 20583048 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: j1gle30mp + job_id: jopr9zn9p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:26:37Z' + timestamp: '2024-06-22T22:29:14Z' + - torchscript_onnx_tflite: + inference_time: 42218.0 + throughput: 23.686579184234212 + estimated_peak_memory_range: + min: 22097920 + max: 24890008 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 86 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 86 + job_id: jqp48z9vg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 42279.0 + throughput: 23.65240426689373 + estimated_peak_memory_range: + min: 3227648 + max: 20518616 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 127 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 127 + job_id: jep2j2v45 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:29:15Z' - torchscript_onnx_qnn: - inference_time: 70340.0 - throughput: 14.216661927779358 + inference_time: 39514.0 + throughput: 25.307485954345296 estimated_peak_memory_range: min: 3153920 max: 3153920 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jn5q932op + job_id: jegnxj7r5 job_status: Passed - torchscript_onnx_ort: - inference_time: 42281.0 - throughput: 23.651285447364064 + torchscript_onnx: + inference_time: 42191.0 + throughput: 23.701737337346827 estimated_peak_memory_range: - min: 9379840 - max: 9379840 + min: 28254208 + max: 28254208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jwgoe31kp + job_id: j1p88l4xp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:26:40Z' + timestamp: '2024-06-22T22:29:18Z' diff --git a/qai_hub_models/models/fcn_resnet50_quantized/export.py b/qai_hub_models/models/fcn_resnet50_quantized/export.py index 646bdaab..e85ca498 100644 --- a/qai_hub_models/models/fcn_resnet50_quantized/export.py +++ b/qai_hub_models/models/fcn_resnet50_quantized/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -127,7 +127,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -177,7 +177,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -195,7 +195,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif 
target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -217,7 +217,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/fcn_resnet50_quantized/model.py b/qai_hub_models/models/fcn_resnet50_quantized/model.py index affc65ef..faaf2c1b 100644 --- a/qai_hub_models/models/fcn_resnet50_quantized/model.py +++ b/qai_hub_models/models/fcn_resnet50_quantized/model.py @@ -74,7 +74,6 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() final_model = cls(sim) return final_model diff --git a/qai_hub_models/models/fcn_resnet50_quantized/perf.yaml b/qai_hub_models/models/fcn_resnet50_quantized/perf.yaml index 83867788..8727d190 100644 --- a/qai_hub_models/models/fcn_resnet50_quantized/perf.yaml +++ b/qai_hub_models/models/fcn_resnet50_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: FCN-ResNet50-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 14137.0 - throughput: 70.73636556553724 + inference_time: 14077.0 + throughput: 71.03786318107551 estimated_peak_memory_range: - min: 7475200 - max: 59586696 + min: 3821568 + max: 55833368 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 87 - job_id: j7gjke0e5 + job_id: jn5qwjm45 job_status: Passed torchscript_onnx_qnn: - inference_time: 15266.0 - throughput: 65.5050438883794 + inference_time: 15172.0 + throughput: 65.9108884787767 estimated_peak_memory_range: - min: 839680 - max: 9922576 + min: 16384 + max: 23692160 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jnp1qed7g - job_status: Passed - torchscript_onnx_ort: - inference_time: 12789.0 - throughput: 78.19219641879741 - estimated_peak_memory_range: - min: 9297920 - max: 58295544 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 80 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 80 - job_id: j0pxey7l5 + job_id: j7gj1jwxg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:28:08Z' + timestamp: '2024-06-22T22:30:41Z' - torchscript_onnx_tflite: - inference_time: 10012.0 - throughput: 99.88014382740711 + inference_time: 10017.0 + throughput: 99.83028850953379 estimated_peak_memory_range: - min: 73728 - max: 83075216 + min: 40960 + max: 86289520 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 87 - job_id: jlpe4krv5 + job_id: j1gl7j185 job_status: Passed torchscript_onnx_qnn: - inference_time: 11234.0 - throughput: 89.01548869503294 + inference_time: 
11196.0 + throughput: 89.31761343336906 estimated_peak_memory_range: - min: 802816 - max: 55488784 + min: 46923776 + max: 100046512 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jvgd7orzg - job_status: Passed - torchscript_onnx_ort: - inference_time: 9614.0 - throughput: 104.01497815685458 - estimated_peak_memory_range: - min: 11309056 - max: 56165696 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 80 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 80 - job_id: jegnr39q5 + job_id: jlpe2jl1p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +120,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:28:09Z' + timestamp: '2024-06-22T22:30:43Z' - torchscript_onnx_tflite: - inference_time: 14165.0 - throughput: 70.5965407695023 + inference_time: 13974.0 + throughput: 71.56147130385001 estimated_peak_memory_range: - min: 5574656 - max: 14323152 + min: 5554176 + max: 7391176 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 87 - job_id: jygzvrxxp + job_id: jw56vkd0p job_status: Passed torchscript_onnx_qnn: - inference_time: 15225.0 - throughput: 65.68144499178982 + inference_time: 15246.0 + throughput: 65.59097468188378 estimated_peak_memory_range: - min: 811008 - max: 30220216 + min: 24576 + max: 14758840 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jqp4jvx1p + job_id: jz5wxj46p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +158,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:28:07Z' + timestamp: '2024-06-22T22:30:45Z' - torchscript_onnx_tflite: - inference_time: 89203.0 - throughput: 11.210385300942793 + inference_time: 14037.0 + throughput: 71.24029351000927 estimated_peak_memory_range: - min: 6000640 - max: 92646944 + min: 5521408 + max: 300306456 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,7 +172,45 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 87 - job_id: jz5wmqdmg + job_id: j1p38ywl5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 15206.0 + throughput: 65.76351440220965 + estimated_peak_memory_range: + min: 16384 + max: 227015544 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 79 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 79 + job_id: jmg986dlp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:30:47Z' + - torchscript_onnx_tflite: + inference_time: 88951.0 + throughput: 11.242144551494643 + estimated_peak_memory_range: + min: 5931008 + max: 95025424 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 87 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 87 + job_id: jwgomj4x5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -207,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-08T22:28:02Z' + timestamp: '2024-06-22T22:30:39Z' - torchscript_onnx_tflite: - inference_time: 728106.0 - throughput: 1.373426396705974 + inference_time: 733152.0 + throughput: 1.3639736371175417 estimated_peak_memory_range: - 
min: 33034240 - max: 70768096 + min: 22896640 + max: 174088744 primary_compute_unit: NPU precision: int8 layer_info: @@ -221,7 +233,7 @@ models: layers_on_gpu: 9 layers_on_cpu: 12 total_layers: 87 - job_id: jmg99w38g + job_id: j1pv4j9jp job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -230,10 +242,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-08T22:28:03Z' + timestamp: '2024-06-22T22:30:40Z' - torchscript_onnx_qnn: - inference_time: 16789.0 - throughput: 59.562808982071594 + inference_time: 12928.0 + throughput: 77.35148514851485 estimated_peak_memory_range: min: 794624 max: 794624 @@ -244,22 +256,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jz57vxj95 - job_status: Passed - torchscript_onnx_ort: - inference_time: 12535.0 - throughput: 79.77662544874352 - estimated_peak_memory_range: - min: 835584 - max: 835584 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 80 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 80 - job_id: jopr1e47g + job_id: jygzw14kg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -268,4 +265,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:28:10Z' + timestamp: '2024-06-22T22:30:44Z' diff --git a/qai_hub_models/models/ffnet_122ns_lowres/export.py b/qai_hub_models/models/ffnet_122ns_lowres/export.py index 98404700..0aa30f17 100644 --- a/qai_hub_models/models/ffnet_122ns_lowres/export.py +++ b/qai_hub_models/models/ffnet_122ns_lowres/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -186,7 +185,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -208,7 +207,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/ffnet_122ns_lowres/perf.yaml b/qai_hub_models/models/ffnet_122ns_lowres/perf.yaml index 996c082b..d4318a47 100644 --- a/qai_hub_models/models/ffnet_122ns_lowres/perf.yaml +++ b/qai_hub_models/models/ffnet_122ns_lowres/perf.yaml @@ -9,6 +9,7 @@ 
aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: FFNet-122NS-LowRes performance_metrics: - torchscript_onnx_tflite: - inference_time: 9538.0 - throughput: 104.84378276368211 + inference_time: 6446.0 + throughput: 155.13496742165685 estimated_peak_memory_range: - min: 0 - max: 1882960 + min: 675840 + max: 3400696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 216 - job_id: j1p8wz3op + job_id: jegnxjkr5 job_status: Passed torchscript_onnx_qnn: - inference_time: 10684.0 - throughput: 93.59790340696368 + inference_time: 6982.0 + throughput: 143.22543683758235 estimated_peak_memory_range: - min: 7036928 - max: 23266984 + min: 6311936 + max: 32757704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: j1gle3emp + job_id: j2p0kn665 job_status: Passed - torchscript_onnx_ort: - inference_time: 7933.0 - throughput: 126.05571662674902 + torchscript_onnx: + inference_time: 8035.0 + throughput: 124.45550715619166 estimated_peak_memory_range: - min: 1155072 - max: 141586240 + min: 2633728 + max: 141075736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 350 - job_id: j1pvzvzrg + job_id: jw56vkw0p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:28:42Z' + timestamp: '2024-06-22T22:31:28Z' - torchscript_onnx_tflite: - inference_time: 6833.0 - throughput: 146.34860237084735 + inference_time: 4513.0 + throughput: 221.58209616662973 estimated_peak_memory_range: - min: 659456 - max: 61929920 + min: 663552 + max: 68570384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 216 - job_id: jogkr3ln5 + job_id: jopr9zw9p job_status: Passed torchscript_onnx_qnn: - inference_time: 7606.0 - throughput: 131.47515119642387 + inference_time: 4927.0 + throughput: 202.96326364927947 estimated_peak_memory_range: min: 6307840 - max: 93102864 + max: 73938192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: jw56qnqyg + job_id: j1p88l1xp job_status: Passed - torchscript_onnx_ort: - inference_time: 5594.0 - throughput: 178.7629603146228 + torchscript_onnx: + inference_time: 5615.0 + throughput: 178.09439002671417 estimated_peak_memory_range: - min: 6307840 - max: 59711872 + min: 9445376 + max: 50671440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 350 - job_id: j7gjkeke5 + job_id: j1p38y6l5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:28:43Z' + timestamp: '2024-06-22T22:31:29Z' - torchscript_onnx_tflite: - inference_time: 9545.0 - throughput: 104.76689366160294 + inference_time: 6458.0 + throughput: 154.8467017652524 estimated_peak_memory_range: 
- min: 0 - max: 2096664 + min: 561152 + max: 3140840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 216 - job_id: jn5q937op + job_id: jep2j2e45 job_status: Passed torchscript_onnx_qnn: - inference_time: 10716.0 - throughput: 93.3184023889511 + inference_time: 6974.0 + throughput: 143.38973329509608 estimated_peak_memory_range: min: 6311936 - max: 40648480 + max: 30973152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: jwgoe3ekp + job_id: jn5qwjv45 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:28:41Z' + timestamp: '2024-06-22T22:31:25Z' + - torchscript_onnx_tflite: + inference_time: 6444.0 + throughput: 155.18311607697083 + estimated_peak_memory_range: + min: 57344 + max: 2216792 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 216 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 216 + job_id: jqpyn9m7g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 7120.0 + throughput: 140.4494382022472 + estimated_peak_memory_range: + min: 6307840 + max: 30929088 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 348 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 348 + job_id: j1gl7jl85 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:31:26Z' - torchscript_onnx_qnn: - inference_time: 17375.0 - throughput: 57.55395683453237 + inference_time: 6306.0 + throughput: 158.5791309863622 estimated_peak_memory_range: min: 6303744 max: 6303744 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: j1p3qeqn5 + job_id: jogkdj82p job_status: Passed - torchscript_onnx_ort: - inference_time: 7523.0 - throughput: 132.92569453675395 + torchscript_onnx: + inference_time: 7546.0 + throughput: 132.520540683806 estimated_peak_memory_range: - min: 6332416 - max: 6332416 + min: 6307840 + max: 6307840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 350 - job_id: jlpe4k4v5 + job_id: jwgomj8x5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:28:44Z' + timestamp: '2024-06-22T22:31:30Z' diff --git a/qai_hub_models/models/ffnet_40s/export.py b/qai_hub_models/models/ffnet_40s/export.py index 879b2dbd..0d6746bd 100644 --- a/qai_hub_models/models/ffnet_40s/export.py +++ b/qai_hub_models/models/ffnet_40s/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != 
TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -186,7 +185,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -208,7 +207,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/ffnet_40s/perf.yaml b/qai_hub_models/models/ffnet_40s/perf.yaml index 2da118a9..b6184d8d 100644 --- a/qai_hub_models/models/ffnet_40s/perf.yaml +++ b/qai_hub_models/models/ffnet_40s/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: FFNet-40S performance_metrics: - torchscript_onnx_tflite: - inference_time: 23193.0 - throughput: 43.11645755184754 + inference_time: 16960.0 + throughput: 58.9622641509434 estimated_peak_memory_range: - min: 2531328 - max: 4441664 + min: 2121728 + max: 4539152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 92 - job_id: jz5wmqmmg + job_id: j7gj1jqxg job_status: Passed torchscript_onnx_qnn: - inference_time: 17411.0 - throughput: 57.43495491356039 + inference_time: 17413.0 + throughput: 57.42835812324125 estimated_peak_memory_range: - min: 25214976 - max: 45407080 + min: 25198592 + max: 45873296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jvgd7o7zg + job_id: jmg986mlp job_status: Passed - torchscript_onnx_ort: - inference_time: 27393.0 - throughput: 36.50567663271639 + torchscript_onnx: + inference_time: 27211.0 + throughput: 36.74984381316379 estimated_peak_memory_range: - min: 34656256 - max: 113886552 + min: 30203904 + max: 110856920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: jo5mv3v95 + job_id: j0pxmw41g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:29:14Z' + timestamp: '2024-06-22T22:32:03Z' - torchscript_onnx_tflite: - inference_time: 16820.0 - throughput: 59.45303210463734 + inference_time: 12646.0 + throughput: 79.07638779060572 estimated_peak_memory_range: - min: 757760 - max: 102036720 + min: 1835008 + max: 94309104 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 92 - job_id: 
jmg99w98g + job_id: jlpe2jy1p job_status: Passed torchscript_onnx_qnn: - inference_time: 12560.0 - throughput: 79.61783439490446 + inference_time: 12519.0 + throughput: 79.87858455148175 estimated_peak_memory_range: - min: 132333568 - max: 190814608 + min: 25178112 + max: 79443216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jz57vxv95 + job_id: jnp13rj25 job_status: Passed - torchscript_onnx_ort: - inference_time: 19832.0 - throughput: 50.42355788624445 + torchscript_onnx: + inference_time: 19821.0 + throughput: 50.45154129458655 estimated_peak_memory_range: - min: 29405184 - max: 74127520 + min: 29417472 + max: 71721472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: jegnr3rq5 + job_id: jo5m4jmw5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:29:15Z' + timestamp: '2024-06-22T22:32:04Z' - torchscript_onnx_tflite: - inference_time: 23566.0 - throughput: 42.43401510650938 + inference_time: 17132.0 + throughput: 58.37030119075414 estimated_peak_memory_range: - min: 2564096 - max: 4836528 + min: 2531328 + max: 4933896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 92 - job_id: jnp1qeq7g + job_id: jygzw1nkg job_status: Passed torchscript_onnx_qnn: - inference_time: 17310.0 - throughput: 57.77007510109763 + inference_time: 17490.0 + throughput: 57.17552887364208 estimated_peak_memory_range: - min: 25202688 - max: 45281048 + min: 25210880 + max: 45868592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: j0pxeyel5 + job_id: jz576q4lg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:29:13Z' + timestamp: '2024-06-22T22:32:01Z' + - torchscript_onnx_tflite: + inference_time: 17571.0 + throughput: 56.911957202208185 + estimated_peak_memory_range: + min: 2555904 + max: 5263536 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 92 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 92 + job_id: jz5wxj76p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 17330.0 + throughput: 57.70340450086555 + estimated_peak_memory_range: + min: 24944640 + max: 45612104 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 140 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 140 + job_id: jqp48z1vg + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:32:02Z' - torchscript_onnx_qnn: - inference_time: 23356.0 - throughput: 42.81555060798082 + inference_time: 17741.0 + throughput: 56.3666084211713 estimated_peak_memory_range: - min: 25219072 - max: 25219072 + min: 25223168 + max: 25223168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,11 +232,11 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jqp4jvj1p + job_id: jvgd0j3ep job_status: Passed - torchscript_onnx_ort: - inference_time: 26356.0 - throughput: 37.942024586431934 + torchscript_onnx: 
+ inference_time: 26353.0 + throughput: 37.94634386976815 estimated_peak_memory_range: min: 25219072 max: 25219072 @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: jopr1e17g + job_id: jegnxjnr5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:29:16Z' + timestamp: '2024-06-22T22:32:05Z' diff --git a/qai_hub_models/models/ffnet_40s_quantized/export.py b/qai_hub_models/models/ffnet_40s_quantized/export.py index ad846a21..4cd3dfb5 100644 --- a/qai_hub_models/models/ffnet_40s_quantized/export.py +++ b/qai_hub_models/models/ffnet_40s_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -216,7 +216,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) @@ -229,7 +229,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/ffnet_40s_quantized/perf.yaml b/qai_hub_models/models/ffnet_40s_quantized/perf.yaml index c8a1dcdd..80fc0489 100644 --- a/qai_hub_models/models/ffnet_40s_quantized/perf.yaml +++ b/qai_hub_models/models/ffnet_40s_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: FFNet-40S-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 6442.0 - throughput: 155.2312946289972 + inference_time: 6472.0 + throughput: 154.51174289245984 estimated_peak_memory_range: - min: 36864 - max: 1593576 + min: 667648 + max: 2150792 primary_compute_unit: NPU 
precision: int8 layer_info: @@ -54,22 +58,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 97 - job_id: jqpyv6vlp - job_status: Passed - torchscript_onnx_ort: - inference_time: 9268.0 - throughput: 107.89814415192059 - estimated_peak_memory_range: - min: 7577600 - max: 25025832 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 92 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 92 - job_id: j1pvzv4rg + job_id: jep2j2w45 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -78,13 +67,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:30:02Z' + timestamp: '2024-06-22T22:32:42Z' - torchscript_onnx_tflite: - inference_time: 4682.0 - throughput: 213.58393848782572 + inference_time: 4697.0 + throughput: 212.90185224611454 estimated_peak_memory_range: - min: 12288 - max: 67067712 + min: 40960 + max: 70704480 primary_compute_unit: NPU precision: int8 layer_info: @@ -92,22 +81,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 97 - job_id: j2p0elen5 - job_status: Passed - torchscript_onnx_ort: - inference_time: 7185.0 - throughput: 139.17884481558804 - estimated_peak_memory_range: - min: 6955008 - max: 47776688 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 92 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 92 - job_id: j7gjke1e5 + job_id: jqpyn9x7g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -116,13 +90,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:30:03Z' + timestamp: '2024-06-22T22:32:43Z' - torchscript_onnx_tflite: - inference_time: 6401.0 - throughput: 156.22558975160132 + inference_time: 6430.0 + throughput: 155.52099533437013 estimated_peak_memory_range: - min: 651264 - max: 2179136 + min: 638976 + max: 8974520 primary_compute_unit: NPU precision: int8 layer_info: @@ -130,7 +104,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 97 - job_id: j1p8wzwop + job_id: j2p0knj65 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -139,13 +113,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:29:54Z' + timestamp: '2024-06-22T22:32:44Z' - torchscript_onnx_tflite: - inference_time: 35462.0 - throughput: 28.199199142744344 + inference_time: 6440.0 + throughput: 155.27950310559007 estimated_peak_memory_range: - min: 163840 - max: 38805968 + min: 647168 + max: 9801960 primary_compute_unit: NPU precision: int8 layer_info: @@ -153,22 +127,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 97 - job_id: jogkr3rn5 + job_id: j1p88lxxp job_status: Passed reference_device_info: - name: RB3 Gen 2 (Proxy) - os: '12' - form_factor: Iot + name: SA8775 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: Qcs6490 - timestamp: '2024-06-08T22:29:55Z' + chipset: Sa8775p + timestamp: '2024-06-22T22:32:45Z' - torchscript_onnx_tflite: - inference_time: 189203.0 - throughput: 5.285328456736944 + inference_time: 35271.0 + throughput: 28.351903830342206 estimated_peak_memory_range: - min: 835584 - max: 9440536 + min: 176128 + max: 42089840 primary_compute_unit: NPU precision: int8 layer_info: @@ -176,36 +150,36 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 97 - job_id: jn5q939op + job_id: jogkdj42p job_status: Passed reference_device_info: - name: RB5 (Proxy) + name: RB3 Gen 2 (Proxy) os: '12' form_factor: Iot os_name: Android manufacturer: 
Qualcomm - chipset: Qcs8250 - timestamp: '2024-06-08T22:29:56Z' - - torchscript_onnx_ort: - inference_time: 8436.0 - throughput: 118.53959222380276 + chipset: Qcs6490 + timestamp: '2024-06-22T22:32:46Z' + - torchscript_onnx_tflite: + inference_time: 189478.0 + throughput: 5.277657564466587 estimated_peak_memory_range: - min: 23719936 - max: 23719936 + min: 806912 + max: 2886248 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 92 + layers_on_npu: 97 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 92 - job_id: jlpe4k2v5 + total_layers: 97 + job_id: jn5qwjy45 job_status: Passed reference_device_info: - name: Snapdragon X Elite CRD - os: '11' - form_factor: Compute - os_name: Windows + name: RB5 (Proxy) + os: '12' + form_factor: Iot + os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:30:04Z' + chipset: Qcs8250 + timestamp: '2024-06-22T22:32:48Z' diff --git a/qai_hub_models/models/ffnet_54s/export.py b/qai_hub_models/models/ffnet_54s/export.py index a573a9f5..e27bc0d5 100644 --- a/qai_hub_models/models/ffnet_54s/export.py +++ b/qai_hub_models/models/ffnet_54s/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -186,7 +185,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -208,7 +207,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/ffnet_54s/perf.yaml b/qai_hub_models/models/ffnet_54s/perf.yaml index e912f5b5..7489b233 100644 --- a/qai_hub_models/models/ffnet_54s/perf.yaml +++ b/qai_hub_models/models/ffnet_54s/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: FFNet-54S performance_metrics: - torchscript_onnx_tflite: - inference_time: 25403.0 - throughput: 39.365429280006296 + 
inference_time: 20186.0 + throughput: 49.53928465272961 estimated_peak_memory_range: - min: 4255744 - max: 6909008 + min: 2146304 + max: 4656256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 113 - job_id: jz5wmqxmg + job_id: jvgd0jdep job_status: Passed torchscript_onnx_qnn: - inference_time: 20253.0 - throughput: 49.37540117513455 + inference_time: 20279.0 + throughput: 49.31209625721189 estimated_peak_memory_range: - min: 25219072 - max: 49749016 + min: 24895488 + max: 44659984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 175 - job_id: jvgd7o0zg + job_id: jo5m4j0w5 job_status: Passed - torchscript_onnx_ort: - inference_time: 30396.0 - throughput: 32.89906566653507 + torchscript_onnx: + inference_time: 31290.0 + throughput: 31.959092361776925 estimated_peak_memory_range: - min: 25182208 - max: 90860800 + min: 30216192 + max: 97049136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 177 - job_id: jvgd7o06g + job_id: j2p0knm65 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:30:34Z' + timestamp: '2024-06-22T22:33:34Z' - torchscript_onnx_tflite: - inference_time: 18529.0 - throughput: 53.96945328943818 + inference_time: 14966.0 + throughput: 66.81812107443538 estimated_peak_memory_range: - min: 2461696 - max: 110619440 + min: 438272 + max: 104545232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 113 - job_id: jmg99w88g + job_id: jz576qelg job_status: Passed torchscript_onnx_qnn: - inference_time: 14443.0 - throughput: 69.23769300006924 + inference_time: 14534.0 + throughput: 68.8041832943443 estimated_peak_memory_range: - min: 20983808 - max: 91014848 + min: 21000192 + max: 80471360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 175 - job_id: jz5wmqx4g + job_id: jegnxjzr5 job_status: Passed - torchscript_onnx_ort: - inference_time: 23366.0 - throughput: 42.79722673970727 + torchscript_onnx: + inference_time: 22636.0 + throughput: 44.1774165046828 estimated_peak_memory_range: - min: 29618176 - max: 74645360 + min: 29200384 + max: 73676080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 177 - job_id: jz57vx6n5 + job_id: j1p88lexp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:30:35Z' + timestamp: '2024-06-22T22:33:35Z' - torchscript_onnx_tflite: - inference_time: 25775.0 - throughput: 38.797284190106694 + inference_time: 20656.0 + throughput: 48.412083656080554 estimated_peak_memory_range: - min: 2547712 - max: 5263000 + min: 229376 + max: 2364176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 113 - job_id: jnp1qe37g + job_id: jqp48zyvg job_status: Passed torchscript_onnx_qnn: - inference_time: 20126.0 - throughput: 49.686972075921695 + inference_time: 20059.0 + throughput: 49.85293384515679 estimated_peak_memory_range: - min: 25214976 - 
max: 40883168 + min: 25210880 + max: 40220656 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 175 - job_id: jnp1qe3ng + job_id: jep2j2r45 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:30:33Z' + timestamp: '2024-06-22T22:33:31Z' + - torchscript_onnx_tflite: + inference_time: 20271.0 + throughput: 49.331557397267034 + estimated_peak_memory_range: + min: 2560000 + max: 4512832 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 113 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 113 + job_id: j0pxmwl1g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 19981.0 + throughput: 50.04754516790951 + estimated_peak_memory_range: + min: 25206784 + max: 44193592 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 175 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 175 + job_id: jqpyn9o7g + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:33:33Z' - torchscript_onnx_qnn: - inference_time: 25735.0 - throughput: 38.857586943850784 + inference_time: 20202.0 + throughput: 49.5000495000495 estimated_peak_memory_range: min: 25219072 max: 25219072 @@ -192,11 +232,11 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 175 - job_id: jmg99w8mg + job_id: jopr9zl9p job_status: Passed - torchscript_onnx_ort: - inference_time: 29431.0 - throughput: 33.97777853283952 + torchscript_onnx: + inference_time: 29334.0 + throughput: 34.0901343151292 estimated_peak_memory_range: min: 25223168 max: 25223168 @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 177 - job_id: jqp4jv82p + job_id: jogkdj22p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:30:36Z' + timestamp: '2024-06-22T22:33:36Z' diff --git a/qai_hub_models/models/ffnet_54s_quantized/export.py b/qai_hub_models/models/ffnet_54s_quantized/export.py index f16f2f1a..9dfa7408 100644 --- a/qai_hub_models/models/ffnet_54s_quantized/export.py +++ b/qai_hub_models/models/ffnet_54s_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, 
TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -216,7 +216,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) @@ -229,7 +229,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/ffnet_54s_quantized/perf.yaml b/qai_hub_models/models/ffnet_54s_quantized/perf.yaml index 31883584..218f33b4 100644 --- a/qai_hub_models/models/ffnet_54s_quantized/perf.yaml +++ b/qai_hub_models/models/ffnet_54s_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: FFNet-54S-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 7119.0 - throughput: 140.4691670178396 + inference_time: 7131.0 + throughput: 140.2327864254663 estimated_peak_memory_range: - min: 688128 - max: 2335176 + min: 163840 + max: 1788488 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,22 +58,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jo5mv3475 - job_status: Passed - torchscript_onnx_ort: - inference_time: 9678.0 - throughput: 103.32713370531101 - estimated_peak_memory_range: - min: 7581696 - max: 40900680 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 113 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 113 - job_id: j1gle3o2p + job_id: j1gl7jy85 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -78,13 +67,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:31:28Z' + timestamp: '2024-06-22T22:34:18Z' - torchscript_onnx_tflite: - inference_time: 5120.0 - throughput: 195.3125 + inference_time: 5246.0 + throughput: 190.62142584826535 estimated_peak_memory_range: - min: 45056 - max: 74881936 + min: 696320 + max: 83030896 primary_compute_unit: NPU precision: int8 layer_info: @@ -92,22 +81,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jegnr3xj5 - job_status: Passed - torchscript_onnx_ort: - inference_time: 7395.0 - throughput: 135.2265043948614 - estimated_peak_memory_range: - min: 5738496 - max: 42316048 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 113 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 113 - job_id: jw56qnrng + job_id: jw56vk80p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -116,13 +90,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:31:29Z' + timestamp: '2024-06-22T22:34:19Z' - torchscript_onnx_tflite: - inference_time: 7096.0 - throughput: 140.92446448703495 + inference_time: 7205.0 + throughput: 138.79250520471894 
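The export scripts touched above all share the same channel-layout handling, which is why the same TargetRuntime.ORT to TargetRuntime.ONNX substitution recurs in each file: the ONNX runtime keeps the model's native channel-first (NCHW) I/O, while TFLite and QNN targets are compiled with channel-last (NHWC) I/O and have their tensors transposed around inference. A condensed sketch assembled from the FFNet hunks above (imports and the surrounding compile/inference job plumbing are as in those files and elided here; the tensor names image and output_0 vary per model):

# Compile-time flags: only non-ONNX runtimes get channel-last I/O forced.
channel_last_flags = (
    " --force_channel_last_input image" + " --force_channel_last_output output_0"
    if target_runtime != TargetRuntime.ONNX
    else ""
)

# Inference-time inputs: transpose to channel-last for non-ONNX runtimes...
hub_inputs = (
    sample_inputs
    if target_runtime == TargetRuntime.ONNX
    else transpose_channel_first_to_last("image", sample_inputs, target_runtime)
)

# ...and transpose the job outputs back to channel-first afterwards.
inference_result = (
    inference_result
    if target_runtime == TargetRuntime.ONNX
    else transpose_channel_last_to_first("output_0", inference_result, target_runtime)
)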
estimated_peak_memory_range: - min: 61440 - max: 14772576 + min: 663552 + max: 31484880 primary_compute_unit: NPU precision: int8 layer_info: @@ -130,7 +104,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jopr1e9kg + job_id: j1p38yzl5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -139,13 +113,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:31:20Z' + timestamp: '2024-06-22T22:34:20Z' - torchscript_onnx_tflite: - inference_time: 39816.0 - throughput: 25.11553144464537 + inference_time: 7101.0 + throughput: 140.8252358822701 estimated_peak_memory_range: - min: 122880 - max: 41244048 + min: 643072 + max: 2120000 primary_compute_unit: NPU precision: int8 layer_info: @@ -153,22 +127,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jep23lj6g + job_id: jwgomjlx5 job_status: Passed reference_device_info: - name: RB3 Gen 2 (Proxy) - os: '12' - form_factor: Iot + name: SA8775 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: Qcs6490 - timestamp: '2024-06-08T22:31:21Z' + chipset: Sa8775p + timestamp: '2024-06-22T22:34:21Z' - torchscript_onnx_tflite: - inference_time: 203928.0 - throughput: 4.903691498960417 + inference_time: 38864.0 + throughput: 25.73075339645945 estimated_peak_memory_range: - min: 225280 - max: 7415104 + min: 126976 + max: 44890592 primary_compute_unit: NPU precision: int8 layer_info: @@ -176,36 +150,36 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jqpyv6n0p + job_id: j1pv4jljp job_status: Passed reference_device_info: - name: RB5 (Proxy) + name: RB3 Gen 2 (Proxy) os: '12' form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Qcs8250 - timestamp: '2024-06-08T22:31:22Z' - - torchscript_onnx_ort: - inference_time: 8994.0 - throughput: 111.185234600845 + chipset: Qcs6490 + timestamp: '2024-06-22T22:34:22Z' + - torchscript_onnx_tflite: + inference_time: 198804.0 + throughput: 5.030079877668458 estimated_peak_memory_range: - min: 6340608 - max: 6340608 + min: 884736 + max: 3017472 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 113 + layers_on_npu: 118 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 113 - job_id: j1p3qexm5 + total_layers: 118 + job_id: j7gj1jrxg job_status: Passed reference_device_info: - name: Snapdragon X Elite CRD - os: '11' - form_factor: Compute - os_name: Windows + name: RB5 (Proxy) + os: '12' + form_factor: Iot + os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:31:30Z' + chipset: Qcs8250 + timestamp: '2024-06-22T22:34:23Z' diff --git a/qai_hub_models/models/ffnet_78s/export.py b/qai_hub_models/models/ffnet_78s/export.py index f0bec0bf..fd346e9b 100644 --- a/qai_hub_models/models/ffnet_78s/export.py +++ b/qai_hub_models/models/ffnet_78s/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != 
TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -186,7 +185,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -208,7 +207,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/ffnet_78s/perf.yaml b/qai_hub_models/models/ffnet_78s/perf.yaml index 1d0087dd..ea209937 100644 --- a/qai_hub_models/models/ffnet_78s/perf.yaml +++ b/qai_hub_models/models/ffnet_78s/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: FFNet-78S performance_metrics: - torchscript_onnx_tflite: - inference_time: 29896.0 - throughput: 33.44929087503345 + inference_time: 23277.0 + throughput: 42.960862654122096 estimated_peak_memory_range: - min: 2584576 - max: 5177832 + min: 192512 + max: 2381224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: j1pvzvezg + job_id: jqp48ze8g job_status: Passed torchscript_onnx_qnn: - inference_time: 23500.0 - throughput: 42.5531914893617 + inference_time: 24928.0 + throughput: 40.11553273427471 estimated_peak_memory_range: - min: 25223168 - max: 55846352 + min: 25337856 + max: 47478816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jygzvr84p + job_id: jopr9zx0p job_status: Passed - torchscript_onnx_ort: - inference_time: 34791.0 - throughput: 28.743065735391337 + torchscript_onnx: + inference_time: 33817.0 + throughput: 29.570925865688853 estimated_peak_memory_range: - min: 31657984 - max: 174636584 + min: 897024 + max: 144124064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 237 - job_id: jvgd7o86g + job_id: jogkdj6wp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:32:03Z' + timestamp: '2024-06-22T22:35:14Z' - torchscript_onnx_tflite: - inference_time: 21247.0 - throughput: 47.065468066079916 + inference_time: 17275.0 + throughput: 57.88712011577424 estimated_peak_memory_range: - min: 684032 - max: 120904016 + min: 1945600 + max: 118752096 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - 
job_id: j7gjkeo15 + job_id: j0pxmw03g job_status: Passed torchscript_onnx_qnn: - inference_time: 17520.0 - throughput: 57.077625570776256 + inference_time: 17799.0 + throughput: 56.18293162537221 estimated_peak_memory_range: - min: 21012480 - max: 102988784 + min: 21008384 + max: 92892240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jz5wmq84g + job_id: jep2j2or5 job_status: Passed - torchscript_onnx_ort: - inference_time: 25762.0 - throughput: 38.816862044872295 + torchscript_onnx: + inference_time: 26360.0 + throughput: 37.93626707132018 estimated_peak_memory_range: - min: 31490048 - max: 82980160 + min: 29413376 + max: 78729008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 237 - job_id: jz57vxkn5 + job_id: jn5qwj4n5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:32:04Z' + timestamp: '2024-06-22T22:35:15Z' - torchscript_onnx_tflite: - inference_time: 29131.0 - throughput: 34.327692149256805 + inference_time: 24022.0 + throughput: 41.62850720173174 estimated_peak_memory_range: - min: 2592768 - max: 5433672 + min: 2560000 + max: 4718240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jlpe4k885 + job_id: jo5m4j9d5 job_status: Passed torchscript_onnx_qnn: - inference_time: 23774.0 - throughput: 42.06275763439051 + inference_time: 24147.0 + throughput: 41.41301196836046 estimated_peak_memory_range: - min: 27922432 - max: 51160616 + min: 25194496 + max: 46049992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jnp1qe7ng + job_id: j2p0kno95 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:32:02Z' + timestamp: '2024-06-22T22:35:11Z' + - torchscript_onnx_tflite: + inference_time: 23700.0 + throughput: 42.19409282700422 + estimated_peak_memory_range: + min: 126976 + max: 2246520 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 149 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 149 + job_id: jegnxj1k5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 23730.0 + throughput: 42.14075010535188 + estimated_peak_memory_range: + min: 25210880 + max: 40207040 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 235 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 235 + job_id: j1p88ljkp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:35:13Z' - torchscript_onnx_qnn: - inference_time: 32569.0 - throughput: 30.70404372255826 + inference_time: 24214.0 + throughput: 41.29842240026431 estimated_peak_memory_range: - min: 25214976 - max: 25214976 + min: 25219072 + max: 25219072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,11 +232,11 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jmg99wkmg + job_id: jqpyn988g job_status: Passed - torchscript_onnx_ort: - inference_time: 33100.0 - throughput: 30.211480362537763 + 
torchscript_onnx: + inference_time: 33104.0 + throughput: 30.207829869502174 estimated_peak_memory_range: min: 25219072 max: 25219072 @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 237 - job_id: jqp4jvm2p + job_id: j1gl7jwj5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:32:05Z' + timestamp: '2024-06-22T22:35:16Z' diff --git a/qai_hub_models/models/ffnet_78s_lowres/export.py b/qai_hub_models/models/ffnet_78s_lowres/export.py index 1ee8b996..c2b5dc43 100644 --- a/qai_hub_models/models/ffnet_78s_lowres/export.py +++ b/qai_hub_models/models/ffnet_78s_lowres/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -186,7 +185,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -208,7 +207,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/ffnet_78s_lowres/perf.yaml b/qai_hub_models/models/ffnet_78s_lowres/perf.yaml index 9f48808e..598a9a41 100644 --- a/qai_hub_models/models/ffnet_78s_lowres/perf.yaml +++ b/qai_hub_models/models/ffnet_78s_lowres/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: FFNet-78S-LowRes performance_metrics: - torchscript_onnx_tflite: - inference_time: 10698.0 - throughput: 93.47541596560104 + inference_time: 7397.0 + throughput: 135.189941868325 estimated_peak_memory_range: - min: 12288 - max: 8183320 + min: 684032 + max: 2700304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jo5mv3o75 + job_id: j1p38yo35 job_status: Passed torchscript_onnx_qnn: - inference_time: 11228.0 
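Each export script also derives the downloaded artifact's file extension from the target runtime; the hunks above only rename the ONNX-related members in that chain. Condensed from the scripts above; the first branch, which yields "so", sits outside this diff and is presumed to be the QNN runtime:

if target_runtime == TargetRuntime.QNN:  # assumption: this branch is not visible in the diff
    target_runtime_extension = "so"
elif target_runtime == TargetRuntime.TFLITE:
    target_runtime_extension = "tflite"
elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}:
    target_runtime_extension = "onnx"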
- throughput: 89.06305664410402 + inference_time: 7650.0 + throughput: 130.718954248366 estimated_peak_memory_range: - min: 2109440 - max: 55500544 + min: 6258688 + max: 35155256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 - job_id: jep23l46g + job_id: jlpe2jxop job_status: Passed - torchscript_onnx_ort: - inference_time: 8904.0 - throughput: 112.30907457322552 + torchscript_onnx: + inference_time: 8935.0 + throughput: 111.9194180190263 estimated_peak_memory_range: - min: 1257472 - max: 128438216 + min: 1785856 + max: 118205632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 238 - job_id: jogkr39v5 + job_id: jvgd0j4rp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:32:36Z' + timestamp: '2024-06-22T22:35:49Z' - torchscript_onnx_tflite: - inference_time: 7663.0 - throughput: 130.49719431032233 + inference_time: 5221.0 + throughput: 191.5341888527102 estimated_peak_memory_range: - min: 159744 - max: 55453776 + min: 638976 + max: 61606000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jegnr3oj5 + job_id: jwgomjdq5 job_status: Passed torchscript_onnx_qnn: - inference_time: 7958.0 - throughput: 125.65971349585323 + inference_time: 5413.0 + throughput: 184.74043968224643 estimated_peak_memory_range: - min: 6307840 - max: 77174624 + min: 6311936 + max: 66614816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 - job_id: jqpyv6q0p + job_id: jygzw1yog job_status: Passed - torchscript_onnx_ort: - inference_time: 6766.0 - throughput: 147.79781259237365 + torchscript_onnx: + inference_time: 6352.0 + throughput: 157.43073047858942 estimated_peak_memory_range: min: 6307840 - max: 49412144 + max: 46772624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 238 - job_id: jn5q93mep + job_id: jz576qnvg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:32:37Z' + timestamp: '2024-06-22T22:35:51Z' - torchscript_onnx_tflite: - inference_time: 10676.0 - throughput: 93.66804046459347 + inference_time: 7384.0 + throughput: 135.42795232936078 estimated_peak_memory_range: - min: 569344 - max: 2852616 + min: 544768 + max: 2903944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jopr1eokg + job_id: j1pv4jmkp job_status: Passed torchscript_onnx_qnn: - inference_time: 11306.0 - throughput: 88.44861135680169 + inference_time: 7668.0 + throughput: 130.41210224308816 estimated_peak_memory_range: - min: 16384 - max: 52829760 + min: 6307840 + max: 26699136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 - job_id: j1p8wz4qp + job_id: jmg9862wp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:32:35Z' + timestamp: 
'2024-06-22T22:35:47Z' + - torchscript_onnx_tflite: + inference_time: 7357.0 + throughput: 135.92496941688188 + estimated_peak_memory_range: + min: 16384 + max: 1854216 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 149 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 149 + job_id: j7gj1jyvg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 7765.0 + throughput: 128.78300064391502 + estimated_peak_memory_range: + min: 6385664 + max: 24453616 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 236 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 236 + job_id: jnp13r185 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:35:48Z' - torchscript_onnx_qnn: - inference_time: 20526.0 - throughput: 48.718698236383126 + inference_time: 7505.0 + throughput: 133.24450366422386 estimated_peak_memory_range: min: 6303744 max: 6303744 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 - job_id: j2p0elv05 + job_id: jz5wxjz3p job_status: Passed - torchscript_onnx_ort: - inference_time: 8769.0 - throughput: 114.03808872163303 + torchscript_onnx: + inference_time: 8714.0 + throughput: 114.75786091347257 estimated_peak_memory_range: - min: 30912512 - max: 30912512 + min: 46301184 + max: 46301184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 238 - job_id: j1gle312p + job_id: jqp48z48g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:32:38Z' + timestamp: '2024-06-22T22:35:52Z' diff --git a/qai_hub_models/models/ffnet_78s_quantized/export.py b/qai_hub_models/models/ffnet_78s_quantized/export.py index c09312bb..90c234b5 100644 --- a/qai_hub_models/models/ffnet_78s_quantized/export.py +++ b/qai_hub_models/models/ffnet_78s_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -216,7 +216,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == 
TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) @@ -229,7 +229,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/ffnet_78s_quantized/perf.yaml b/qai_hub_models/models/ffnet_78s_quantized/perf.yaml index 9f134ee8..98ce0e40 100644 --- a/qai_hub_models/models/ffnet_78s_quantized/perf.yaml +++ b/qai_hub_models/models/ffnet_78s_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: FFNet-78S-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 8325.0 - throughput: 120.12012012012012 + inference_time: 8346.0 + throughput: 119.81787682722262 estimated_peak_memory_range: - min: 663552 - max: 8732048 + min: 24576 + max: 39754352 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,22 +58,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 154 - job_id: j1p3qewm5 - job_status: Passed - torchscript_onnx_ort: - inference_time: 9764.0 - throughput: 102.41704219582138 - estimated_peak_memory_range: - min: 7573504 - max: 52534152 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 149 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 149 - job_id: jvgd7oz6g + job_id: jo5m4jkd5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -78,13 +67,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:33:40Z' + timestamp: '2024-06-22T22:36:43Z' - torchscript_onnx_tflite: - inference_time: 6002.0 - throughput: 166.61112962345885 + inference_time: 6005.0 + throughput: 166.5278934221482 estimated_peak_memory_range: - min: 57344 - max: 86915504 + min: 655360 + max: 97393488 primary_compute_unit: NPU precision: int8 layer_info: @@ -92,22 +81,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 154 - job_id: jwgoe341p - job_status: Passed - torchscript_onnx_ort: - inference_time: 7233.0 - throughput: 138.25521913452232 - estimated_peak_memory_range: - min: 8347648 - max: 53601040 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 149 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 149 - job_id: jz57vx7n5 + job_id: jegnxjqk5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -116,13 +90,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:33:41Z' + timestamp: '2024-06-22T22:36:45Z' - torchscript_onnx_tflite: - inference_time: 8359.0 - throughput: 119.63153487259241 + inference_time: 8457.0 + throughput: 118.24524062906468 estimated_peak_memory_range: - min: 679936 - max: 2337912 + min: 708608 + max: 2335160 primary_compute_unit: NPU precision: int8 layer_info: @@ -130,7 +104,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 154 - job_id: j1pvzv9zg + job_id: jopr9zd0p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -139,13 +113,13 @@ 
models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:33:33Z' + timestamp: '2024-06-22T22:36:46Z' - torchscript_onnx_tflite: - inference_time: 44458.0 - throughput: 22.49313959242431 + inference_time: 8334.0 + throughput: 119.99040076793857 estimated_peak_memory_range: - min: 729088 - max: 44729792 + min: 688128 + max: 2311856 primary_compute_unit: NPU precision: int8 layer_info: @@ -153,22 +127,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 154 - job_id: j7gjkew15 + job_id: jep2j2dr5 job_status: Passed reference_device_info: - name: RB3 Gen 2 (Proxy) - os: '12' - form_factor: Iot + name: SA8775 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: Qcs6490 - timestamp: '2024-06-08T22:33:33Z' + chipset: Sa8775p + timestamp: '2024-06-22T22:36:47Z' - torchscript_onnx_tflite: - inference_time: 219858.0 - throughput: 4.548390324664101 + inference_time: 44723.0 + throughput: 22.35985958008184 estimated_peak_memory_range: - min: 393216 - max: 2901200 + min: 729088 + max: 49186032 primary_compute_unit: NPU precision: int8 layer_info: @@ -176,36 +150,36 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 154 - job_id: jlpe4kl85 + job_id: jqpyn928g job_status: Passed reference_device_info: - name: RB5 (Proxy) + name: RB3 Gen 2 (Proxy) os: '12' form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Qcs8250 - timestamp: '2024-06-08T22:33:34Z' - - torchscript_onnx_ort: - inference_time: 9426.0 - throughput: 106.08953957139826 + chipset: Qcs6490 + timestamp: '2024-06-22T22:36:48Z' + - torchscript_onnx_tflite: + inference_time: 220960.0 + throughput: 4.525706010137582 estimated_peak_memory_range: - min: 5931008 - max: 5931008 + min: 663552 + max: 10246232 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 149 + layers_on_npu: 154 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 149 - job_id: jqp4jv92p + total_layers: 154 + job_id: j2p0kn995 job_status: Passed reference_device_info: - name: Snapdragon X Elite CRD - os: '11' - form_factor: Compute - os_name: Windows + name: RB5 (Proxy) + os: '12' + form_factor: Iot + os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:33:42Z' + chipset: Qcs8250 + timestamp: '2024-06-22T22:36:49Z' diff --git a/qai_hub_models/models/googlenet/export.py b/qai_hub_models/models/googlenet/export.py index e611b7d9..d1493151 100644 --- a/qai_hub_models/models/googlenet/export.py +++ b/qai_hub_models/models/googlenet/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif 
target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/googlenet/perf.yaml b/qai_hub_models/models/googlenet/perf.yaml index 46726a11..00534209 100644 --- a/qai_hub_models/models/googlenet/perf.yaml +++ b/qai_hub_models/models/googlenet/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: GoogLeNet performance_metrics: - torchscript_onnx_tflite: - inference_time: 1052.0 - throughput: 950.5703422053232 + inference_time: 1046.0 + throughput: 956.0229445506692 estimated_peak_memory_range: - min: 73728 - max: 1671408 + min: 24576 + max: 1843352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: j7gjkeq15 + job_id: j1gl7jqj5 job_status: Passed torchscript_onnx_qnn: inference_time: 1088.0 throughput: 919.1176470588235 estimated_peak_memory_range: min: 16384 - max: 26332424 + max: 90728480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jz5wmq44g + job_id: j1pv4jykp job_status: Passed - torchscript_onnx_ort: - inference_time: 1306.0 - throughput: 765.6967840735069 + torchscript_onnx: + inference_time: 1253.0 + throughput: 798.0845969672786 estimated_peak_memory_range: - min: 81920 - max: 33177416 + min: 12288 + max: 35524544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jz57vx9n5 + job_id: jmg9867wp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:34:47Z' + timestamp: '2024-06-22T22:38:16Z' - torchscript_onnx_tflite: - inference_time: 686.0 - throughput: 1457.725947521866 + inference_time: 681.0 + throughput: 1468.4287812041116 estimated_peak_memory_range: min: 16384 - max: 47804608 + max: 50308432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jlpe4ky85 + job_id: jw56vk06p job_status: Passed torchscript_onnx_qnn: - inference_time: 700.0 - throughput: 1428.5714285714287 + inference_time: 699.0 + throughput: 1430.615164520744 estimated_peak_memory_range: min: 0 - max: 53870528 + max: 49524992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jmg99wdmg + job_id: j7gj1j6vg job_status: Passed - torchscript_onnx_ort: - inference_time: 828.0 - throughput: 1207.729468599034 + torchscript_onnx: + inference_time: 840.0 + throughput: 1190.4761904761904 estimated_peak_memory_range: - min: 618496 - max: 31247424 + min: 0 + max: 27602224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jqp4jv32p + job_id: 
jnp13rk85 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:34:48Z' + timestamp: '2024-06-22T22:38:17Z' - torchscript_onnx_tflite: - inference_time: 1048.0 - throughput: 954.1984732824427 + inference_time: 1038.0 + throughput: 963.3911368015414 estimated_peak_memory_range: - min: 40960 - max: 17749600 + min: 24576 + max: 190855136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jygzvrn4p + job_id: j1p38yr35 job_status: Passed torchscript_onnx_qnn: - inference_time: 1098.0 - throughput: 910.7468123861566 + inference_time: 1088.0 + throughput: 919.1176470588235 estimated_peak_memory_range: - min: 491520 - max: 26782184 + min: 634880 + max: 5200568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jvgd7o26g + job_id: jygzw1qog job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:34:46Z' + timestamp: '2024-06-22T22:38:14Z' + - torchscript_onnx_tflite: + inference_time: 1048.0 + throughput: 954.1984732824427 + estimated_peak_memory_range: + min: 12288 + max: 2028152 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 84 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 84 + job_id: jwgomj9q5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1099.0 + throughput: 909.9181073703367 + estimated_peak_memory_range: + min: 36864 + max: 37178992 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 143 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 143 + job_id: jz5wxj03p + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:38:15Z' - torchscript_onnx_qnn: - inference_time: 1266.0 - throughput: 789.8894154818325 + inference_time: 1231.0 + throughput: 812.3476848090983 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jnp1qe6ng + job_id: jlpe2j0op job_status: Passed - torchscript_onnx_ort: - inference_time: 1388.0 - throughput: 720.4610951008646 + torchscript_onnx: + inference_time: 1329.0 + throughput: 752.4454477050414 estimated_peak_memory_range: - min: 671744 - max: 671744 + min: 26718208 + max: 26718208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: j0pxeyx85 + job_id: jvgd0jyrp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:34:49Z' + timestamp: '2024-06-22T22:38:19Z' diff --git a/qai_hub_models/models/googlenet_quantized/export.py b/qai_hub_models/models/googlenet_quantized/export.py index c9504b86..ca9d6b79 100644 --- a/qai_hub_models/models/googlenet_quantized/export.py +++ b/qai_hub_models/models/googlenet_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, 
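Every export_model signature in this diff switches its default device from "Samsung Galaxy S23" to the "Samsung Galaxy S23 (Family)" alias, presumably so jobs can be scheduled on any device in that family. A minimal illustration of calling one of these entry points directly, using only keyword arguments visible in the hunks above (a configured Qualcomm AI Hub client is assumed; the remaining parameters keep their defaults):

from qai_hub_models.models.googlenet.export import export_model

# Compile-only sketch: skip the on-device profiling and inference stages.
export_model(
    device="Samsung Galaxy S23 (Family)",  # the new default, written out explicitly here
    skip_profiling=True,
    skip_inferencing=True,
)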
skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/googlenet_quantized/model.py b/qai_hub_models/models/googlenet_quantized/model.py index e41ef8c4..ca5a57f1 100644 --- a/qai_hub_models/models/googlenet_quantized/model.py +++ b/qai_hub_models/models/googlenet_quantized/model.py @@ -82,5 +82,4 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() return cls(sim) diff --git a/qai_hub_models/models/googlenet_quantized/perf.yaml b/qai_hub_models/models/googlenet_quantized/perf.yaml index d51b481d..577bb677 100644 --- a/qai_hub_models/models/googlenet_quantized/perf.yaml +++ b/qai_hub_models/models/googlenet_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: GoogLeNetQuantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 298.0 - throughput: 3355.7046979865772 + inference_time: 311.0 + throughput: 3215.434083601286 estimated_peak_memory_range: - min: 20480 - max: 1284320 + min: 12288 + max: 9967728 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jegnr3kj5 + job_id: jqp48z68g job_status: Passed torchscript_onnx_qnn: - inference_time: 342.0 - throughput: 2923.9766081871344 + inference_time: 352.0 + throughput: 2840.909090909091 estimated_peak_memory_range: min: 16384 - max: 10406440 + max: 79166928 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: j1p8wzxqp - job_status: Passed - torchscript_onnx_ort: - inference_time: 523.0 - throughput: 1912.0458891013384 - estimated_peak_memory_range: - min: 12288 - max: 12422920 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 91 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 91 - job_id: jw56qn7ng + job_id: jqpyn9k8g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:35:33Z' + timestamp: '2024-06-22T22:38:59Z' - torchscript_onnx_tflite: - inference_time: 237.0 - throughput: 4219.4092827004215 + inference_time: 229.0 + throughput: 
4366.812227074236 estimated_peak_memory_range: min: 12288 - max: 34025648 + max: 36833376 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jopr1ewkg + job_id: j0pxmw83g job_status: Passed torchscript_onnx_qnn: inference_time: 244.0 throughput: 4098.360655737705 estimated_peak_memory_range: - min: 0 - max: 42694240 + min: 163840 + max: 40895152 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jogkr34v5 - job_status: Passed - torchscript_onnx_ort: - inference_time: 393.0 - throughput: 2544.529262086514 - estimated_peak_memory_range: - min: 12288 - max: 30491248 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 91 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 91 - job_id: j1p3qe9m5 + job_id: j2p0kn895 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +120,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:35:34Z' + timestamp: '2024-06-22T22:39:01Z' - torchscript_onnx_tflite: inference_time: 298.0 throughput: 3355.7046979865772 estimated_peak_memory_range: min: 20480 - max: 1812976 + max: 1383264 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jep23le6g + job_id: jo5m4j1d5 job_status: Passed torchscript_onnx_qnn: - inference_time: 335.0 - throughput: 2985.0746268656717 + inference_time: 342.0 + throughput: 2923.9766081871344 estimated_peak_memory_range: - min: 167936 - max: 10553224 + min: 172032 + max: 4165120 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: j1gle3x2p + job_id: jogkdjwwp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +158,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:35:32Z' + timestamp: '2024-06-22T22:39:03Z' - torchscript_onnx_tflite: - inference_time: 964.0 - throughput: 1037.344398340249 + inference_time: 295.0 + throughput: 3389.830508474576 estimated_peak_memory_range: min: 12288 - max: 18322160 + max: 1439240 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,7 +172,45 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jqpyv6m0p + job_id: jegnxjdk5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 336.0 + throughput: 2976.190476190476 + estimated_peak_memory_range: + min: 16384 + max: 127409384 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 86 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 86 + job_id: jn5qwjxn5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:39:05Z' + - torchscript_onnx_tflite: + inference_time: 959.0 + throughput: 1042.752867570386 + estimated_peak_memory_range: + min: 36864 + max: 20375936 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 84 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 84 + job_id: jopr9zm0p job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -207,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-08T22:35:26Z' + timestamp: 
'2024-06-22T22:38:57Z' - torchscript_onnx_tflite: - inference_time: 5711.0 - throughput: 175.1006828926633 + inference_time: 5663.0 + throughput: 176.58484901995408 estimated_peak_memory_range: - min: 16384 - max: 2182760 + min: 57344 + max: 1977656 primary_compute_unit: NPU precision: int8 layer_info: @@ -221,7 +233,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: j2p0elj05 + job_id: jep2j2qr5 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -230,13 +242,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-08T22:35:27Z' + timestamp: '2024-06-22T22:38:58Z' - torchscript_onnx_qnn: - inference_time: 438.0 - throughput: 2283.10502283105 + inference_time: 439.0 + throughput: 2277.904328018223 estimated_peak_memory_range: - min: 536576 - max: 536576 + min: 528384 + max: 528384 primary_compute_unit: NPU precision: int8 layer_info: @@ -244,22 +256,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jn5q93yep - job_status: Passed - torchscript_onnx_ort: - inference_time: 526.0 - throughput: 1901.1406844106464 - estimated_peak_memory_range: - min: 11812864 - max: 11812864 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 91 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 91 - job_id: jwgoe3r1p + job_id: j1p88ldkp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -268,4 +265,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:35:35Z' + timestamp: '2024-06-22T22:39:02Z' diff --git a/qai_hub_models/models/hrnet_pose/export.py b/qai_hub_models/models/hrnet_pose/export.py index 6853ea8f..80e33193 100644 --- a/qai_hub_models/models/hrnet_pose/export.py +++ b/qai_hub_models/models/hrnet_pose/export.py @@ -38,7 +38,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,13 +117,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -169,7 +168,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -187,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -209,7 +208,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git 
a/qai_hub_models/models/hrnet_pose/model.py b/qai_hub_models/models/hrnet_pose/model.py index c4d3c102..184ebd11 100644 --- a/qai_hub_models/models/hrnet_pose/model.py +++ b/qai_hub_models/models/hrnet_pose/model.py @@ -71,7 +71,7 @@ def from_pretrained(cls) -> HRNetPose: cfg.freeze() net = PoseHighResolutionNet(cfg) net.load_state_dict(weights) - return cls(net).eval() + return cls(net) def forward(self, image): """ diff --git a/qai_hub_models/models/hrnet_pose/perf.yaml b/qai_hub_models/models/hrnet_pose/perf.yaml index 26ccc19f..ffae2b22 100644 --- a/qai_hub_models/models/hrnet_pose/perf.yaml +++ b/qai_hub_models/models/hrnet_pose/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: HRNetPose performance_metrics: - torchscript_onnx_tflite: - inference_time: 2822.0 - throughput: 354.3586109142452 + inference_time: 2824.0 + throughput: 354.10764872521247 estimated_peak_memory_range: - min: 28672 - max: 2472016 + min: 24576 + max: 2661720 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 516 - job_id: j7gjke715 + job_id: jlpe2jqop job_status: Passed torchscript_onnx_qnn: - inference_time: 2908.0 - throughput: 343.878954607978 + inference_time: 2922.0 + throughput: 342.23134839151265 estimated_peak_memory_range: - min: 16384 - max: 21168936 + min: 36864 + max: 17867304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: jz5wmq74g + job_id: jnp13r985 job_status: Passed - torchscript_onnx_ort: - inference_time: 3074.0 - throughput: 325.30904359141186 + torchscript_onnx: + inference_time: 3097.0 + throughput: 322.8931223764934 estimated_peak_memory_range: min: 12288 - max: 131380776 + max: 131694472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 749 - job_id: jz5wmq7zg + job_id: jvgd0jkzp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:36:14Z' + timestamp: '2024-06-22T22:39:53Z' - torchscript_onnx_tflite: - inference_time: 2066.0 - throughput: 484.027105517909 + inference_time: 2048.0 + throughput: 488.28125 estimated_peak_memory_range: min: 16384 - max: 109820208 + max: 120963120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 516 - job_id: jlpe4kz85 + job_id: jygzw16og job_status: Passed torchscript_onnx_qnn: - inference_time: 2134.0 - throughput: 468.6035613870665 + inference_time: 2125.0 + throughput: 470.5882352941176 estimated_peak_memory_range: - min: 606208 - max: 190071840 + min: 651264 + max: 163638912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: jmg99wmmg + job_id: jvgd0jkrp job_status: Passed - torchscript_onnx_ort: - inference_time: 2205.0 - throughput: 453.51473922902494 + torchscript_onnx: + inference_time: 2246.0 + throughput: 445.2359750667854 estimated_peak_memory_range: min: 12288 - max: 
92302688 + max: 75139088 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 749 - job_id: jmg99wmqg + job_id: jz576qm9g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:36:15Z' + timestamp: '2024-06-22T22:39:54Z' - torchscript_onnx_tflite: - inference_time: 2832.0 - throughput: 353.1073446327684 + inference_time: 2827.0 + throughput: 353.73187124159887 estimated_peak_memory_range: - min: 28672 - max: 3094624 + min: 49152 + max: 2840272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 516 - job_id: jygzvrm4p + job_id: jz5wxjk3p job_status: Passed torchscript_onnx_qnn: - inference_time: 2903.0 - throughput: 344.47123665173956 + inference_time: 2900.0 + throughput: 344.82758620689657 estimated_peak_memory_range: - min: 12288 - max: 20792584 + min: 606208 + max: 15481792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: jvgd7o36g + job_id: jmg986r8p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:36:13Z' + timestamp: '2024-06-22T22:39:50Z' + - torchscript_onnx_tflite: + inference_time: 2836.0 + throughput: 352.60930888575456 + estimated_peak_memory_range: + min: 32768 + max: 2799448 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 516 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 516 + job_id: jmg986rwp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 2927.0 + throughput: 341.646737273659 + estimated_peak_memory_range: + min: 12288 + max: 21375320 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 747 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 747 + job_id: jnp13r975 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:39:52Z' - torchscript_onnx_qnn: - inference_time: 3132.0 - throughput: 319.28480204342276 + inference_time: 2886.0 + throughput: 346.5003465003465 estimated_peak_memory_range: - min: 897024 - max: 897024 + min: 589824 + max: 589824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: jnp1qejng + job_id: jz5wxjkmp job_status: Passed - torchscript_onnx_ort: - inference_time: 2963.0 - throughput: 337.4957813027337 + torchscript_onnx: + inference_time: 2975.0 + throughput: 336.1344537815126 estimated_peak_memory_range: - min: 49115136 - max: 49115136 + min: 44404736 + max: 44404736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 749 - job_id: jnp1qejkg + job_id: jqp48z71g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:36:16Z' + timestamp: '2024-06-22T22:39:55Z' diff --git a/qai_hub_models/models/hrnet_pose_quantized/README.md b/qai_hub_models/models/hrnet_pose_quantized/README.md new file mode 100644 index 
00000000..11dac69c --- /dev/null +++ b/qai_hub_models/models/hrnet_pose_quantized/README.md @@ -0,0 +1,61 @@ +[![Qualcomm® AI Hub Models](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/quic-logo.jpg)](../../README.md) + + +# [HRNetPoseQuantized: Perform accurate human pose estimation](https://aihub.qualcomm.com/models/hrnet_pose_quantized) + +HRNet performs pose estimation in high-resolution representations. + +This is based on the implementation of HRNetPoseQuantized found +[here](https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/hrnet_posenet). This repository contains scripts for optimized on-device +export suitable to run on Qualcomm® devices. More details on model performance +across various devices can be found [here](https://aihub.qualcomm.com/models/hrnet_pose_quantized). + +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on +a hosted Qualcomm® device. + + + + +## Example & Usage + +Install the package via pip: +```bash +pip install "qai_hub_models[hrnet_pose_quantized]" +``` + + +Once installed, run the following simple CLI demo: + +```bash +python -m qai_hub_models.models.hrnet_pose_quantized.demo +``` +More details on the CLI tool can be found with the `--help` option. See +[demo.py](demo.py) for sample usage of the model including pre/post processing +scripts. Please refer to our [general instructions on using +models](../../../#getting-started) for more usage instructions. + +## Export for on-device deployment + +This repository contains export scripts that produce a model optimized for +on-device deployment. This can be run as follows: + +```bash +python -m qai_hub_models.models.hrnet_pose_quantized.export +``` +Additional options are documented with the `--help` option. Note that the above +script requires access to Deployment instructions for Qualcomm® AI Hub. + +## License +- The license for the original implementation of HRNetPoseQuantized can be found + [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) + +## References +* [Deep High-Resolution Representation Learning for Human Pose Estimation](https://arxiv.org/abs/1902.09212) +* [Source Model Implementation](https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/hrnet_posenet) + +## Community +* Join [our AI Hub Slack community](https://qualcomm-ai-hub.slack.com/join/shared_invite/zt-2d5zsmas3-Sj0Q9TzslueCjS31eXG2UA#/shared-invite/email) to collaborate, post questions and learn more about on-device AI. +* For questions or feedback please [reach out to us](mailto:ai-hub-support@qti.qualcomm.com). + + diff --git a/qai_hub_models/models/hrnet_pose_quantized/__init__.py b/qai_hub_models/models/hrnet_pose_quantized/__init__.py new file mode 100644 index 00000000..26dbe409 --- /dev/null +++ b/qai_hub_models/models/hrnet_pose_quantized/__init__.py @@ -0,0 +1,8 @@ +# ----------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause +# ----------------------------------------------------------------------- +from qai_hub_models.models.hrnet_pose.app import HRNetPoseApp # noqa: F401 + +from .model import MODEL_ID # noqa: F401 +from .model import HRNetPoseQuantizable as Model # noqa: F401 diff --git a/qai_hub_models/models/hrnet_pose_quantized/conftest.py b/qai_hub_models/models/hrnet_pose_quantized/conftest.py new file mode 100644 index 00000000..c4b5e588 --- /dev/null +++ b/qai_hub_models/models/hrnet_pose_quantized/conftest.py @@ -0,0 +1,39 @@ +# ----------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# ----------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +import inspect + +import pytest + +from qai_hub_models.models.hrnet_pose_quantized import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +# Instantiate the model only once for all tests. +# Mock from_pretrained to always return the initialized model. +# This speeds up tests and limits memory leaks. +@pytest.fixture(scope="module", autouse=True) +def cached_from_pretrained(): + with pytest.MonkeyPatch.context() as mp: + pretrained_cache = {} + from_pretrained = Model.from_pretrained + sig = inspect.signature(from_pretrained) + + @skip_clone_repo_check + def _cached_from_pretrained(*args, **kwargs): + cache_key = str(args) + str(kwargs) + model = pretrained_cache.get(cache_key, None) + if model: + return model + else: + model = from_pretrained(*args, **kwargs) + pretrained_cache[cache_key] = model + return model + + _cached_from_pretrained.__signature__ = sig + + mp.setattr(Model, "from_pretrained", _cached_from_pretrained) + yield mp diff --git a/qai_hub_models/models/hrnet_pose_quantized/demo.py b/qai_hub_models/models/hrnet_pose_quantized/demo.py new file mode 100644 index 00000000..a5eca7ae --- /dev/null +++ b/qai_hub_models/models/hrnet_pose_quantized/demo.py @@ -0,0 +1,57 @@ +# ----------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# ----------------------------------------------------------------------- +from qai_hub_models.models.hrnet_pose.app import HRNetPoseApp +from qai_hub_models.models.hrnet_pose_quantized.model import ( + MODEL_ASSET_VERSION, + MODEL_ID, + HRNetPoseQuantizable, +) +from qai_hub_models.utils.args import ( + demo_model_from_cli_args, + get_model_cli_parser, + get_on_device_demo_parser, + validate_on_device_demo_args, +) +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image +from qai_hub_models.utils.display import display_or_save_image + +IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, "hrnet_pose_demo.png" +) + + +# The demo will display an image with the predicted keypoints.
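+#
+# A minimal sketch of the equivalent programmatic flow (illustrative only; it
+# mirrors what main() below does without the CLI parsing, using the helpers
+# imported above; the image path is a hypothetical local file):
+#
+#     model = HRNetPoseQuantizable.from_pretrained()
+#     app = HRNetPoseApp(model)
+#     image = load_image("pose_input.png")  # hypothetical input image
+#     keypoints = app.predict_pose_keypoints(image)[0]
+#     display_or_save_image(keypoints, ".", "keypoints_out.png", "keypoints")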
+def main(is_test: bool = False): + # Demo parameters + parser = get_model_cli_parser(HRNetPoseQuantizable) + parser = get_on_device_demo_parser(parser, add_output_dir=True) + parser.add_argument( + "--image", + type=str, + default=IMAGE_ADDRESS, + help="image file path or URL", + ) + + args = parser.parse_args([] if is_test else None) + validate_on_device_demo_args(args, MODEL_ID) + + # Load image & model + model = demo_model_from_cli_args(HRNetPoseQuantizable, MODEL_ID, args) + image = load_image(args.image) + print("Model Loaded") + + app = HRNetPoseApp(model) + keypoints = app.predict_pose_keypoints(image)[0] + if not is_test: + display_or_save_image( + keypoints, + args.output_dir, + "hrnetpose_quantized_demo_output.png", + "keypoints", + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/hrnet_pose_quantized/export.py b/qai_hub_models/models/hrnet_pose_quantized/export.py new file mode 100644 index 00000000..f1bb104e --- /dev/null +++ b/qai_hub_models/models/hrnet_pose_quantized/export.py @@ -0,0 +1,241 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import os +import warnings +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple, cast + +import qai_hub as hub + +from qai_hub_models.models.hrnet_pose_quantized import Model +from qai_hub_models.utils.args import ( + export_parser, + get_input_spec_kwargs, + get_model_kwargs, +) +from qai_hub_models.utils.base_model import TargetRuntime +from qai_hub_models.utils.compare import torch_inference +from qai_hub_models.utils.printing import ( + print_inference_metrics, + print_on_target_demo_cmd, + print_profile_metrics_from_job, +) +from qai_hub_models.utils.qai_hub_helpers import ( + can_access_qualcomm_ai_hub, + export_without_hub_access, + transpose_channel_first_to_last, + transpose_channel_last_to_first, +) +from qai_hub_models.utils.qnn_helpers import get_qnn_inputs + + +def export_model( + device: str = "Samsung Galaxy S23 (Family)", + chipset: Optional[str] = None, + skip_profiling: bool = False, + skip_inferencing: bool = False, + skip_downloading: bool = False, + skip_summary: bool = False, + output_dir: Optional[str] = None, + target_runtime: TargetRuntime = TargetRuntime.TFLITE, + compile_options: str = "", + profile_options: str = "", + **additional_model_kwargs, +) -> Tuple[hub.CompileJob, Optional[hub.ProfileJob], Optional[hub.InferenceJob]] | List[ + str +]: + """ + This function accomplishes 6 main tasks: + + 1. Instantiates a PyTorch model and converts it to a traced TorchScript format. + 2. Compiles the model to an asset that can be run on device. + 3. Profiles the model performance on real devices. + 4. Inferences the model on sample inputs. + 5. Downloads the model asset to the local directory. + 6. Summarizes the results from profiling and inference. + + Each of the last four steps can be optionally skipped using the input options. + + Parameters: + device: Device for which to export the model. + Full list of available devices can be found by running `hub.get_devices()`. + Defaults to DEFAULT_DEVICE if not specified. + chipset: If set, will choose a random device with this chipset. + Overrides the `device` argument. 
+ skip_profiling: If set, skips profiling of compiled model on real devices. + skip_inferencing: If set, skips computing on-device outputs from sample data. + skip_downloading: If set, skips downloading of compiled model. + skip_summary: If set, skips waiting for and summarizing results + from profiling and inference. + output_dir: Directory to store generated assets (e.g. compiled model). + Defaults to `/build/`. + target_runtime: Which on-device runtime to target. Default is TFLite. + compile_options: Additional options to pass when submitting the compile job. + profile_options: Additional options to pass when submitting the profile job. + **additional_model_kwargs: Additional optional kwargs used to customize + `model_cls.from_pretrained` and `model.get_input_spec` + + Returns: + A 3-tuple of: + * A CompileJob object containing metadata about the compile job submitted to hub. + * A ProfileJob containing metadata about the profile job (None if profiling skipped). + * An InferenceJob containing metadata about the inference job (None if inferencing skipped). + """ + model_name = "hrnet_pose_quantized" + output_path = Path(output_dir or Path.cwd() / "build" / model_name) + if chipset: + hub_device = hub.Device(attributes=f"chipset:{chipset}") + else: + hub_device = hub.Device(name=device) + if not can_access_qualcomm_ai_hub(): + return export_without_hub_access( + "hrnet_pose_quantized", + "HRNetPoseQuantized", + device, + skip_profiling, + skip_inferencing, + skip_downloading, + skip_summary, + output_path, + target_runtime, + compile_options, + profile_options, + ) + + # 1. Initialize PyTorch model + model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) + input_spec = model.get_input_spec( + **get_input_spec_kwargs(model, additional_model_kwargs) + ) + + # Trace the model + source_model = model.convert_to_hub_source_model( + target_runtime, output_path, input_spec + ) + if target_runtime == TargetRuntime.TFLITE: + quant_calibration_data = None + else: + quant_calibration_data = model.get_calibration_data(target_runtime, input_spec) + + # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) + channel_last_flags = ( + " --force_channel_last_input image" + " --force_channel_last_output output_0" + if target_runtime != TargetRuntime.ONNX + else "" + ) + + # 2. Compile the model to an on-device asset + model_compile_options = model.get_hub_compile_options( + target_runtime, compile_options + channel_last_flags, hub_device + ) + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( + model=source_model, + input_specs=input_spec, + device=hub_device, + name=model_name, + calibration_data=quant_calibration_data, + options=model_compile_options, + ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) + + # 3. Profile the model asset on real devices + profile_job: Optional[hub.client.ProfileJob] = None + if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print(f"Profiling model {model_name} on a hosted device.") + submitted_profile_job = hub.submit_profile_job( + model=compile_job.get_target_model(), + device=hub_device, + name=model_name, + options=profile_options_all, + ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) + + # 4. 
Run inference on-device with sample inputs + inference_job: Optional[hub.client.InferenceJob] = None + if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print( + f"Running inference for {model_name} on a hosted device with example inputs." + ) + sample_inputs = model.sample_inputs(input_spec) + hub_inputs = sample_inputs + if target_runtime == TargetRuntime.QNN: + hub_inputs = get_qnn_inputs(compile_job, sample_inputs) + # Convert inputs from channel first to channel last + hub_inputs = ( + sample_inputs + if target_runtime == TargetRuntime.ONNX + else transpose_channel_first_to_last("image", sample_inputs, target_runtime) + ) + submitted_inference_job = hub.submit_inference_job( + model=compile_job.get_target_model(), + inputs=hub_inputs, + device=hub_device, + name=model_name, + options=profile_options_all, + ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) + + # 5. Download the model asset to a local file + if not skip_downloading: + if target_runtime == TargetRuntime.QNN: + target_runtime_extension = "so" + elif target_runtime == TargetRuntime.TFLITE: + target_runtime_extension = "tflite" + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: + target_runtime_extension = "onnx" + + os.makedirs(output_path, exist_ok=True) + target_model: hub.Model = compile_job.get_target_model() # type: ignore + target_model.download( + str(output_path / f"{model_name}.{target_runtime_extension}") + ) + + # 6. Summarize the results from profiling and inference + if not skip_summary and not skip_profiling: + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore + print_profile_metrics_from_job(profile_job, profile_data) + + if not skip_summary and not skip_inferencing: + torch_out = torch_inference(model, sample_inputs) + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore + # Convert outputs from channel last to channel first + inference_result = ( + inference_result + if target_runtime == TargetRuntime.ONNX + else transpose_channel_last_to_first( + "output_0", inference_result, target_runtime + ) + ) + print_inference_metrics(inference_job, inference_result, torch_out) + + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + + return (compile_job, profile_job, inference_job) + + +def main(): + warnings.filterwarnings("ignore") + parser = export_parser(model_cls=Model, supports_onnx=False) + args = parser.parse_args() + export_model(**vars(args)) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/hrnet_pose_quantized/info.yaml b/qai_hub_models/models/hrnet_pose_quantized/info.yaml new file mode 100644 index 00000000..9c051f2b --- /dev/null +++ b/qai_hub_models/models/hrnet_pose_quantized/info.yaml @@ -0,0 +1,36 @@ +name: HRNetPoseQuantized +# id must match with the model dir name in qai_hub_models +id: hrnet_pose_quantized +status: public +headline: Perform accurate human pose estimation. +domain: Computer Vision +use_case: Pose Estimation +description: HRNet performs pose estimation in high-resolution representations. 
+tags: + - quantized +research_paper: https://arxiv.org/abs/1902.09212 +research_paper_title: Deep High-Resolution Representation Learning for Human Pose + Estimation +license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +source_repo: + https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/hrnet_posenet +technical_details: + Model checkpoint: hrnet_posenet_FP32_state_dict + Input resolution: 256x192 + Number of parameters: 28.5M + Model size: 109 MB +applicable_scenarios: + - Injury prevention training + - Sports performance analysis + - Posture recognition +form_factors: + - Phone + - Tablet + - IoT +related_models: [litehrnet, hrnet_pose] +has_static_banner: yes +has_animated_banner: yes +license_type: other +deploy_license_type: AI Model Hub License +dataset: [] diff --git a/qai_hub_models/models/hrnet_pose_quantized/model.py b/qai_hub_models/models/hrnet_pose_quantized/model.py new file mode 100644 index 00000000..ff7bf2e2 --- /dev/null +++ b/qai_hub_models/models/hrnet_pose_quantized/model.py @@ -0,0 +1,72 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from __future__ import annotations + +# isort: off +# This verifies aimet is installed, and this must be included first. +from qai_hub_models.utils.quantization_aimet import ( + AIMETQuantizableMixin, + tie_observers, + constrain_quantized_inputs_to_image_range, +) + +# isort: on + +import torch +from aimet_torch.cross_layer_equalization import equalize_model +from aimet_torch.model_preparer import prepare_model +from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim + +from qai_hub_models.models.hrnet_pose.model import HRNetPose +from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset + +MODEL_ID = __name__.split(".")[-2] +MODEL_ASSET_VERSION = 3 +DEFAULT_ENCODINGS = "hrnet_pose_quantized_encodings.json" + + +class HRNetPoseQuantizable(AIMETQuantizableMixin, HRNetPose): + """HRNetPose with post training quantization support + + Supports only 8 bit weights and activations, and only loads pre-quantized checkpoints. 
+ Support for quantizing using your own weights & data will come at a later date.""" + + def __init__( + self, + hrnet_model: QuantizationSimModel, + ) -> None: + HRNetPose.__init__(self, hrnet_model.model) + AIMETQuantizableMixin.__init__(self, hrnet_model) + + @classmethod + def from_pretrained( + cls, aimet_encodings: str | None = "DEFAULT" + ) -> HRNetPoseQuantizable: + model = HRNetPose.from_pretrained() + input_shape = HRNetPose.get_input_spec()["image"][0] + model = prepare_model(model) + equalize_model(model, input_shape) + + sim = QuantizationSimModel( + model, + quant_scheme="tf_enhanced", + default_param_bw=8, + default_output_bw=8, + config_file=get_default_aimet_config(), + dummy_input=torch.rand(input_shape), + ) + tie_observers(sim) + constrain_quantized_inputs_to_image_range(sim) + + if aimet_encodings: + if aimet_encodings == "DEFAULT": + aimet_encodings = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, DEFAULT_ENCODINGS + ).fetch() + load_encodings_to_sim(sim, aimet_encodings) + + final_model = cls(sim) + return final_model diff --git a/qai_hub_models/models/hrnet_pose_quantized/perf.yaml b/qai_hub_models/models/hrnet_pose_quantized/perf.yaml new file mode 100644 index 00000000..c185397f --- /dev/null +++ b/qai_hub_models/models/hrnet_pose_quantized/perf.yaml @@ -0,0 +1,265 @@ +aggregated: + supported_oses: + - Android + supported_devices: + - Google Pixel 3 + - Google Pixel 3a + - Google Pixel 3a XL + - Google Pixel 4 + - Google Pixel 4a + - Google Pixel 5a 5G + - QCS8250 (Proxy) + - QCS8550 (Proxy) + - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) + - Samsung Galaxy S21 + - Samsung Galaxy S21 Ultra + - Samsung Galaxy S21+ + - Samsung Galaxy S22 5G + - Samsung Galaxy S22 Ultra 5G + - Samsung Galaxy S22+ 5G + - Samsung Galaxy S23 + - Samsung Galaxy S23 Ultra + - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ + - Samsung Galaxy Tab S8 + - Snapdragon X Elite CRD + - Xiaomi 12 + - Xiaomi 12 Pro + supported_chipsets: + - Qcs8250 + - Qcs8550 + - Sa8540p + - Sa8775p + - Snapdragon® 8 Gen 1 + - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 + - Snapdragon® 888 + - Snapdragon® X Elite +models: +- name: HRNetPoseQuantized + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 958.0 + throughput: 1043.8413361169103 + estimated_peak_memory_range: + min: 24576 + max: 1789808 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 516 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 516 + job_id: jo5m4j795 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1238.0 + throughput: 807.7544426494346 + estimated_peak_memory_range: + min: 12288 + max: 15030312 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 488 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 488 + job_id: j1p88lnop + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-06-22T22:42:46Z' + - torchscript_onnx_tflite: + inference_time: 707.0 + throughput: 1414.4271570014143 + estimated_peak_memory_range: + min: 12288 + max: 103834048 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 516 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 516 + job_id: jegnxj4q5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 923.0 + throughput: 1083.4236186348862 + estimated_peak_memory_range: + min: 
16384 + max: 154376160 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 488 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 488 + job_id: jogkdj1np + job_status: Passed + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-06-22T22:42:48Z' + - torchscript_onnx_tflite: + inference_time: 951.0 + throughput: 1051.5247108307046 + estimated_peak_memory_range: + min: 24576 + max: 3344920 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 516 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 516 + job_id: jopr9zr7p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1246.0 + throughput: 802.5682182985554 + estimated_peak_memory_range: + min: 16384 + max: 11938672 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 488 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 488 + job_id: j1gl7jjm5 + job_status: Passed + reference_device_info: + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: Qcs8550 + timestamp: '2024-06-22T22:42:50Z' + - torchscript_onnx_tflite: + inference_time: 946.0 + throughput: 1057.0824524312895 + estimated_peak_memory_range: + min: 12288 + max: 3002248 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 516 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 516 + job_id: jep2j21q5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1240.0 + throughput: 806.4516129032259 + estimated_peak_memory_range: + min: 12288 + max: 21634464 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 488 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 488 + job_id: jw56vkkyp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:42:51Z' + - torchscript_onnx_tflite: + inference_time: 3686.0 + throughput: 271.2967986977754 + estimated_peak_memory_range: + min: 24576 + max: 64839808 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 516 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 516 + job_id: jqpyn9llg + job_status: Passed + reference_device_info: + name: RB3 Gen 2 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: Qcs6490 + timestamp: '2024-06-22T22:42:44Z' + - torchscript_onnx_tflite: + inference_time: 17208.0 + throughput: 58.11250581125058 + estimated_peak_memory_range: + min: 12288 + max: 2244152 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 516 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 516 + job_id: j2p0knwn5 + job_status: Passed + reference_device_info: + name: RB5 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: Qcs8250 + timestamp: '2024-06-22T22:42:45Z' + - torchscript_onnx_qnn: + inference_time: 1292.0 + throughput: 773.9938080495356 + estimated_peak_memory_range: + min: 331776 + max: 331776 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 488 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 488 + job_id: jn5qwjno5 + job_status: Passed + reference_device_info: + name: Snapdragon X Elite CRD + os: '11' + form_factor: Compute + os_name: Windows + manufacturer: Qualcomm + chipset: Snapdragon® X Elite + timestamp: 
'2024-06-22T22:42:49Z' diff --git a/qai_hub_models/models/hrnet_pose_quantized/requirements.txt b/qai_hub_models/models/hrnet_pose_quantized/requirements.txt new file mode 100644 index 00000000..a112c877 --- /dev/null +++ b/qai_hub_models/models/hrnet_pose_quantized/requirements.txt @@ -0,0 +1,5 @@ +aimet-torch==1.31.2; sys_platform == "linux" +yacs==0.1.8 +mmpose==1.2.0 +mmcv==2.1.0 +mmdet==3.2.0 diff --git a/qai_hub_models/models/hrnet_pose_quantized/test.py b/qai_hub_models/models/hrnet_pose_quantized/test.py new file mode 100644 index 00000000..70bb789b --- /dev/null +++ b/qai_hub_models/models/hrnet_pose_quantized/test.py @@ -0,0 +1,42 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +import numpy as np +import torch + +from qai_hub_models.models.hrnet_pose.app import HRNetPoseApp +from qai_hub_models.models.hrnet_pose.demo import IMAGE_ADDRESS +from qai_hub_models.models.hrnet_pose.demo import main as demo_main +from qai_hub_models.models.hrnet_pose_quantized.model import ( + MODEL_ASSET_VERSION, + MODEL_ID, + HRNetPoseQuantizable, +) +from qai_hub_models.utils.asset_loaders import ( + CachedWebModelAsset, + load_image, + load_numpy, +) +from qai_hub_models.utils.testing import skip_clone_repo_check + +OUTPUT_KEYPOINTS = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, "hrnet_keypoints.npy" +) + + +@skip_clone_repo_check +def test_task(): + # AIMET Quantization Simulator introduces randomness. Eliminate that for this test. + torch.manual_seed(0) + image = load_image(IMAGE_ADDRESS) + model = HRNetPoseQuantizable.from_pretrained() + app = HRNetPoseApp(model=model) + output = app.predict(image, raw_output=True) + output_gt = load_numpy(OUTPUT_KEYPOINTS) + np.testing.assert_allclose(output, output_gt, atol=5) + + +@skip_clone_repo_check +def test_demo(): + demo_main(is_test=True) diff --git a/qai_hub_models/models/huggingface_wavlm_base_plus/export.py b/qai_hub_models/models/huggingface_wavlm_base_plus/export.py index ff097a89..b64ea30f 100644 --- a/qai_hub_models/models/huggingface_wavlm_base_plus/export.py +++ b/qai_hub_models/models/huggingface_wavlm_base_plus/export.py @@ -35,7 +35,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -114,7 +114,6 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. 
Compile the model to an on-device asset @@ -171,7 +170,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -200,8 +199,8 @@ def main(): parser = export_parser( model_cls=Model, supports_qnn=False, - supports_ort=False, - supports_precompiled_ort=False, + supports_onnx=False, + supports_precompiled_qnn_onnx=False, ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/huggingface_wavlm_base_plus/model.py b/qai_hub_models/models/huggingface_wavlm_base_plus/model.py index f476aa98..4d633178 100644 --- a/qai_hub_models/models/huggingface_wavlm_base_plus/model.py +++ b/qai_hub_models/models/huggingface_wavlm_base_plus/model.py @@ -191,13 +191,13 @@ def convert_to_wavlm_npu(model: WavLMModel): conv_layer = model.feature_extractor.conv_layers[0] assert isinstance(conv_layer, WavLMGroupNormConvLayer) # Replace with NPU friendly implementation - conv_layer_npu = WavLMGroupNormConvLayerNPU(conv_layer).eval() + conv_layer_npu = WavLMGroupNormConvLayerNPU(conv_layer) model.feature_extractor.conv_layers[0] = conv_layer_npu conv_layer1 = model.feature_extractor.conv_layers[1].conv assert isinstance(conv_layer1, torch.nn.Conv1d) # Replace with NPU friendly implementation - conv_layer1_npu = SliceConv1d(conv_layer1).eval() + conv_layer1_npu = SliceConv1d(conv_layer1) model.feature_extractor.conv_layers[1].conv = conv_layer1_npu return model diff --git a/qai_hub_models/models/huggingface_wavlm_base_plus/perf.yaml b/qai_hub_models/models/huggingface_wavlm_base_plus/perf.yaml index cf928d5f..a2d908eb 100644 --- a/qai_hub_models/models/huggingface_wavlm_base_plus/perf.yaml +++ b/qai_hub_models/models/huggingface_wavlm_base_plus/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: HuggingFace-WavLM-Base-Plus performance_metrics: - torchscript_onnx_tflite: - inference_time: 920916.0 - throughput: 1.085875367568812 + inference_time: 982866.0 + throughput: 1.0174326917402778 estimated_peak_memory_range: - min: 147881984 - max: 155477640 + min: 149377024 + max: 152557208 primary_compute_unit: CPU precision: fp32 layer_info: @@ -48,7 +50,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 811 total_layers: 811 - job_id: jw56qn8vg + job_id: jz5wxjjmp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -57,13 +59,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:39:46Z' + timestamp: '2024-06-22T22:43:27Z' - torchscript_onnx_tflite: - inference_time: 819047.0 - throughput: 1.220931155354943 + inference_time: 829981.0 + throughput: 1.2048468579401215 estimated_peak_memory_range: - min: 148029440 - max: 185119104 + min: 149364736 + max: 187276640 primary_compute_unit: CPU precision: fp32 layer_info: @@ -71,7 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 811 total_layers: 811 - job_id: j1p3qezx5 + job_id: jmg98668p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ 
-80,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:39:47Z' + timestamp: '2024-06-22T22:43:28Z' - torchscript_onnx_tflite: - inference_time: 932003.0 - throughput: 1.0729579196633487 + inference_time: 902406.0 + throughput: 1.1081486603590844 estimated_peak_memory_range: - min: 149381120 - max: 153841752 + min: 149413888 + max: 152860856 primary_compute_unit: CPU precision: fp32 layer_info: @@ -94,7 +96,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 811 total_layers: 811 - job_id: jep2moqq5 + job_id: jnp13rr75 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -103,4 +105,27 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-23T16:02:38Z' + timestamp: '2024-06-22T22:43:29Z' + - torchscript_onnx_tflite: + inference_time: 915091.0 + throughput: 1.0927874932656971 + estimated_peak_memory_range: + min: 148840448 + max: 162000816 + primary_compute_unit: CPU + precision: fp32 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 811 + total_layers: 811 + job_id: jvgd0jjzp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:43:30Z' diff --git a/qai_hub_models/models/inception_v3/export.py b/qai_hub_models/models/inception_v3/export.py index 868a0239..7bb1e75a 100644 --- a/qai_hub_models/models/inception_v3/export.py +++ b/qai_hub_models/models/inception_v3/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/inception_v3/perf.yaml b/qai_hub_models/models/inception_v3/perf.yaml index bf2b161e..f7a72623 100644 --- a/qai_hub_models/models/inception_v3/perf.yaml +++ b/qai_hub_models/models/inception_v3/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: Inception-v3 performance_metrics: - torchscript_onnx_tflite: - inference_time: 1355.0 - 
throughput: 738.0073800738007 + inference_time: 1358.0 + throughput: 736.3770250368188 estimated_peak_memory_range: - min: 24576 - max: 2203288 + min: 16384 + max: 1607904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jvgd7odkg + job_id: j2p0knnn5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1424.0 - throughput: 702.2471910112359 + inference_time: 1420.0 + throughput: 704.2253521126761 estimated_peak_memory_range: min: 16384 - max: 150398664 + max: 150142440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: j0pxeylj5 + job_id: j1gl7jnm5 job_status: Passed - torchscript_onnx_ort: - inference_time: 1714.0 - throughput: 583.4305717619603 + torchscript_onnx: + inference_time: 1745.0 + throughput: 573.0659025787966 estimated_peak_memory_range: - min: 24576 - max: 216921632 + min: 49152 + max: 238093168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jep23loxg + job_id: j7gj1jxeg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:40:24Z' + timestamp: '2024-06-22T22:44:11Z' - torchscript_onnx_tflite: - inference_time: 1026.0 - throughput: 974.6588693957115 + inference_time: 1029.0 + throughput: 971.8172983479105 estimated_peak_memory_range: - min: 12288 - max: 54111920 + min: 16384 + max: 56966656 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jz57vxeq5 + job_id: j1p88llop job_status: Passed torchscript_onnx_qnn: - inference_time: 1055.0 - throughput: 947.8672985781991 + inference_time: 1051.0 + throughput: 951.4747859181732 estimated_peak_memory_range: - min: 0 - max: 64200016 + min: 618496 + max: 55280240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jo5mv30y5 + job_id: jw56vk6yp job_status: Passed - torchscript_onnx_ort: - inference_time: 1328.0 - throughput: 753.0120481927711 + torchscript_onnx: + inference_time: 1309.0 + throughput: 763.9419404125287 estimated_peak_memory_range: - min: 0 - max: 33764336 + min: 618496 + max: 28195584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jqpyv68rp + job_id: jlpe2j9vp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:40:25Z' + timestamp: '2024-06-22T22:44:13Z' - torchscript_onnx_tflite: - inference_time: 1355.0 - throughput: 738.0073800738007 + inference_time: 1356.0 + throughput: 737.4631268436578 estimated_peak_memory_range: - min: 16384 - max: 2130328 + min: 28672 + max: 2258048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jqp4jvyqp + job_id: jogkdjjnp job_status: Passed torchscript_onnx_qnn: - inference_time: 1411.0 - throughput: 708.7172218284904 + inference_time: 1391.0 + throughput: 718.9072609633357 estimated_peak_memory_range: - min: 0 - max: 150030456 + min: 626688 + max: 6108008 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jopr1exvg + job_id: jwgomjyk5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:40:23Z' + timestamp: '2024-06-22T22:44:09Z' + - torchscript_onnx_tflite: + inference_time: 1376.0 + throughput: 726.7441860465116 + estimated_peak_memory_range: + min: 61440 + max: 1719272 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 129 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 129 + job_id: jn5qwjjo5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1409.0 + throughput: 709.7232079488999 + estimated_peak_memory_range: + min: 20480 + max: 149840536 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 219 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 219 + job_id: j1pv4j3rp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:44:10Z' - torchscript_onnx_qnn: - inference_time: 1503.0 - throughput: 665.335994677312 + inference_time: 1441.0 + throughput: 693.9625260235947 estimated_peak_memory_range: - min: 1097728 - max: 1097728 + min: 602112 + max: 602112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jegnr31v5 + job_id: j1p38ykn5 job_status: Passed - torchscript_onnx_ort: - inference_time: 1640.0 - throughput: 609.7560975609756 + torchscript_onnx: + inference_time: 1655.0 + throughput: 604.2296072507553 estimated_peak_memory_range: - min: 39940096 - max: 39940096 + min: 29585408 + max: 29585408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: j2p0elo25 + job_id: jygzw1exg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:40:26Z' + timestamp: '2024-06-22T22:44:14Z' diff --git a/qai_hub_models/models/inception_v3_quantized/export.py b/qai_hub_models/models/inception_v3_quantized/export.py index e6cd4f44..d4111ea5 100644 --- a/qai_hub_models/models/inception_v3_quantized/export.py +++ b/qai_hub_models/models/inception_v3_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif 
target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/inception_v3_quantized/model.py b/qai_hub_models/models/inception_v3_quantized/model.py index 2a74e221..c5eaac55 100644 --- a/qai_hub_models/models/inception_v3_quantized/model.py +++ b/qai_hub_models/models/inception_v3_quantized/model.py @@ -85,5 +85,4 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() return cls(sim) diff --git a/qai_hub_models/models/inception_v3_quantized/perf.yaml b/qai_hub_models/models/inception_v3_quantized/perf.yaml index a770e878..bca3cb76 100644 --- a/qai_hub_models/models/inception_v3_quantized/perf.yaml +++ b/qai_hub_models/models/inception_v3_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: Inception-v3-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 615.0 - throughput: 1626.0162601626016 + inference_time: 619.0 + throughput: 1615.5088852988692 estimated_peak_memory_range: - min: 12288 - max: 1478976 + min: 32768 + max: 1575264 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,29 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jogkr36y5 + job_id: jmg986v8p job_status: Passed torchscript_onnx_qnn: - inference_time: 646.0 - throughput: 1547.9876160990711 - estimated_peak_memory_range: - min: 12288 - max: 165286688 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 134 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 134 - job_id: jwgoe3d4p - job_status: Passed - torchscript_onnx_ort: - inference_time: 844.0 - throughput: 1184.8341232227488 + inference_time: 652.0 + throughput: 1533.7423312883436 estimated_peak_memory_range: - min: 12288 - max: 65222768 + min: 16384 + max: 251878408 primary_compute_unit: NPU precision: int8 layer_info: @@ -84,7 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 134 - job_id: jygzvrzzp + job_id: jo5m4jr95 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:41:32Z' + timestamp: '2024-06-22T22:45:18Z' - torchscript_onnx_tflite: - inference_time: 486.0 - throughput: 2057.61316872428 + inference_time: 503.0 + throughput: 1988.0715705765408 estimated_peak_memory_range: min: 12288 - max: 67571472 + max: 70334384 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jn5q9347p + job_id: jnp13r075 job_status: Passed torchscript_onnx_qnn: - inference_time: 496.0 - throughput: 2016.1290322580646 + inference_time: 493.0 + throughput: 2028.3975659229209 estimated_peak_memory_range: - min: 167936 - max: 54564464 + min: 163840 + max: 47775424 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,14 +111,37 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 134 - job_id: j1pvzv27g + job_id: jegnxj2q5 job_status: Passed - torchscript_onnx_ort: - 
inference_time: 659.0 - throughput: 1517.4506828528072 + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-06-22T22:45:19Z' + - torchscript_onnx_tflite: + inference_time: 631.0 + throughput: 1584.7860538827258 estimated_peak_memory_range: min: 12288 - max: 43078608 + max: 1574296 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 144 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 144 + job_id: jvgd0jwzp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 640.0 + throughput: 1562.5 + estimated_peak_memory_range: + min: 16384 + max: 45098152 primary_compute_unit: NPU precision: int8 layer_info: @@ -137,22 +149,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 134 - job_id: jz5wmqyzg + job_id: jep2j28q5 job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:41:33Z' + manufacturer: Qualcomm + chipset: Qcs8550 + timestamp: '2024-06-22T22:45:22Z' - torchscript_onnx_tflite: inference_time: 625.0 throughput: 1600.0 estimated_peak_memory_range: - min: 24576 - max: 1548872 + min: 12288 + max: 1706288 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +172,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: j1gle3wep + job_id: jz576qz9g job_status: Passed torchscript_onnx_qnn: - inference_time: 648.0 - throughput: 1543.20987654321 + inference_time: 639.0 + throughput: 1564.9452269170579 estimated_peak_memory_range: - min: 36864 - max: 39620504 + min: 16384 + max: 28691688 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,22 +187,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 134 - job_id: jlpe4k675 + job_id: jqpyn9elg job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8775 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: Qcs8550 - timestamp: '2024-06-08T22:41:31Z' + chipset: Sa8775p + timestamp: '2024-06-22T22:45:23Z' - torchscript_onnx_tflite: - inference_time: 2357.0 - throughput: 424.26813746287655 + inference_time: 2365.0 + throughput: 422.8329809725159 estimated_peak_memory_range: min: 12288 - max: 22249744 + max: 25181632 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,7 +210,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jw56qnovg + job_id: jqp48zq1g job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -207,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-08T22:41:26Z' + timestamp: '2024-06-22T22:45:15Z' - torchscript_onnx_tflite: - inference_time: 7805.0 - throughput: 128.12299807815504 + inference_time: 7614.0 + throughput: 131.33701076963487 estimated_peak_memory_range: - min: 16384 - max: 2215816 + min: 36864 + max: 2710192 primary_compute_unit: NPU precision: int8 layer_info: @@ -221,7 +233,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: j1p3qeox5 + job_id: j0pxmwvlg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -230,10 +242,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-08T22:41:27Z' + timestamp: '2024-06-22T22:45:17Z' - torchscript_onnx_qnn: - inference_time: 
706.0 - throughput: 1416.4305949008499 + inference_time: 714.0 + throughput: 1400.5602240896358 estimated_peak_memory_range: min: 450560 max: 450560 @@ -244,22 +256,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 134 - job_id: j7gjke375 - job_status: Passed - torchscript_onnx_ort: - inference_time: 782.0 - throughput: 1278.772378516624 - estimated_peak_memory_range: - min: 12218368 - max: 12218368 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 134 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 134 - job_id: jmg99woqg + job_id: jopr9zk7p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -268,4 +265,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:41:34Z' + timestamp: '2024-06-22T22:45:21Z' diff --git a/qai_hub_models/models/lama_dilated/export.py b/qai_hub_models/models/lama_dilated/export.py index b4b49854..43b20241 100644 --- a/qai_hub_models/models/lama_dilated/export.py +++ b/qai_hub_models/models/lama_dilated/export.py @@ -38,7 +38,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,14 +117,13 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image,mask" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -170,7 +169,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image,mask", sample_inputs, target_runtime ) @@ -190,7 +189,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -212,7 +211,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) @@ -227,7 +226,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_ort=False) + parser = export_parser(model_cls=Model, supports_onnx=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/lama_dilated/perf.yaml b/qai_hub_models/models/lama_dilated/perf.yaml index 87e48fb1..4682c186 100644 --- a/qai_hub_models/models/lama_dilated/perf.yaml +++ b/qai_hub_models/models/lama_dilated/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,26 
+38,26 @@ models: - name: LaMa-Dilated performance_metrics: - torchscript_onnx_tflite: - inference_time: 86343.0 - throughput: 11.581714788691613 + inference_time: 76792.0 + throughput: 13.022189811438691 estimated_peak_memory_range: min: 3289088 - max: 139370192 + max: 54930752 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 344 + layers_on_npu: 343 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 344 - job_id: jvgd7o6kg + total_layers: 343 + job_id: j1p38y3n5 job_status: Passed torchscript_onnx_qnn: - inference_time: 81307.0 - throughput: 12.299064041226462 + inference_time: 70643.0 + throughput: 14.155684214996532 estimated_peak_memory_range: - min: 3371008 - max: 42726616 + min: 3166208 + max: 43996992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +65,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 333 - job_id: j0pxey0j5 + job_id: jlpe2j1vp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,28 +74,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:42:12Z' + timestamp: '2024-06-22T22:46:12Z' - torchscript_onnx_tflite: - inference_time: 59391.0 - throughput: 16.837567981680728 + inference_time: 51609.0 + throughput: 19.37646534519173 estimated_peak_memory_range: - min: 53248 - max: 241657616 + min: 2371584 + max: 239485152 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 344 + layers_on_npu: 343 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 344 - job_id: jz57vxoq5 + total_layers: 343 + job_id: jwgomj0k5 job_status: Passed torchscript_onnx_qnn: - inference_time: 57168.0 - throughput: 17.492303386509935 + inference_time: 48645.0 + throughput: 20.557097337855893 estimated_peak_memory_range: - min: 2736128 - max: 165991776 + min: 4243456 + max: 131020112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +103,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 333 - job_id: jo5mv39y5 + job_id: jygzw19xg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,28 +112,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:42:13Z' + timestamp: '2024-06-22T22:46:14Z' - torchscript_onnx_tflite: - inference_time: 85709.0 - throughput: 11.667386155479589 + inference_time: 75718.0 + throughput: 13.20689928418606 estimated_peak_memory_range: - min: 3477504 - max: 138753616 + min: 3268608 + max: 139238816 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 344 + layers_on_npu: 343 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 344 - job_id: jqp4jveqp + total_layers: 343 + job_id: j1pv4jorp job_status: Passed torchscript_onnx_qnn: - inference_time: 81015.0 - throughput: 12.343393198790347 + inference_time: 70761.0 + throughput: 14.13207840477099 estimated_peak_memory_range: - min: 3174400 - max: 43648896 + min: 3223552 + max: 40930456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -139,7 +141,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 333 - job_id: jopr1edvg + job_id: jmg98618p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -148,10 +150,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:42:16Z' + timestamp: '2024-06-22T22:46:16Z' + - torchscript_onnx_tflite: + inference_time: 75987.0 + throughput: 13.160145814415623 + estimated_peak_memory_range: + min: 2224128 + max: 53851312 + primary_compute_unit: NPU + precision: fp16 + 
layer_info: + layers_on_npu: 343 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 343 + job_id: j7gj1jmeg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 71029.0 + throughput: 14.078756564220248 + estimated_peak_memory_range: + min: 3190784 + max: 42527648 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 333 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 333 + job_id: jnp13rl75 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:46:17Z' - torchscript_onnx_qnn: - inference_time: 91919.0 - throughput: 10.879143593816295 + inference_time: 70571.0 + throughput: 14.170126539229996 estimated_peak_memory_range: min: 4202496 max: 4202496 @@ -162,7 +202,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 333 - job_id: jegnr3qv5 + job_id: jz5wxjvmp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -171,4 +211,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:42:14Z' + timestamp: '2024-06-22T22:46:15Z' diff --git a/qai_hub_models/models/litehrnet/export.py b/qai_hub_models/models/litehrnet/export.py index 9dba5402..a508e28e 100644 --- a/qai_hub_models/models/litehrnet/export.py +++ b/qai_hub_models/models/litehrnet/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -115,7 +115,6 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. 
Compile the model to an on-device asset @@ -172,7 +171,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -204,8 +203,8 @@ def main(): parser = export_parser( model_cls=Model, supports_qnn=False, - supports_ort=False, - supports_precompiled_ort=False, + supports_onnx=False, + supports_precompiled_qnn_onnx=False, ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/litehrnet/perf.yaml b/qai_hub_models/models/litehrnet/perf.yaml index 64bcf8e7..8342a7ef 100644 --- a/qai_hub_models/models/litehrnet/perf.yaml +++ b/qai_hub_models/models/litehrnet/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: LiteHRNet performance_metrics: - torchscript_onnx_tflite: - inference_time: 11261.0 - throughput: 88.80206020779683 + inference_time: 11191.0 + throughput: 89.35751943526047 estimated_peak_memory_range: - min: 6529024 - max: 13390128 + min: 6553600 + max: 29020720 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,7 +50,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 10 total_layers: 1236 - job_id: jogkr30y5 + job_id: jo5m4j295 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -57,13 +59,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:42:54Z' + timestamp: '2024-06-22T22:46:56Z' - torchscript_onnx_tflite: - inference_time: 7629.0 - throughput: 131.07877834578582 + inference_time: 7507.0 + throughput: 133.20900492873318 estimated_peak_memory_range: - min: 6545408 - max: 86932832 + min: 20480 + max: 80905408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -71,7 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 10 total_layers: 1236 - job_id: jn5q9317p + job_id: jegnxjyq5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -80,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:42:55Z' + timestamp: '2024-06-22T22:46:58Z' - torchscript_onnx_tflite: - inference_time: 11181.0 - throughput: 89.43743851176103 + inference_time: 11155.0 + throughput: 89.64589870013447 estimated_peak_memory_range: - min: 6561792 - max: 18010528 + min: 6529024 + max: 11698232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -94,7 +96,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 10 total_layers: 1236 - job_id: j1gle38ep + job_id: jopr9zq7p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -103,4 +105,27 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:42:56Z' + timestamp: '2024-06-22T22:46:59Z' + - torchscript_onnx_tflite: + inference_time: 11199.0 + throughput: 89.2936869363336 + estimated_peak_memory_range: + min: 6529024 + max: 198676280 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 1226 + layers_on_gpu: 0 + layers_on_cpu: 10 + total_layers: 1236 + job_id: jep2j26q5 + job_status: 
Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:47:00Z' diff --git a/qai_hub_models/models/llama_v2_7b_chat_quantized/README.md b/qai_hub_models/models/llama_v2_7b_chat_quantized/README.md index 54e3e3f8..d7443fc1 100644 --- a/qai_hub_models/models/llama_v2_7b_chat_quantized/README.md +++ b/qai_hub_models/models/llama_v2_7b_chat_quantized/README.md @@ -3,7 +3,7 @@ # [Llama-v2-7B-Chat: State-of-the-art large language model useful on a variety of language understanding and generation tasks](https://aihub.qualcomm.com/models/llama_v2_7b_chat_quantized) -Llama 2 is a family of LLMs. The "Chat" at the end indicates that the model is optimized for chatbot-like dialogue. The model is quantized to 4-bit weights and 16-bit activations making it suitable for on-device deployment. For Prompt and output length specified below, the time to first token is Llama-PromptProcessor-Quantized's latency and average time per addition token is Llama-TokenGenerator-KVCache-Quantized's latency. +Llama 2 is a family of LLMs. The "Chat" at the end indicates that the model is optimized for chatbot-like dialogue. The model is quantized to w4a16 (4-bit weights and 16-bit activations), and part of the model is quantized to w8a16 (8-bit weights and 16-bit activations), making it suitable for on-device deployment. For the prompt and output lengths specified below, the time to first token is Llama-PromptProcessor-Quantized's latency and the average time per additional token is Llama-TokenGenerator-KVCache-Quantized's latency. This is based on the implementation of Llama-v2-7B-Chat found [here](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf). This repository contains scripts for optimized on-device @@ -34,7 +34,39 @@ Here, we divide the model into 4 parts in order to In order to export Llama 2, please ensure 1. Host machine has >40GB memory (RAM+swap-space) -2. If you don't have enough memory, export.py will dump instructions to increase swap space accordingly +2. If you don't have enough memory, export.py will dump instructions to increase swap space accordingly. + +## Sample output prompts generated on-device +1. --prompt "what is gravity?" --max-output-tokens 30 +~~~ +-------- Response Summary -------- +Prompt: what is gravity? +Response: Hello! I'm here to help you answer your question. Gravity is a fundamental force of nature that affects the behavior of objects with mass +~~~ + +2. --prompt "what is 2+3?" --max-output-tokens 30 +~~~ +-------- Response Summary -------- +Prompt: what is 2+3? +Response: Of course! I'm happy to help! The answer to 2+3 is 5. +~~~ + +3. --prompt "could you please write code for fibonacci series in python?" --max-output-tokens 100 +~~~ +-------- Response Summary -------- +Prompt: could you please write code for fibonacci series in python? +Response: Of course!
Here is an example of how you could implement the Fibonacci sequence in Python: +``` +def fibonacci(n): + if n <= 1: + return n + else: + return fibonacci(n-1) + fibonacci(n-2) +``` +You can test the function by calling it with different values of `n`, like this: +``` +print(fibonacci(5)) +~~~ diff --git a/qai_hub_models/models/llama_v2_7b_chat_quantized/__init__.py b/qai_hub_models/models/llama_v2_7b_chat_quantized/__init__.py index ca8e050f..652f98e4 100644 --- a/qai_hub_models/models/llama_v2_7b_chat_quantized/__init__.py +++ b/qai_hub_models/models/llama_v2_7b_chat_quantized/__init__.py @@ -2,6 +2,7 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- -from .app import ChatApp as App # noqa: F401 +from qai_hub_models.models._shared.llama.app import ChatApp as App # noqa: F401 + from .model import MODEL_ID # noqa: F401 from .model import Llama2_Quantized as Model # noqa: F401 diff --git a/qai_hub_models/models/llama_v2_7b_chat_quantized/demo.py b/qai_hub_models/models/llama_v2_7b_chat_quantized/demo.py index 33c39ca9..14cf4b71 100644 --- a/qai_hub_models/models/llama_v2_7b_chat_quantized/demo.py +++ b/qai_hub_models/models/llama_v2_7b_chat_quantized/demo.py @@ -6,112 +6,84 @@ from typing import List, Type -import qai_hub as hub -from transformers import LlamaTokenizer - +from qai_hub_models.models._shared.llama.demo import llama_chat_demo from qai_hub_models.models.llama_v2_7b_chat_quantized import MODEL_ID, Model -from qai_hub_models.models.llama_v2_7b_chat_quantized.app import ChatApp as App -from qai_hub_models.models.llama_v2_7b_chat_quantized.app import ( - HubLlama2ModelPipeline, - Llama2ModelPipeline, -) from qai_hub_models.models.llama_v2_7b_chat_quantized.model import ( - DEFAULT_INPUT_SEQ_LEN, DEFAULT_USER_PROMPT, + END_TOKENS, HF_REPO_NAME, HF_REPO_URL, -) -from qai_hub_models.utils.args import ( - get_model_cli_parser, - get_on_device_demo_parser, - validate_on_device_demo_args, + MODEL_SPLIT_MAP, + NUM_KEY_VAL_HEADS, + NUM_SPLITS, + Llama2_PromptProcessor_1_Quantized, + Llama2_PromptProcessor_2_Quantized, + Llama2_PromptProcessor_3_Quantized, + Llama2_PromptProcessor_4_Quantized, + Llama2_TokenGenerator_1_Quantized, + Llama2_TokenGenerator_2_Quantized, + Llama2_TokenGenerator_3_Quantized, + Llama2_TokenGenerator_4_Quantized, + get_input_prompt_with_tags, + get_tokenizer, + prepare_combined_attention_mask, ) from qai_hub_models.utils.base_model import BaseModel, TargetRuntime -from qai_hub_models.utils.huggingface import has_model_access -# Max output tokens to generate -# You can override this with cli argument. -# Keeping this short as on-device demo takes time to converge. -MAX_OUTPUT_TOKENS = 10 -DEFAULT_DEVICE = "Samsung Galaxy S24" +def _get_model_class(split_part: int, is_token_generator: bool = False): + if split_part < 1 or split_part > 4: + raise RuntimeError( + "Incorrect index provided to request Model split class." + f" Must be within (1-4), provided ({split_part})." 
+ ) + + if is_token_generator: + return [ + Llama2_TokenGenerator_1_Quantized, + Llama2_TokenGenerator_2_Quantized, + Llama2_TokenGenerator_3_Quantized, + Llama2_TokenGenerator_4_Quantized, + ][split_part - 1] + return [ + Llama2_PromptProcessor_1_Quantized, + Llama2_PromptProcessor_2_Quantized, + Llama2_PromptProcessor_3_Quantized, + Llama2_PromptProcessor_4_Quantized, + ][split_part - 1] -def llama_chat_demo( + +def llama_2_chat_demo( model_cls: Type[BaseModel] = Model, model_id: str = MODEL_ID, + num_splits: int = NUM_SPLITS, + num_key_val_heads: int = NUM_KEY_VAL_HEADS, + model_split_map: dict = MODEL_SPLIT_MAP, + end_tokens: set = END_TOKENS, + hf_repo_name: str = HF_REPO_NAME, + hf_repo_url: str = HF_REPO_URL, default_prompt: str = DEFAULT_USER_PROMPT, is_test: bool = False, available_target_runtimes: List[TargetRuntime] = [TargetRuntime.QNN], ): - # Demo parameters - parser = get_model_cli_parser(model_cls) - parser = get_on_device_demo_parser( - parser, - add_output_dir=True, + llama_chat_demo( + model_cls=model_cls, + model_id=model_id, + get_model_class=_get_model_class, + get_input_prompt_with_tags=get_input_prompt_with_tags, + prepare_combined_attention_mask=prepare_combined_attention_mask, + tokenizer=get_tokenizer(), + num_splits=num_splits, + num_key_val_heads=num_key_val_heads, + model_split_map=model_split_map, + end_tokens=end_tokens, + hf_repo_name=hf_repo_name, + hf_repo_url=hf_repo_url, + default_prompt=default_prompt, + is_test=is_test, available_target_runtimes=available_target_runtimes, - default_device=DEFAULT_DEVICE, - ) - parser.add_argument( - "--prompt", - type=str, - default=default_prompt, - help="input prompt.", - ) - parser.add_argument( - "--prompt-processor-input-seq-len", - type=int, - default=DEFAULT_INPUT_SEQ_LEN, - help="input sequence length for prompt-processor. This must be less than `max_position_embeddings` set for model.", - ) - parser.add_argument( - "--max-output-tokens", - type=int, - default=MAX_OUTPUT_TOKENS, - help="max output tokens to generate.", - ) - args = parser.parse_args([] if is_test else None) - validate_on_device_demo_args(args, model_id) - - if not args.on_device: - prompt_processor = Llama2ModelPipeline(Model.from_pretrained()) - token_generator = Llama2ModelPipeline( - Model.from_pretrained(), is_token_generator=True - ) - else: - hub_model_ids = args.hub_model_id.split(",") - # First four models are Prompt Processor - # Last four models are Token Generator - if len(hub_model_ids) != 8: - raise RuntimeError( - "Please provide comma separated hub-model-ids for Llama Prompt Processor and Token Generator," - " e.g. --hub-model-id ,,,,,,,.\n" - "Specify model-ids for four Prompt Processor models first, then Token Generator models.\n" - "If you run export.py it will print out command to run on-device demo with ordered model-ids." 
- ) - - hub_device = hub.Device(args.device) - prompt_processor = HubLlama2ModelPipeline( - hub_model_ids[:4], - hub_device=hub_device, - inference_options=args.inference_options, - ) - token_generator = HubLlama2ModelPipeline( - hub_model_ids[4:], - hub_device=hub_device, - inference_options=args.inference_options, - is_token_generator=True, - ) - - has_model_access(HF_REPO_NAME, HF_REPO_URL) - tokenizer = LlamaTokenizer.from_pretrained(HF_REPO_NAME) - - app = App(prompt_processor, token_generator, tokenizer=tokenizer) - app.generate_output_prompt( - args.prompt, - max_seq_len=args.prompt_processor_input_seq_len, - max_output_tokens=args.max_output_tokens, ) if __name__ == "__main__": - llama_chat_demo() + llama_2_chat_demo() diff --git a/qai_hub_models/models/llama_v2_7b_chat_quantized/export.py b/qai_hub_models/models/llama_v2_7b_chat_quantized/export.py index 30fe892d..f0e951d0 100644 --- a/qai_hub_models/models/llama_v2_7b_chat_quantized/export.py +++ b/qai_hub_models/models/llama_v2_7b_chat_quantized/export.py @@ -32,27 +32,27 @@ from qai_hub_models.utils.qnn_helpers import get_qnn_inputs ALL_COMPONENTS = [ - "Llama2_PromptProcessor_1_Quantized", - "Llama2_PromptProcessor_2_Quantized", - "Llama2_PromptProcessor_3_Quantized", - "Llama2_PromptProcessor_4_Quantized", - "Llama2_TokenGenerator_1_Quantized", - "Llama2_TokenGenerator_2_Quantized", - "Llama2_TokenGenerator_3_Quantized", - "Llama2_TokenGenerator_4_Quantized", + "PromptProcessor_1_Quantized", + "PromptProcessor_2_Quantized", + "PromptProcessor_3_Quantized", + "PromptProcessor_4_Quantized", + "TokenGenerator_1_Quantized", + "TokenGenerator_2_Quantized", + "TokenGenerator_3_Quantized", + "TokenGenerator_4_Quantized", ] DEFAULT_COMPONENTS = [ - "Llama2_PromptProcessor_1_Quantized", - "Llama2_PromptProcessor_2_Quantized", - "Llama2_PromptProcessor_3_Quantized", - "Llama2_PromptProcessor_4_Quantized", - "Llama2_TokenGenerator_1_Quantized", - "Llama2_TokenGenerator_2_Quantized", - "Llama2_TokenGenerator_3_Quantized", - "Llama2_TokenGenerator_4_Quantized", + "PromptProcessor_1_Quantized", + "PromptProcessor_2_Quantized", + "PromptProcessor_3_Quantized", + "PromptProcessor_4_Quantized", + "TokenGenerator_1_Quantized", + "TokenGenerator_2_Quantized", + "TokenGenerator_3_Quantized", + "TokenGenerator_4_Quantized", ] -DEFAULT_EXPORT_DEVICE = "Samsung Galaxy S24" +DEFAULT_EXPORT_DEVICE = "Samsung Galaxy S24 (Family)" def export_model( @@ -137,8 +137,9 @@ def export_model( compile_jobs: Dict[str, hub.client.CompileJob] = {} profile_options_per_component: Dict[str, str] = {} - for component_name in components: + for i, component_name in enumerate(components): # Load model part + component = model.load_model_part(component_name) input_spec = component.get_input_spec( @@ -281,7 +282,7 @@ def main(): model_cls=Model, components=ALL_COMPONENTS, supports_tflite=False, - supports_ort=False, + supports_precompiled_qnn_onnx=False, default_export_device=DEFAULT_EXPORT_DEVICE, ) args = parser.parse_args() diff --git a/qai_hub_models/models/llama_v2_7b_chat_quantized/info.yaml b/qai_hub_models/models/llama_v2_7b_chat_quantized/info.yaml index 1069bac6..f142ff26 100644 --- a/qai_hub_models/models/llama_v2_7b_chat_quantized/info.yaml +++ b/qai_hub_models/models/llama_v2_7b_chat_quantized/info.yaml @@ -6,7 +6,8 @@ headline: State-of-the-art large language model useful on a variety of language domain: Generative AI description: Llama 2 is a family of LLMs. The "Chat" at the end indicates that the model is optimized for chatbot-like dialogue. 
The model is quantized to - 4-bit weights and 16-bit activations making it suitable for on-device + w4a16(4-bit weights and 16-bit activations) and part of the model is quantized to + w8a16(8-bit weights and 16-bit activations) making it suitable for on-device deployment. For Prompt and output length specified below, the time to first token is Llama-PromptProcessor-Quantized's latency and average time per addition token is Llama-TokenGenerator-KVCache-Quantized's latency. @@ -21,17 +22,18 @@ license: https://github.com/facebookresearch/llama/blob/main/LICENSE source_repo: https://huggingface.co/meta-llama/Llama-2-7b-chat-hf technical_details: Number of parameters: 7B - Model size: 3.6GB + Precision: w4a16 + w8a16 (few layers) Model-1 (Prompt Processor): Llama-PromptProcessor-Quantized Max context length: 1024 + Prompt processor model size: 3.6 GB Prompt processor input: 1024 tokens Prompt processor output: 1024 output tokens + KVCache for token generator Model-2 (Token Generator): Llama-TokenGenerator-KVCache-Quantized + Token generator model size: 3.6 GB Token generator input: 1 input token + past KVCache Token generator output: 1 output token + KVCache for next iteration Decoding length: 1024 (1 output token + 1023 from KVCache) Use: Initiate conversation with prompt-processor and then token generator for subsequent iterations. - QNN-SDK: "2.19" applicable_scenarios: - Dialogue - Content Generation diff --git a/qai_hub_models/models/llama_v2_7b_chat_quantized/model.py b/qai_hub_models/models/llama_v2_7b_chat_quantized/model.py index 5dc3fda2..7e97997f 100644 --- a/qai_hub_models/models/llama_v2_7b_chat_quantized/model.py +++ b/qai_hub_models/models/llama_v2_7b_chat_quantized/model.py @@ -5,25 +5,28 @@ from __future__ import annotations import os -import pickle from typing import Optional, Tuple import torch -from qai_hub.client import DatasetEntries, Device +from qai_hub.client import DatasetEntries -from qai_hub_models.models.common import ( - SampleInputsType, - SourceModelFormat, - TargetRuntime, +from qai_hub_models.models._shared.llama.model import ( + DEFAULT_INPUT_SEQ_LEN, + Llama_QuantizedMixin, + RopeEmbedding, + get_hidden_layer_range_from_split, + get_past_key_names, + get_past_keyval_with_shift, + load_input_cached_data, + make_torch_compatible_past_key_values, + save_input_cached_data, ) from qai_hub_models.models.llama_v2_7b_chat_quantized.modeling_llama import ( LlamaForCausalLM, LlamaModel, - RopeEmbedding, ) -from qai_hub_models.utils.aimet.aimet_dummy_model import AimetEncodingLoaderMixin -from qai_hub_models.utils.asset_loaders import ASSET_CONFIG, CachedWebModelAsset -from qai_hub_models.utils.base_model import BaseModel, CollectionModel, TargetRuntime +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset +from qai_hub_models.utils.base_model import CollectionModel, TargetRuntime from qai_hub_models.utils.huggingface import ( ensure_has_required_transformer, has_model_access, @@ -44,20 +47,32 @@ MODEL_ID = __name__.split(".")[-2] -MODEL_ASSET_VERSION = 4 +MODEL_ASSET_VERSION = 6 # Configs AIMET_ENCODINGS_PREFIX = "config" -AIMET_CONFIG = "default_config_llama" # Model parameters MAX_HIDDEN_LAYERS = 32 MAX_POS_EMBEDDINGS = 1024 DEFAULT_INPUT_SEQ_LEN = 1024 +ATTENTION_HIDDEN_DIM = 4096 +POS_EMBED_DIM = 64 DATA_DIR = "data" USE_CACHED_DATA = True NUM_SPLITS = 4 LAYERS_PER_SPLIT = 8 +NUM_KEY_VAL_HEADS = 32 + +# Model split map to track DecodeLayer split for each part +# key (model split number) -> +# value Tuple of (start index of decoder Layer, end index of 
decode layer) +MODEL_SPLIT_MAP = { + 1: (0, 8), + 2: (8, 16), + 3: (16, 24), + 4: (24, 32), +} # Hugging face repo name and url HF_REPO_NAME = "meta-llama/Llama-2-7b-chat-hf" @@ -73,11 +88,10 @@ SYS_END = "<>" INST_START = "[INST]" INST_END = "[/INST]" -DEFAULT_PROMPT_CONTEXT = """You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. +END_TOKENS = {""} -If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information. -""" -DEFAULT_USER_PROMPT = "Hi! How are you?" +DEFAULT_PROMPT_CONTEXT = "You are a helpful AI assistant" +DEFAULT_USER_PROMPT = "Hi! What is 2+3?" def get_input_prompt_with_tags( @@ -131,136 +145,6 @@ def prepare_combined_attention_mask( return new_mask -def _input_cached_data_save( - data: dict, - split_part: int, - model_type: str = "pp", - input_seq_len: int = DEFAULT_INPUT_SEQ_LEN, -): - data_path = ( - f"{DATA_DIR}/{input_seq_len}/llama_v2_{split_part}_{model_type}_inputs.pkl" - ) - - inputs_pkl_path = ASSET_CONFIG.get_local_store_model_path( - MODEL_ID, - MODEL_ASSET_VERSION, - f"{data_path}", - ) - - # if already exists, no need to re-serialize. - if os.path.exists(inputs_pkl_path): - return - - os.makedirs(os.path.dirname(inputs_pkl_path), exist_ok=True) - with open(f"{inputs_pkl_path}", "wb") as f: - pickle.dump(data, f, pickle.HIGHEST_PROTOCOL) - - -def _input_cached_data_load( - split_part: int, model_type: str = "pp", input_seq_len: int = DEFAULT_INPUT_SEQ_LEN -): - data_path = ( - f"{DATA_DIR}/{input_seq_len}/llama_v2_{split_part}_{model_type}_inputs.pkl" - ) - try: - - # Load local data path if already generated - inputs_pkl_path = ASSET_CONFIG.get_local_store_model_path( - MODEL_ID, - MODEL_ASSET_VERSION, - f"{data_path}", - ) - - # If local data path not found, fetch from server if available - if not os.path.exists(inputs_pkl_path): - inputs_pkl_path = CachedWebModelAsset.from_asset_store( - MODEL_ID, - MODEL_ASSET_VERSION, - data_path, - ).fetch() - - with open(f"{inputs_pkl_path}", "rb") as f: - return pickle.load(f) - except Exception: - # Delete intermediate data file if error occurs - if os.path.exists(inputs_pkl_path): - os.remove(inputs_pkl_path) - print( - f"Unable to load cached data for {data_path}, creating data using PyTorch models." 
- ) - # Unable to load cached data, return None - return None - - -def _get_model_data( - split_part: int, - input_seq_len: int = DEFAULT_INPUT_SEQ_LEN, - is_token_generator=False, -): - """ - Helper method to get model data from given split number - """ - if is_token_generator: - if split_part == 1: - return Llama2_TokenGenerator_1_Quantized.get_model_data( - input_seq_len=input_seq_len - ) - if split_part == 2: - return Llama2_TokenGenerator_2_Quantized.get_model_data( - input_seq_len=input_seq_len - ) - if split_part == 3: - return Llama2_TokenGenerator_3_Quantized.get_model_data( - input_seq_len=input_seq_len - ) - if split_part == 4: - return Llama2_TokenGenerator_4_Quantized.get_model_data( - input_seq_len=input_seq_len - ) - else: - if split_part == 1: - return Llama2_PromptProcessor_1_Quantized.get_model_data( - input_seq_len=input_seq_len - ) - elif split_part == 2: - return Llama2_PromptProcessor_2_Quantized.get_model_data( - input_seq_len=input_seq_len - ) - elif split_part == 3: - return Llama2_PromptProcessor_3_Quantized.get_model_data( - input_seq_len=input_seq_len - ) - elif split_part == 4: - return Llama2_PromptProcessor_4_Quantized.get_model_data( - input_seq_len=input_seq_len - ) - raise RuntimeError(f"Unsupported split_part {split_part} provided.") - - -def _get_hidden_layer_range_from_split(split_part: int): - num_of_hidden_layers_per_part = LAYERS_PER_SPLIT - hidden_layers_start = num_of_hidden_layers_per_part * (split_part - 1) - hidden_layers_end = hidden_layers_start + num_of_hidden_layers_per_part - return hidden_layers_start, hidden_layers_end - - -def _get_past_key_names(start: int = 0, end: int = 8, suffix=""): - past_key_val_name = [] - for i in range(start, end): - cache_names = [f"past_key_{i}_h{j}{suffix}" for j in range(32)] + [ - f"past_value_{i}_h{j}{suffix}" for j in range(32) - ] - past_key_val_name.extend(cache_names) - return past_key_val_name - - -def _get_output_names_from_split(split_part: int = 1): - layer_start, layer_end = _get_hidden_layer_range_from_split(split_part=split_part) - output_list = [f"layers_{layer_end - 1}_add_out_0"] - output_list += _get_past_key_names(layer_start, layer_end, suffix="_out") - return output_list - - class Llama2Wrapper(torch.nn.Module): def __init__( self, @@ -294,8 +178,8 @@ def __init__( f"Llama2 split_part must be within 1-4 (Provided {split_part})." 
) - hidden_layers_start, hidden_layers_end = _get_hidden_layer_range_from_split( - split_part + hidden_layers_start, hidden_layers_end = get_hidden_layer_range_from_split( + split_part, MODEL_SPLIT_MAP ) config.hidden_layers_start = hidden_layers_start config.hidden_layers_end = hidden_layers_end @@ -375,7 +259,7 @@ def forward_token_generator( position_ids_sin, *past_key_values, ): - past_key_values_tuple = _make_torch_compatible_past_key_values( + past_key_values_tuple = make_torch_compatible_past_key_values( self.total_hidden_layers, 32, *past_key_values ) return self.model( @@ -437,85 +321,42 @@ def from_pretrained( return Llama2_Quantized(max_position_embeddings=max_position_embeddings) def load_model_part(self, split_part): - if split_part == "Llama2_PromptProcessor_1_Quantized": + if split_part == "PromptProcessor_1_Quantized": return Llama2_PromptProcessor_1_Quantized.from_pretrained( max_position_embeddings=self.max_position_embeddings ) - if split_part == "Llama2_PromptProcessor_2_Quantized": + if split_part == "PromptProcessor_2_Quantized": return Llama2_PromptProcessor_2_Quantized.from_pretrained( max_position_embeddings=self.max_position_embeddings ) - if split_part == "Llama2_PromptProcessor_3_Quantized": + if split_part == "PromptProcessor_3_Quantized": return Llama2_PromptProcessor_3_Quantized.from_pretrained( max_position_embeddings=self.max_position_embeddings ) - if split_part == "Llama2_PromptProcessor_4_Quantized": + if split_part == "PromptProcessor_4_Quantized": return Llama2_PromptProcessor_4_Quantized.from_pretrained( max_position_embeddings=self.max_position_embeddings ) - if split_part == "Llama2_TokenGenerator_1_Quantized": + if split_part == "TokenGenerator_1_Quantized": return Llama2_TokenGenerator_1_Quantized.from_pretrained( max_position_embeddings=self.max_position_embeddings, ) - if split_part == "Llama2_TokenGenerator_2_Quantized": + if split_part == "TokenGenerator_2_Quantized": return Llama2_TokenGenerator_2_Quantized.from_pretrained( max_position_embeddings=self.max_position_embeddings ) - if split_part == "Llama2_TokenGenerator_3_Quantized": + if split_part == "TokenGenerator_3_Quantized": return Llama2_TokenGenerator_3_Quantized.from_pretrained( max_position_embeddings=self.max_position_embeddings ) - if split_part == "Llama2_TokenGenerator_4_Quantized": + if split_part == "TokenGenerator_4_Quantized": return Llama2_TokenGenerator_4_Quantized.from_pretrained( max_position_embeddings=self.max_position_embeddings ) raise RuntimeError(f"Unsupported split_part {split_part}.") -class Llama2_QuantizedMixin(AimetEncodingLoaderMixin, BaseModel): - def __init__(self, model, encoding_path, is_token_generator=False): - AimetEncodingLoaderMixin.__init__(self, model, encoding_path) - BaseModel.__init__(self) - self.model = model - self.split_part = 1 - self.is_token_generator = is_token_generator - - def get_hub_compile_options( - self, - target_runtime: TargetRuntime, - other_compile_options: str = "", - device: Optional[Device] = None, - ) -> str: - if target_runtime != TargetRuntime.QNN: - raise RuntimeError( - f"Unsupported target_runtime provided: {target_runtime}." - " Only QNN runtime is supported for Llama for now." - ) - - return " --target_runtime qnn_context_binary --quantize_full_type w8a16 --quantize_io" - - @staticmethod - def get_output_names(): - # Clipped hidden layers are named same as first part for all parts - # Eventually, each split should have respective names. 
- return _get_output_names_from_split(split_part=1) - - def sample_inputs(self, input_spec: InputSpec | None = None) -> SampleInputsType: - data = self.get_calibration_data(input_spec=input_spec) - for key, val in data.items(): - data[key] = [val.detach().numpy()] - return data - - def preferred_hub_source_model_format( - self, target_runtime: TargetRuntime - ) -> SourceModelFormat: - """ - Source model format preferred for conversion on AI Hub. - """ - return SourceModelFormat.ONNX - - -class Llama2_PromptProcessor_1_Quantized(Llama2_QuantizedMixin): +class Llama2_PromptProcessor_1_Quantized(Llama_QuantizedMixin): def __init__(self, model, encoding_path): super().__init__(model, encoding_path) self.model = model @@ -550,13 +391,20 @@ def get_input_spec( return { "input_ids": ((1, input_seq_length), "int32"), "attention_mask": ((1, 1, input_seq_length, input_seq_length), "float32"), - "position_ids_cos": ((1, 1, input_seq_length, 64), "float32"), - "position_ids_sin": ((1, 1, input_seq_length, 64), "float32"), + "position_ids_cos": ((1, 1, input_seq_length, POS_EMBED_DIM), "float32"), + "position_ids_sin": ((1, 1, input_seq_length, POS_EMBED_DIM), "float32"), } @staticmethod def get_model_data(input_seq_len: int = DEFAULT_INPUT_SEQ_LEN): - data = _input_cached_data_load(split_part=1) + data = load_input_cached_data( + split_part=1, + data_dir=DATA_DIR, + model_name="llama_v2", + model_id=MODEL_ID, + model_asset_version=MODEL_ASSET_VERSION, + input_seq_len=input_seq_len, + ) if data is not None: return data @@ -585,7 +433,15 @@ def get_model_data(input_seq_len: int = DEFAULT_INPUT_SEQ_LEN): ).get_embedding(position_ids) inputs["position_ids_cos"] = position_ids_cos inputs["position_ids_sin"] = position_ids_sin - _input_cached_data_save(inputs, split_part=1, input_seq_len=input_seq_len) + save_input_cached_data( + inputs, + split_part=1, + data_dir=DATA_DIR, + model_name="llama_v2", + model_id=MODEL_ID, + model_asset_version=MODEL_ASSET_VERSION, + input_seq_len=input_seq_len, + ) return inputs def get_calibration_data( @@ -605,7 +461,7 @@ def get_calibration_data( ) -class Llama2_PromptProcessor_2_Quantized(Llama2_QuantizedMixin): +class Llama2_PromptProcessor_2_Quantized(Llama_QuantizedMixin): def __init__(self, model: torch.nn.Module, encoding_path: str): super().__init__(model, encoding_path) self.split_part = 2 @@ -637,15 +493,22 @@ def get_input_spec( # This can be used with the qai_hub python API to declare # the model input specification upon submitting a compile job. 
return { - "input_ids": ((1, input_seq_length, 4096), "float32"), + "input_ids": ((1, input_seq_length, ATTENTION_HIDDEN_DIM), "float32"), "attention_mask": ((1, 1, input_seq_length, input_seq_length), "float32"), - "position_ids_cos": ((1, 1, input_seq_length, 64), "float32"), - "position_ids_sin": ((1, 1, input_seq_length, 64), "float32"), + "position_ids_cos": ((1, 1, input_seq_length, POS_EMBED_DIM), "float32"), + "position_ids_sin": ((1, 1, input_seq_length, POS_EMBED_DIM), "float32"), } @staticmethod def get_model_data(input_seq_len: int = DEFAULT_INPUT_SEQ_LEN): - data = _input_cached_data_load(split_part=2) + data = load_input_cached_data( + split_part=2, + data_dir=DATA_DIR, + model_name="llama_v2", + model_id=MODEL_ID, + model_asset_version=MODEL_ASSET_VERSION, + input_seq_len=input_seq_len, + ) if data is not None: return data @@ -661,7 +524,15 @@ def get_model_data(input_seq_len: int = DEFAULT_INPUT_SEQ_LEN): new_inputs["attention_mask"] = inputs["attention_mask"] new_inputs["position_ids_cos"] = inputs["position_ids_cos"] new_inputs["position_ids_sin"] = inputs["position_ids_sin"] - _input_cached_data_save(new_inputs, split_part=2, input_seq_len=input_seq_len) + save_input_cached_data( + new_inputs, + split_part=2, + data_dir=DATA_DIR, + model_name="llama_v2", + model_id=MODEL_ID, + model_asset_version=MODEL_ASSET_VERSION, + input_seq_len=input_seq_len, + ) return new_inputs def get_calibration_data( @@ -681,7 +552,7 @@ def get_calibration_data( ) -class Llama2_PromptProcessor_3_Quantized(Llama2_QuantizedMixin): +class Llama2_PromptProcessor_3_Quantized(Llama_QuantizedMixin): def __init__(self, model: torch.nn.Module, encoding_path: str): super().__init__(model, encoding_path) self.split_part = 3 @@ -713,15 +584,22 @@ def get_input_spec( # This can be used with the qai_hub python API to declare # the model input specification upon submitting a compile job. 
return { - "input_ids": ((1, input_seq_length, 4096), "float32"), + "input_ids": ((1, input_seq_length, ATTENTION_HIDDEN_DIM), "float32"), "attention_mask": ((1, 1, input_seq_length, input_seq_length), "float32"), - "position_ids_cos": ((1, 1, input_seq_length, 64), "float32"), - "position_ids_sin": ((1, 1, input_seq_length, 64), "float32"), + "position_ids_cos": ((1, 1, input_seq_length, POS_EMBED_DIM), "float32"), + "position_ids_sin": ((1, 1, input_seq_length, POS_EMBED_DIM), "float32"), } @staticmethod def get_model_data(input_seq_len: int = DEFAULT_INPUT_SEQ_LEN): - data = _input_cached_data_load(split_part=3) + data = load_input_cached_data( + split_part=3, + data_dir=DATA_DIR, + model_name="llama_v2", + model_id=MODEL_ID, + model_asset_version=MODEL_ASSET_VERSION, + input_seq_len=input_seq_len, + ) if data is not None: return data @@ -737,7 +615,15 @@ def get_model_data(input_seq_len: int = DEFAULT_INPUT_SEQ_LEN): new_inputs["attention_mask"] = inputs["attention_mask"] new_inputs["position_ids_cos"] = inputs["position_ids_cos"] new_inputs["position_ids_sin"] = inputs["position_ids_sin"] - _input_cached_data_save(new_inputs, split_part=3, input_seq_len=input_seq_len) + save_input_cached_data( + new_inputs, + split_part=3, + data_dir=DATA_DIR, + model_name="llama_v2", + model_id=MODEL_ID, + model_asset_version=MODEL_ASSET_VERSION, + input_seq_len=input_seq_len, + ) return new_inputs def get_calibration_data( @@ -757,7 +643,7 @@ def get_calibration_data( ) -class Llama2_PromptProcessor_4_Quantized(Llama2_QuantizedMixin): +class Llama2_PromptProcessor_4_Quantized(Llama_QuantizedMixin): def __init__(self, model: torch.nn.Module, encoding_path: str): super().__init__(model, encoding_path) self.split_part = 4 @@ -789,15 +675,34 @@ def get_input_spec( # This can be used with the qai_hub python API to declare # the model input specification upon submitting a compile job. 
return { - "input_ids": ((1, input_seq_length, 4096), "float32"), + "input_ids": ((1, input_seq_length, ATTENTION_HIDDEN_DIM), "float32"), "attention_mask": ((1, 1, input_seq_length, input_seq_length), "float32"), - "position_ids_cos": ((1, 1, input_seq_length, 64), "float32"), - "position_ids_sin": ((1, 1, input_seq_length, 64), "float32"), + "position_ids_cos": ((1, 1, input_seq_length, POS_EMBED_DIM), "float32"), + "position_ids_sin": ((1, 1, input_seq_length, POS_EMBED_DIM), "float32"), } + @staticmethod + def get_output_names(): + layers_start, layers_end = get_hidden_layer_range_from_split( + split_part=4, model_split_map=MODEL_SPLIT_MAP + ) + return Llama_QuantizedMixin.get_output_names( + start=layers_start, + end=layers_end, + past_key_val_heads=NUM_KEY_VAL_HEADS, + output_name="logits", + ) + @staticmethod def get_model_data(input_seq_len: int = DEFAULT_INPUT_SEQ_LEN): - data = _input_cached_data_load(split_part=4) + data = load_input_cached_data( + split_part=4, + data_dir=DATA_DIR, + model_name="llama_v2", + model_id=MODEL_ID, + model_asset_version=MODEL_ASSET_VERSION, + input_seq_len=input_seq_len, + ) if data is not None: return data @@ -812,7 +717,15 @@ def get_model_data(input_seq_len: int = DEFAULT_INPUT_SEQ_LEN): new_inputs["attention_mask"] = inputs["attention_mask"] new_inputs["position_ids_cos"] = inputs["position_ids_cos"] new_inputs["position_ids_sin"] = inputs["position_ids_sin"] - _input_cached_data_save(new_inputs, split_part=4, input_seq_len=input_seq_len) + save_input_cached_data( + new_inputs, + split_part=4, + data_dir=DATA_DIR, + model_name="llama_v2", + model_id=MODEL_ID, + model_asset_version=MODEL_ASSET_VERSION, + input_seq_len=input_seq_len, + ) return new_inputs def get_calibration_data( @@ -837,49 +750,7 @@ def get_calibration_data( # -def get_past_keyval_with_shift(past_key_vals): - """ - Clip past key value to feed next iteration - """ - tg_inputs = {} - for i in range(0, len(past_key_vals), 64): - l_num = i // 64 - for j, key in enumerate(past_key_vals[i : i + 32]): - tg_inputs[f"past_key_{l_num}_h{j}"] = key[:, :, :, 1:].detach() - - for j, val in enumerate(past_key_vals[i + 32 : i + 64]): - tg_inputs[f"past_value_{l_num}_h{j}"] = val[:, :, 1:, :].detach() - - return tg_inputs - - -def _make_torch_compatible_past_key_values( - decode_layers, split_per_layer, *past_values_flattened -): - past_key_values = [] - total_past_entries = len(past_values_flattened) - - # past values consists of - # 1. k decode/hidden layers - # 2. each decode layer has 2 entries: key and value - # 3. each key-value entry is has 32 layer - if total_past_entries != decode_layers * split_per_layer * 2: - raise RuntimeError( - "Incorrect number of past key-values provided for model." - f"Expecting {decode_layers * split_per_layer * 2}, got {total_past_entries}." 
- ) - - for i in range(0, decode_layers * 2, 2): - keys = past_values_flattened[i * split_per_layer : (i + 1) * split_per_layer] - values = past_values_flattened[ - (i + 1) * split_per_layer : (i + 2) * split_per_layer - ] - - past_key_values.append((keys, values)) - return tuple(past_key_values) - - -class Llama2_TokenGenerator_1_Quantized(Llama2_QuantizedMixin): +class Llama2_TokenGenerator_1_Quantized(Llama_QuantizedMixin): def __init__(self, model: torch.nn.Module, encoding_path: str): super().__init__(model, encoding_path, is_token_generator=True) self.split_part = 1 @@ -923,12 +794,12 @@ def get_input_spec( input_spec = { "input_ids": ((1, 1), "int32"), "attention_mask": ((1, 1, 1, input_seq_length), "float32"), - "position_ids_cos": ((1, 1, 1, 64), "float32"), - "position_ids_sin": ((1, 1, 1, 64), "float32"), + "position_ids_cos": ((1, 1, 1, POS_EMBED_DIM), "float32"), + "position_ids_sin": ((1, 1, 1, POS_EMBED_DIM), "float32"), } # Collect past_key_values and drop output names - past_key_val_names = _get_past_key_names() + past_key_val_names = get_past_key_names() for past_key_val in past_key_val_names: if "key" in past_key_val: input_spec[past_key_val] = ( @@ -944,7 +815,15 @@ def get_input_spec( @staticmethod def get_model_data(input_seq_len: int = DEFAULT_INPUT_SEQ_LEN): - data = _input_cached_data_load(split_part=1, model_type="tg") + data = load_input_cached_data( + split_part=1, + data_dir=DATA_DIR, + model_name="llama_v2", + model_id=MODEL_ID, + model_asset_version=MODEL_ASSET_VERSION, + model_type="tg", + input_seq_len=input_seq_len, + ) if data is not None: return data @@ -1000,14 +879,18 @@ def get_model_data(input_seq_len: int = DEFAULT_INPUT_SEQ_LEN): "position_ids_sin": position_ids_sin, } - key_val = get_past_keyval_with_shift(output[1:]) + key_val = get_past_keyval_with_shift(output[1:], NUM_KEY_VAL_HEADS) for key, val in key_val.items(): data[key] = val - _input_cached_data_save( + save_input_cached_data( data, split_part=1, model_type="tg", + data_dir=DATA_DIR, + model_name="llama_v2", + model_id=MODEL_ID, + model_asset_version=MODEL_ASSET_VERSION, input_seq_len=input_seq_len, ) return data @@ -1030,7 +913,7 @@ def get_calibration_data( ) -class Llama2_TokenGenerator_2_Quantized(Llama2_QuantizedMixin): +class Llama2_TokenGenerator_2_Quantized(Llama_QuantizedMixin): def __init__(self, model: torch.nn.Module, encoding_path: str): super().__init__(model, encoding_path, is_token_generator=True) self.split_part = 2 @@ -1072,14 +955,14 @@ def get_input_spec( # the model input specification upon submitting a compile job. 
input_spec = { - "input_ids": ((1, 1, 4096), "float32"), + "input_ids": ((1, 1, ATTENTION_HIDDEN_DIM), "float32"), "attention_mask": ((1, 1, 1, input_seq_length), "float32"), - "position_ids_cos": ((1, 1, 1, 64), "float32"), - "position_ids_sin": ((1, 1, 1, 64), "float32"), + "position_ids_cos": ((1, 1, 1, POS_EMBED_DIM), "float32"), + "position_ids_sin": ((1, 1, 1, POS_EMBED_DIM), "float32"), } # Collect past_key_values and drop output names - past_key_val_names = _get_past_key_names() + past_key_val_names = get_past_key_names() for past_key_val in past_key_val_names: if "key" in past_key_val: input_spec[past_key_val] = ( @@ -1095,7 +978,15 @@ def get_input_spec( @staticmethod def get_model_data(input_seq_len: int = DEFAULT_INPUT_SEQ_LEN): - data = _input_cached_data_load(split_part=2, model_type="tg") + data = load_input_cached_data( + split_part=2, + data_dir=DATA_DIR, + model_name="llama_v2", + model_id=MODEL_ID, + model_asset_version=MODEL_ASSET_VERSION, + model_type="tg", + input_seq_len=input_seq_len, + ) if data is not None: return data @@ -1120,14 +1011,18 @@ def get_model_data(input_seq_len: int = DEFAULT_INPUT_SEQ_LEN): "position_ids_sin": inputs["position_ids_sin"], } - key_val = get_past_keyval_with_shift(output[1:]) + key_val = get_past_keyval_with_shift(output[1:], NUM_KEY_VAL_HEADS) for key, val in key_val.items(): data[key] = val - _input_cached_data_save( + save_input_cached_data( data, split_part=2, model_type="tg", + data_dir=DATA_DIR, + model_name="llama_v2", + model_id=MODEL_ID, + model_asset_version=MODEL_ASSET_VERSION, input_seq_len=input_seq_len, ) return data @@ -1149,7 +1044,7 @@ def get_calibration_data( ) -class Llama2_TokenGenerator_3_Quantized(Llama2_QuantizedMixin): +class Llama2_TokenGenerator_3_Quantized(Llama_QuantizedMixin): def __init__(self, model: torch.nn.Module, encoding_path: str): super().__init__(model, encoding_path, is_token_generator=True) self.split_part = 3 @@ -1191,14 +1086,14 @@ def get_input_spec( # the model input specification upon submitting a compile job. 
input_spec = { - "input_ids": ((1, 1, 4096), "float32"), + "input_ids": ((1, 1, ATTENTION_HIDDEN_DIM), "float32"), "attention_mask": ((1, 1, 1, input_seq_length), "float32"), - "position_ids_cos": ((1, 1, 1, 64), "float32"), - "position_ids_sin": ((1, 1, 1, 64), "float32"), + "position_ids_cos": ((1, 1, 1, POS_EMBED_DIM), "float32"), + "position_ids_sin": ((1, 1, 1, POS_EMBED_DIM), "float32"), } # Collect past_key_values and drop output names - past_key_val_names = _get_past_key_names() + past_key_val_names = get_past_key_names() for past_key_val in past_key_val_names: if "key" in past_key_val: input_spec[past_key_val] = ( @@ -1214,7 +1109,15 @@ def get_input_spec( @staticmethod def get_model_data(input_seq_len: int = DEFAULT_INPUT_SEQ_LEN): - data = _input_cached_data_load(split_part=3, model_type="tg") + data = load_input_cached_data( + split_part=3, + data_dir=DATA_DIR, + model_name="llama_v2", + model_id=MODEL_ID, + model_asset_version=MODEL_ASSET_VERSION, + model_type="tg", + input_seq_len=input_seq_len, + ) if data is not None: return data @@ -1239,14 +1142,18 @@ def get_model_data(input_seq_len: int = DEFAULT_INPUT_SEQ_LEN): "position_ids_sin": inputs["position_ids_sin"], } - key_val = get_past_keyval_with_shift(output[1:]) + key_val = get_past_keyval_with_shift(output[1:], NUM_KEY_VAL_HEADS) for key, val in key_val.items(): data[key] = val - _input_cached_data_save( + save_input_cached_data( data, split_part=3, model_type="tg", + data_dir=DATA_DIR, + model_name="llama_v2", + model_id=MODEL_ID, + model_asset_version=MODEL_ASSET_VERSION, input_seq_len=input_seq_len, ) return data @@ -1268,7 +1175,7 @@ def get_calibration_data( ) -class Llama2_TokenGenerator_4_Quantized(Llama2_QuantizedMixin): +class Llama2_TokenGenerator_4_Quantized(Llama_QuantizedMixin): def __init__(self, model: torch.nn.Module, encoding_path: str): super().__init__(model, encoding_path, is_token_generator=True) self.split_part = 4 @@ -1310,14 +1217,14 @@ def get_input_spec( # the model input specification upon submitting a compile job. 
input_spec = { - "input_ids": ((1, 1, 4096), "float32"), + "input_ids": ((1, 1, ATTENTION_HIDDEN_DIM), "float32"), "attention_mask": ((1, 1, 1, input_seq_length), "float32"), - "position_ids_cos": ((1, 1, 1, 64), "float32"), - "position_ids_sin": ((1, 1, 1, 64), "float32"), + "position_ids_cos": ((1, 1, 1, POS_EMBED_DIM), "float32"), + "position_ids_sin": ((1, 1, 1, POS_EMBED_DIM), "float32"), } # Collect past_key_values and drop output names - past_key_val_names = _get_past_key_names() + past_key_val_names = get_past_key_names() for past_key_val in past_key_val_names: if "key" in past_key_val: input_spec[past_key_val] = ( @@ -1331,9 +1238,29 @@ def get_input_spec( ) return input_spec + @staticmethod + def get_output_names(): + layers_start, layers_end = get_hidden_layer_range_from_split( + split_part=4, model_split_map=MODEL_SPLIT_MAP + ) + return Llama_QuantizedMixin.get_output_names( + start=layers_start, + end=layers_end, + past_key_val_heads=NUM_KEY_VAL_HEADS, + output_name="logits", + ) + @staticmethod def get_model_data(input_seq_len: int = DEFAULT_INPUT_SEQ_LEN): - data = _input_cached_data_load(split_part=4, model_type="tg") + data = load_input_cached_data( + split_part=4, + data_dir=DATA_DIR, + model_name="llama_v2", + model_id=MODEL_ID, + model_asset_version=MODEL_ASSET_VERSION, + model_type="tg", + input_seq_len=input_seq_len, + ) if data is not None: return data @@ -1358,14 +1285,18 @@ def get_model_data(input_seq_len: int = DEFAULT_INPUT_SEQ_LEN): "position_ids_sin": inputs["position_ids_sin"], } - key_val = get_past_keyval_with_shift(output[1:]) + key_val = get_past_keyval_with_shift(output[1:], NUM_KEY_VAL_HEADS) for key, val in key_val.items(): data[key] = val - _input_cached_data_save( + save_input_cached_data( data, split_part=4, model_type="tg", + data_dir=DATA_DIR, + model_name="llama_v2", + model_id=MODEL_ID, + model_asset_version=MODEL_ASSET_VERSION, input_seq_len=input_seq_len, ) return data diff --git a/qai_hub_models/models/llama_v2_7b_chat_quantized/modeling_llama.py b/qai_hub_models/models/llama_v2_7b_chat_quantized/modeling_llama.py index b5ae0f78..3c8c0ec3 100644 --- a/qai_hub_models/models/llama_v2_7b_chat_quantized/modeling_llama.py +++ b/qai_hub_models/models/llama_v2_7b_chat_quantized/modeling_llama.py @@ -23,18 +23,19 @@ # limitations under the License. 
""" PyTorch LLaMA model.""" +from __future__ import annotations + import math from typing import List, Optional, Tuple, Union import torch import torch.utils.checkpoint from torch import nn -from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss +from torch.nn import CrossEntropyLoss from transformers.activations import ACT2FN from transformers.modeling_outputs import ( BaseModelOutputWithPast, CausalLMOutputWithPast, - SequenceClassifierOutputWithPast, ) from transformers.modeling_utils import PreTrainedModel from transformers.models.llama.configuration_llama import LlamaConfig @@ -190,50 +191,6 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids): ### ------- QCOM EDITS STARTS ------- ### -class RopeEmbedding: - """ - Compute Rotary Position Embedding - Ref: https://arxiv.org/pdf/2104.09864 - - Compute RopeEmbedding outside model to simplify model quantization - """ - - def __init__(self, head_dim: int = 128, max_length: int = 1024): - """ - head_dim: dimension size of head - max_length: max sequence length to expect - """ - self.max_length = max_length - self.cos, self.sin = self.precompute_freqs_cis(head_dim, max_length * 2) - - def precompute_freqs_cis(self, dim: int, end: int, theta: float = 10000.0): - """ - Precompute embeeding matrix - """ - freqs = 1.0 / (theta ** (torch.arange(0, dim, 2)[: (dim // 2)].float() / dim)) - t = torch.arange(end) - freqs = torch.outer(t, freqs).float() - freqs_cis = torch.polar(torch.ones_like(freqs), freqs) - freqs_cis = freqs_cis[0 : self.max_length] - freqs_real = torch.view_as_real(freqs_cis) - freqs_real = freqs_real.unsqueeze(0).unsqueeze(0) - - freqs_cos = freqs_real[:, :, :, :, 0] # extract even elements - freqs_sin = freqs_real[:, :, :, :, 1] # extract odd elements - return freqs_cos, freqs_sin - - def get_embedding(self, position_ids: torch.Tensor): - """ - position_ids: [batch_size, sequence_length] - return [batch_size, 1, sequence_length, head_sim//2][2] - """ - cos = self.cos[0, 0, :, :] # [seq_len, dim] - sin = self.sin[0, 0, :, :] # [seq_len, dim] - cos = cos[position_ids].unsqueeze(1) - sin = sin[position_ids].unsqueeze(1) - return cos, sin - - def apply_rotary_pos_emb_single(x, cos, sin, position_ids): # The first two dimensions of cos and sin are always 1, so we can `squeeze` them. cos = cos[0, 0, :, :] # [seq_len, dim] @@ -911,6 +868,9 @@ def forward( ) ### ------- QCOM EDITS STARTS ------- ### + # Combined attention mask expand attention mask to rank-4 + # [ bsz, 1, tgt_seq_len, src_seq_len ] + # check attention mask shape and fetch sequence length correctly. elif attention_mask is not None: attention_shape = attention_mask.shape batch_size = attention_shape[0] @@ -1310,137 +1270,3 @@ def _reorder_cache(past_key_values, beam_idx): ), ) return reordered_past - - -@add_start_docstrings( - """ - The LLaMa Model transformer with a sequence classification head on top (linear layer). - - [`LlamaForSequenceClassification`] uses the last token in order to do the classification, as other causal models - (e.g. GPT-2) do. - - Since it does classification on the last token, it requires to know the position of the last token. If a - `pad_token_id` is defined in the configuration, it finds the last token that is not a padding token in each row. If - no `pad_token_id` is defined, it simply takes the last value in each row of the batch. Since it cannot guess the - padding tokens when `inputs_embeds` are passed instead of `input_ids`, it does the same (take the last value in - each row of the batch). 
- """, - LLAMA_START_DOCSTRING, -) -class LlamaForSequenceClassification(LlamaPreTrainedModel): - _keys_to_ignore_on_load_missing = [r"lm_head.weight"] - - def __init__(self, config): - super().__init__(config) - self.num_labels = config.num_labels - self.model = LlamaModel(config) - self.score = nn.Linear(config.hidden_size, self.num_labels, bias=False) - - # Initialize weights and apply final processing - self.post_init() - - def get_input_embeddings(self): - return self.model.embed_tokens - - def set_input_embeddings(self, value): - self.model.embed_tokens = value - - @add_start_docstrings_to_model_forward(LLAMA_INPUTS_DOCSTRING) - def forward( - self, - input_ids: torch.LongTensor = None, - attention_mask: Optional[torch.Tensor] = None, - position_ids: Optional[torch.LongTensor] = None, - past_key_values: Optional[List[torch.FloatTensor]] = None, - inputs_embeds: Optional[torch.FloatTensor] = None, - labels: Optional[torch.LongTensor] = None, - use_cache: Optional[bool] = None, - output_attentions: Optional[bool] = None, - output_hidden_states: Optional[bool] = None, - return_dict: Optional[bool] = None, - ) -> Union[Tuple, SequenceClassifierOutputWithPast]: - r""" - labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*): - Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., - config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If - `config.num_labels > 1` a classification loss is computed (Cross-Entropy). - """ - return_dict = ( - return_dict if return_dict is not None else self.config.use_return_dict - ) - - transformer_outputs = self.model( - input_ids, - attention_mask=attention_mask, - position_ids=position_ids, - past_key_values=past_key_values, - inputs_embeds=inputs_embeds, - use_cache=use_cache, - output_attentions=output_attentions, - output_hidden_states=output_hidden_states, - return_dict=return_dict, - ) - hidden_states = transformer_outputs[0] - logits = self.score(hidden_states) - - if input_ids is not None: - batch_size = input_ids.shape[0] - else: - batch_size = inputs_embeds.shape[0] - - if self.config.pad_token_id is None and batch_size != 1: - raise ValueError( - "Cannot handle batch sizes > 1 if no padding token is defined." 
- ) - if self.config.pad_token_id is None: - sequence_lengths = -1 - else: - if input_ids is not None: - sequence_lengths = ( - torch.ne(input_ids, self.config.pad_token_id).sum(-1) - 1 - ).to(logits.device) - else: - sequence_lengths = -1 - - pooled_logits = logits[ - torch.arange(batch_size, device=logits.device), sequence_lengths - ] - - loss = None - if labels is not None: - labels = labels.to(logits.device) - if self.config.problem_type is None: - if self.num_labels == 1: - self.config.problem_type = "regression" - elif self.num_labels > 1 and ( - labels.dtype == torch.long or labels.dtype == torch.int - ): - self.config.problem_type = "single_label_classification" - else: - self.config.problem_type = "multi_label_classification" - - if self.config.problem_type == "regression": - loss_fct = MSELoss() - if self.num_labels == 1: - loss = loss_fct(pooled_logits.squeeze(), labels.squeeze()) - else: - loss = loss_fct(pooled_logits, labels) - elif self.config.problem_type == "single_label_classification": - loss_fct = CrossEntropyLoss() - loss = loss_fct( - pooled_logits.view(-1, self.num_labels), labels.view(-1) - ) - elif self.config.problem_type == "multi_label_classification": - loss_fct = BCEWithLogitsLoss() - loss = loss_fct(pooled_logits, labels) - if not return_dict: - output = (pooled_logits,) + transformer_outputs[1:] - return ((loss,) + output) if loss is not None else output - - return SequenceClassifierOutputWithPast( - loss=loss, - logits=pooled_logits, - past_key_values=transformer_outputs.past_key_values, - hidden_states=transformer_outputs.hidden_states, - attentions=transformer_outputs.attentions, - ) diff --git a/qai_hub_models/models/llama_v2_7b_chat_quantized/perf.yaml b/qai_hub_models/models/llama_v2_7b_chat_quantized/perf.yaml index b8c5ad10..748615cc 100644 --- a/qai_hub_models/models/llama_v2_7b_chat_quantized/perf.yaml +++ b/qai_hub_models/models/llama_v2_7b_chat_quantized/perf.yaml @@ -1,5 +1,5 @@ models: -- name: Llama-TokenGenerator-KVCache-Quantized +- name: Llama2-TokenGenerator-KVCache-Quantized performance_metrics: - reference_device_info: name: Samsung Galaxy S24 @@ -10,11 +10,11 @@ models: chipset: Snapdragon® 8 Gen 3 timestamp: '2024-05-23T00:34:02.549319Z' torchscript_onnx_qnn: - inference_time: 104953 - throughput: 9.528 + inference_time: 90268 + throughput: 11.07 estimated_peak_memory_range: - min: 331575296 - max: 5017129568 + min: 66715648 + max: 4562679888 layer_info: layers_on_npu: 34842 layers_on_gpu: 0 @@ -47,7 +47,7 @@ models: primary_compute_unit: NPU job_id: "null" job_status: Passed -- name: Llama-PromptProcessor-Quantized +- name: Llama2-PromptProcessor-Quantized performance_metrics: - reference_device_info: name: Samsung Galaxy S24 diff --git a/qai_hub_models/models/llama_v2_7b_chat_quantized/test.py b/qai_hub_models/models/llama_v2_7b_chat_quantized/test.py index c194ed45..e34935d2 100644 --- a/qai_hub_models/models/llama_v2_7b_chat_quantized/test.py +++ b/qai_hub_models/models/llama_v2_7b_chat_quantized/test.py @@ -4,11 +4,11 @@ # --------------------------------------------------------------------- import pytest -from qai_hub_models.models.llama_v2_7b_chat_quantized.demo import llama_chat_demo +from qai_hub_models.models.llama_v2_7b_chat_quantized.demo import llama_2_chat_demo @pytest.mark.skip("#105 move slow_cloud and slow tests to nightly.") @pytest.mark.slow_cloud def test_demo(): # Run demo and verify it does not crash - llama_chat_demo(is_test=True) + llama_2_chat_demo(is_test=True) diff --git 
a/qai_hub_models/models/mediapipe_face/export.py b/qai_hub_models/models/mediapipe_face/export.py index adb92a20..8d84a972 100644 --- a/qai_hub_models/models/mediapipe_face/export.py +++ b/qai_hub_models/models/mediapipe_face/export.py @@ -33,7 +33,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, components: Optional[List[str]] = None, skip_profiling: bool = False, @@ -127,7 +127,6 @@ def export_model( for component_name, component in components_dict.items(): # Trace the model input_spec = component.get_input_spec() - component.eval() source_model = torch.jit.trace( component.to("cpu"), make_torch_inputs(input_spec) ) @@ -194,7 +193,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mediapipe_face/perf.yaml b/qai_hub_models/models/mediapipe_face/perf.yaml index ed6dfc34..a8237df8 100644 --- a/qai_hub_models/models/mediapipe_face/perf.yaml +++ b/qai_hub_models/models/mediapipe_face/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: MediaPipeFaceDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 781.0 - throughput: 1280.4097311139565 + inference_time: 780.0 + throughput: 1282.051282051282 estimated_peak_memory_range: - min: 90112 - max: 2155184 + min: 12288 + max: 2020120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: j7gjkey75 + job_id: jn5qwjko5 job_status: Passed torchscript_onnx_qnn: - inference_time: 835.0 - throughput: 1197.6047904191616 + inference_time: 847.0 + throughput: 1180.637544273908 estimated_peak_memory_range: - min: 16384 - max: 101864120 + min: 2113536 + max: 36582392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jvgd7o4kg + job_id: jygzw1kxg job_status: Passed - torchscript_onnx_ort: - inference_time: 986.0 - throughput: 1014.1987829614604 + torchscript_onnx: + inference_time: 1018.0 + throughput: 982.3182711198428 estimated_peak_memory_range: - min: 552960 - max: 8114576 + min: 806912 + max: 7821152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jqpyv6yrp + job_id: jqp48z02g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:45:23Z' + timestamp: '2024-06-22T22:51:14Z' - torchscript_onnx_tflite: - inference_time: 543.0 - throughput: 1841.6206261510129 + inference_time: 539.0 + throughput: 1855.287569573284 estimated_peak_memory_range: - min: 16384 - max: 31618960 + min: 12288 + max: 33853952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: jygzvryzp + job_id: jw56vklyp job_status: Passed torchscript_onnx_qnn: - inference_time: 593.0 - throughput: 1686.3406408094436 + inference_time: 590.0 + throughput: 1694.915254237288 estimated_peak_memory_range: - min: 802816 - max: 49388544 + min: 12288 + max: 42466864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jqp4jv4qp + job_id: jmg986e8p job_status: Passed - torchscript_onnx_ort: - inference_time: 706.0 - throughput: 1416.4305949008499 + torchscript_onnx: + inference_time: 729.0 + throughput: 1371.7421124828531 estimated_peak_memory_range: - min: 548864 - max: 22898592 + min: 2088960 + max: 22681920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: j1p8wzkzp + job_id: jo5m4jy75 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:45:25Z' + timestamp: '2024-06-22T22:51:16Z' - torchscript_onnx_tflite: - inference_time: 779.0 - throughput: 1283.6970474967907 + inference_time: 780.0 + throughput: 1282.051282051282 estimated_peak_memory_range: - min: 12288 - max: 1532120 + min: 20480 + max: 1375848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: jmg99w2qg + job_id: jwgomjqk5 job_status: Passed torchscript_onnx_qnn: - inference_time: 835.0 - throughput: 1197.6047904191616 + inference_time: 845.0 + throughput: 1183.4319526627219 estimated_peak_memory_range: - min: 806912 - max: 77885504 + min: 815104 + max: 8055448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jopr1e7vg + job_id: jmg986emp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:45:21Z' + timestamp: '2024-06-22T22:51:10Z' + - torchscript_onnx_tflite: + inference_time: 776.0 + throughput: 1288.659793814433 + estimated_peak_memory_range: + min: 20480 + max: 1376952 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 112 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 112 + job_id: j7gj1j4eg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 845.0 + throughput: 1183.4319526627219 + estimated_peak_memory_range: + min: 16384 + max: 94032616 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 148 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 148 + job_id: jvgd0jl6p + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:51:12Z' - torchscript_onnx_qnn: - inference_time: 928.0 - throughput: 1077.5862068965516 + inference_time: 970.0 + throughput: 1030.9278350515465 estimated_peak_memory_range: min: 786432 max: 786432 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jo5mv3ky5 + job_id: jvgd0jlzp job_status: Passed - torchscript_onnx_ort: - inference_time: 997.0 - throughput: 1003.0090270812437 + torchscript_onnx: + inference_time: 1007.0 + throughput: 993.0486593843099 
estimated_peak_memory_range: - min: 5971968 - max: 5971968 + min: 5120000 + max: 5120000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jn5q93d7p + job_id: jopr9zjkp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,15 +256,15 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:45:27Z' + timestamp: '2024-06-22T22:51:18Z' - name: MediaPipeFaceLandmarkDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 318.0 - throughput: 3144.6540880503144 + inference_time: 304.0 + throughput: 3289.4736842105262 estimated_peak_memory_range: - min: 12288 - max: 2130328 + min: 24576 + max: 1457024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -232,14 +272,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 101 - job_id: jlpe4kx75 + job_id: j1gl7jrm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 391.0 - throughput: 2557.544757033248 + inference_time: 390.0 + throughput: 2564.102564102564 estimated_peak_memory_range: - min: 131072 - max: 98992544 + min: 16384 + max: 95095056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -247,14 +287,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 107 - job_id: jz57vxnq5 + job_id: jz5wxjnmp job_status: Passed - torchscript_onnx_ort: - inference_time: 532.0 - throughput: 1879.6992481203008 + torchscript_onnx: + inference_time: 496.0 + throughput: 2016.1290322580646 estimated_peak_memory_range: min: 12288 - max: 84060104 + max: 5888232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -262,7 +302,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: j2p0elx25 + job_id: j0pxmw28g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -271,13 +311,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:45:24Z' + timestamp: '2024-06-22T22:51:15Z' - torchscript_onnx_tflite: - inference_time: 224.0 - throughput: 4464.285714285715 + inference_time: 240.0 + throughput: 4166.666666666667 estimated_peak_memory_range: min: 16384 - max: 27155600 + max: 28725248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -285,14 +325,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 101 - job_id: jz5wmqzzg + job_id: j1p38y2n5 job_status: Passed torchscript_onnx_qnn: - inference_time: 283.0 - throughput: 3533.5689045936397 + inference_time: 285.0 + throughput: 3508.7719298245615 estimated_peak_memory_range: - min: 458752 - max: 40876896 + min: 12288 + max: 34430832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -300,14 +340,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 107 - job_id: j0pxeyrj5 + job_id: jnp13rx75 job_status: Passed - torchscript_onnx_ort: - inference_time: 375.0 - throughput: 2666.6666666666665 + torchscript_onnx: + inference_time: 387.0 + throughput: 2583.9793281653747 estimated_peak_memory_range: - min: 12288 - max: 19616240 + min: 458752 + max: 18610432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -315,7 +355,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jogkr3ky5 + job_id: jegnxj8j5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -324,13 +364,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:45:26Z' + timestamp: '2024-06-22T22:51:17Z' - torchscript_onnx_tflite: - 
inference_time: 309.0 - throughput: 3236.2459546925566 + inference_time: 304.0 + throughput: 3289.4736842105262 estimated_peak_memory_range: - min: 12288 - max: 1641680 + min: 24576 + max: 1402120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -338,14 +378,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 101 - job_id: jnp1qe1kg + job_id: j1pv4jxrp job_status: Passed torchscript_onnx_qnn: - inference_time: 395.0 - throughput: 2531.6455696202534 + inference_time: 386.0 + throughput: 2590.6735751295337 estimated_peak_memory_range: - min: 290816 - max: 8822944 + min: 462848 + max: 4234344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -353,7 +393,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 107 - job_id: jep23lzxg + job_id: jnp13rxn5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -362,10 +402,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:45:22Z' + timestamp: '2024-06-22T22:51:10Z' + - torchscript_onnx_tflite: + inference_time: 319.0 + throughput: 3134.796238244514 + estimated_peak_memory_range: + min: 16384 + max: 1984816 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 101 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 101 + job_id: jlpe2j3vp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 389.0 + throughput: 2570.694087403599 + estimated_peak_memory_range: + min: 16384 + max: 98050608 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 107 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 107 + job_id: jz576q3ng + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:51:13Z' - torchscript_onnx_qnn: - inference_time: 497.0 - throughput: 2012.0724346076458 + inference_time: 496.0 + throughput: 2016.1290322580646 estimated_peak_memory_range: min: 442368 max: 442368 @@ -376,14 +454,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jegnr3wv5 + job_id: jz5wxjn4p job_status: Passed - torchscript_onnx_ort: - inference_time: 521.0 - throughput: 1919.3857965451057 + torchscript_onnx: + inference_time: 501.0 + throughput: 1996.007984031936 estimated_peak_memory_range: - min: 5312512 - max: 5312512 + min: 3923968 + max: 3923968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -391,7 +469,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: j1gle3qep + job_id: jep2j2n65 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -400,4 +478,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:45:27Z' + timestamp: '2024-06-22T22:51:19Z' diff --git a/qai_hub_models/models/mediapipe_hand/export.py b/qai_hub_models/models/mediapipe_hand/export.py index 0100ddad..7e906cdc 100644 --- a/qai_hub_models/models/mediapipe_hand/export.py +++ b/qai_hub_models/models/mediapipe_hand/export.py @@ -33,7 +33,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, components: Optional[List[str]] = None, skip_profiling: bool = False, @@ -127,7 +127,6 @@ def export_model( for component_name, component in components_dict.items(): # Trace the model input_spec = component.get_input_spec() - component.eval() source_model = torch.jit.trace( component.to("cpu"), make_torch_inputs(input_spec) ) 
@@ -194,7 +193,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mediapipe_hand/perf.yaml b/qai_hub_models/models/mediapipe_hand/perf.yaml index 67190c80..a2c7fe64 100644 --- a/qai_hub_models/models/mediapipe_hand/perf.yaml +++ b/qai_hub_models/models/mediapipe_hand/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: MediaPipeHandDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 2260.0 - throughput: 442.4778761061947 + inference_time: 2277.0 + throughput: 439.17435221783046 estimated_peak_memory_range: - min: 12288 - max: 11649504 + min: 20480 + max: 4315184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 150 - job_id: jwgoe394p + job_id: j1p88oyqp job_status: Passed torchscript_onnx_qnn: - inference_time: 1017.0 - throughput: 983.284169124877 + inference_time: 1006.0 + throughput: 994.0357852882704 estimated_peak_memory_range: - min: 20480 - max: 21650176 + min: 2113536 + max: 23505176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: jmg99wqqg + job_id: j7gj1xn1g job_status: Passed - torchscript_onnx_ort: - inference_time: 1164.0 - throughput: 859.106529209622 + torchscript_onnx: + inference_time: 1176.0 + throughput: 850.3401360544218 estimated_peak_memory_range: min: 12288 - max: 18412096 + max: 21144392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 196 - job_id: jqp4jv6lp + job_id: jo5m4rx75 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:46:02Z' + timestamp: '2024-06-22T22:52:02Z' - torchscript_onnx_tflite: - inference_time: 1902.0 - throughput: 525.7623554153523 + inference_time: 2100.0 + throughput: 476.1904761904762 estimated_peak_memory_range: - min: 12288 - max: 50595712 + min: 16384 + max: 53620752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 150 - job_id: j7gjke875 + job_id: jn5qw8qe5 job_status: Passed torchscript_onnx_qnn: - inference_time: 722.0 - throughput: 1385.0415512465374 + inference_time: 724.0 + throughput: 1381.2154696132598 estimated_peak_memory_range: min: 802816 - max: 60773680 + max: 52232928 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: jvgd7omkg + job_id: jygzwed4g job_status: Passed - torchscript_onnx_ort: - inference_time: 837.0 - throughput: 1194.7431302270013 + torchscript_onnx: + inference_time: 838.0 + throughput: 1193.3174224343675 estimated_peak_memory_range: - min: 323584 - max: 
36752192 + min: 237568 + max: 30961680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 196 - job_id: jo5mv31q5 + job_id: jopr9k3kp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:46:04Z' + timestamp: '2024-06-22T22:52:04Z' - torchscript_onnx_tflite: - inference_time: 2331.0 - throughput: 429.000429000429 + inference_time: 2421.0 + throughput: 413.0524576621231 estimated_peak_memory_range: - min: 36864 - max: 2444200 + min: 12288 + max: 1686048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 150 - job_id: jygzvr0zp + job_id: jw56v64np job_status: Passed torchscript_onnx_qnn: - inference_time: 1015.0 - throughput: 985.2216748768473 + inference_time: 1005.0 + throughput: 995.0248756218906 estimated_peak_memory_range: - min: 806912 - max: 10668872 + min: 802816 + max: 22557664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: jvgd7omlg + job_id: jvgd0w16p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:46:01Z' + timestamp: '2024-06-22T22:51:57Z' + - torchscript_onnx_tflite: + inference_time: 2277.0 + throughput: 439.17435221783046 + estimated_peak_memory_range: + min: 12288 + max: 5476888 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 148 + layers_on_gpu: 0 + layers_on_cpu: 2 + total_layers: 150 + job_id: jwgomy615 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1020.0 + throughput: 980.3921568627451 + estimated_peak_memory_range: + min: 667648 + max: 120131440 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 197 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 197 + job_id: jqp48qr2g + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:51:59Z' - torchscript_onnx_qnn: - inference_time: 1036.0 - throughput: 965.2509652509652 + inference_time: 1144.0 + throughput: 874.1258741258741 estimated_peak_memory_range: min: 786432 max: 786432 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 196 - job_id: jmg99wqvg + job_id: jmg98vnmp job_status: Passed - torchscript_onnx_ort: - inference_time: 1182.0 - throughput: 846.0236886632825 + torchscript_onnx: + inference_time: 1206.0 + throughput: 829.1873963515754 estimated_peak_memory_range: - min: 704512 - max: 704512 + min: 1572864 + max: 1572864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 196 - job_id: jopr1emeg + job_id: jqpyne30g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,15 +256,15 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:46:06Z' + timestamp: '2024-06-22T22:52:06Z' - name: MediaPipeHandLandmarkDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 1205.0 - throughput: 829.8755186721992 + inference_time: 1212.0 + throughput: 825.0825082508251 estimated_peak_memory_range: - min: 12288 
- max: 2551752 + min: 16384 + max: 1722664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -232,14 +272,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 159 - job_id: j1pvzvn7g + job_id: jogkdzxvp job_status: Passed torchscript_onnx_qnn: - inference_time: 1299.0 - throughput: 769.8229407236336 + inference_time: 1306.0 + throughput: 765.6967840735069 estimated_peak_memory_range: min: 802816 - max: 8940712 + max: 7849944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -247,14 +287,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 210 - job_id: jnp1qemkg + job_id: jlpe29m8p job_status: Passed - torchscript_onnx_ort: - inference_time: 1521.0 - throughput: 657.4621959237344 + torchscript_onnx: + inference_time: 1563.0 + throughput: 639.7952655150352 estimated_peak_memory_range: - min: 12288 - max: 143178688 + min: 86016 + max: 180242064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -262,7 +302,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 209 - job_id: j0pxey895 + job_id: jegnx2vj5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -271,13 +311,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:46:03Z' + timestamp: '2024-06-22T22:52:02Z' - torchscript_onnx_tflite: - inference_time: 903.0 - throughput: 1107.4197120708748 + inference_time: 902.0 + throughput: 1108.6474501108648 estimated_peak_memory_range: min: 12288 - max: 59093296 + max: 62654048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -285,14 +325,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 159 - job_id: jlpe4kn75 + job_id: j1gl7nm25 job_status: Passed torchscript_onnx_qnn: - inference_time: 962.0 - throughput: 1039.5010395010395 + inference_time: 955.0 + throughput: 1047.1204188481674 estimated_peak_memory_range: min: 802816 - max: 66542112 + max: 57032672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -300,14 +340,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 210 - job_id: jz5wmqrjg + job_id: jz5wxo64p job_status: Passed - torchscript_onnx_ort: - inference_time: 1121.0 - throughput: 892.0606601248885 + torchscript_onnx: + inference_time: 1128.0 + throughput: 886.5248226950355 estimated_peak_memory_range: - min: 802816 - max: 30698880 + min: 0 + max: 24398048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -315,7 +355,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 209 - job_id: jegnr3dm5 + job_id: jep2j8y65 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -324,13 +364,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:46:05Z' + timestamp: '2024-06-22T22:52:04Z' - torchscript_onnx_tflite: - inference_time: 1196.0 - throughput: 836.1204013377926 + inference_time: 1199.0 + throughput: 834.0283569641368 estimated_peak_memory_range: - min: 28672 - max: 1643304 + min: 12288 + max: 9620000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -338,14 +378,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 159 - job_id: jz5wmqrzg + job_id: j1p38k0m5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1318.0 - throughput: 758.7253414264036 + inference_time: 1297.0 + throughput: 771.0100231303007 estimated_peak_memory_range: - min: 294912 - max: 52198264 + min: 32768 + max: 12496920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -353,7 +393,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 210 - job_id: 
jz57vx1r5 + job_id: jz576zrng job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -362,13 +402,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:46:01Z' + timestamp: '2024-06-22T22:51:57Z' + - torchscript_onnx_tflite: + inference_time: 1211.0 + throughput: 825.7638315441784 + estimated_peak_memory_range: + min: 12288 + max: 1782992 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 159 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 159 + job_id: j1pv43kzp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1298.0 + throughput: 770.4160246533128 + estimated_peak_memory_range: + min: 811008 + max: 52503816 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 210 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 210 + job_id: j0pxmvo8g + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:52:00Z' - torchscript_onnx_qnn: - inference_time: 1513.0 - throughput: 660.9385327164574 + inference_time: 1501.0 + throughput: 666.2225183211193 estimated_peak_memory_range: - min: 1150976 - max: 1150976 + min: 786432 + max: 786432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -376,14 +454,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 209 - job_id: jnp1qemlg + job_id: jnp130zn5 job_status: Passed - torchscript_onnx_ort: - inference_time: 1551.0 - throughput: 644.7453255963894 + torchscript_onnx: + inference_time: 1569.0 + throughput: 637.3486297004462 estimated_peak_memory_range: - min: 20062208 - max: 20062208 + min: 22982656 + max: 22982656 primary_compute_unit: NPU precision: fp16 layer_info: @@ -391,7 +469,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 209 - job_id: jep23lqmg + job_id: j2p0kyz05 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -400,4 +478,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:46:06Z' + timestamp: '2024-06-22T22:52:06Z' diff --git a/qai_hub_models/models/mediapipe_pose/export.py b/qai_hub_models/models/mediapipe_pose/export.py index 4fd3fb91..a5efe339 100644 --- a/qai_hub_models/models/mediapipe_pose/export.py +++ b/qai_hub_models/models/mediapipe_pose/export.py @@ -33,7 +33,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, components: Optional[List[str]] = None, skip_profiling: bool = False, @@ -127,7 +127,6 @@ def export_model( for component_name, component in components_dict.items(): # Trace the model input_spec = component.get_input_spec() - component.eval() source_model = torch.jit.trace( component.to("cpu"), make_torch_inputs(input_spec) ) @@ -194,7 +193,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mediapipe_pose/perf.yaml b/qai_hub_models/models/mediapipe_pose/perf.yaml index 5bcb6fa5..cf04d903 100644 --- a/qai_hub_models/models/mediapipe_pose/perf.yaml +++ b/qai_hub_models/models/mediapipe_pose/perf.yaml @@ -9,6 +9,7 @@ aggregated: - 
Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: MediaPipePoseDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 850.0 - throughput: 1176.4705882352941 + inference_time: 826.0 + throughput: 1210.6537530266344 estimated_peak_memory_range: - min: 32768 - max: 1863416 + min: 16384 + max: 1530448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 107 - job_id: j1p8wzd8p + job_id: jn5qw86e5 job_status: Passed torchscript_onnx_qnn: - inference_time: 880.0 - throughput: 1136.3636363636363 + inference_time: 879.0 + throughput: 1137.6564277588168 estimated_peak_memory_range: - min: 2273280 - max: 7352768 + min: 217088 + max: 4696168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jwgoe37dp + job_id: jygzwe34g job_status: Passed - torchscript_onnx_ort: - inference_time: 1001.0 - throughput: 999.000999000999 + torchscript_onnx: + inference_time: 1012.0 + throughput: 988.1422924901186 estimated_peak_memory_range: - min: 471040 - max: 10697640 + min: 77824 + max: 25910224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 139 - job_id: jvgd7oylg + job_id: jopr9kvkp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:46:43Z' + timestamp: '2024-06-22T22:52:50Z' - torchscript_onnx_tflite: - inference_time: 621.0 - throughput: 1610.3059581320451 + inference_time: 615.0 + throughput: 1626.0162601626016 estimated_peak_memory_range: min: 61440 - max: 42407216 + max: 45426960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 107 - job_id: jn5q93xmp + job_id: jw56v6ynp job_status: Passed torchscript_onnx_qnn: - inference_time: 633.0 - throughput: 1579.778830963665 + inference_time: 631.0 + throughput: 1584.7860538827258 estimated_peak_memory_range: - min: 208896 - max: 48822992 + min: 0 + max: 38267648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: j7gjke685 + job_id: jmg98vlmp job_status: Passed - torchscript_onnx_ort: - inference_time: 772.0 - throughput: 1295.3367875647668 + torchscript_onnx: + inference_time: 755.0 + throughput: 1324.5033112582782 estimated_peak_memory_range: min: 212992 - max: 32138320 + max: 28568464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 139 - job_id: jqp4jv7lp + job_id: jqpyne10g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:46:44Z' + timestamp: '2024-06-22T22:52:52Z' - torchscript_onnx_tflite: - inference_time: 830.0 - throughput: 1204.8192771084337 + inference_time: 828.0 + throughput: 1207.729468599034 estimated_peak_memory_range: - min: 20480 - max: 1868176 + min: 28672 + max: 1564872 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 107 - job_id: jw56qn97g + job_id: jwgomy215 job_status: Passed torchscript_onnx_qnn: - inference_time: 888.0 - throughput: 1126.126126126126 + inference_time: 876.0 + throughput: 1141.552511415525 estimated_peak_memory_range: - min: 16384 - max: 128786224 + min: 225280 + max: 5079312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jmg99w7vg + job_id: jqp48ql2g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:46:41Z' + timestamp: '2024-06-22T22:52:45Z' + - torchscript_onnx_tflite: + inference_time: 826.0 + throughput: 1210.6537530266344 + estimated_peak_memory_range: + min: 24576 + max: 5251472 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 107 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 107 + job_id: j7gj1xv1g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 887.0 + throughput: 1127.3957158962796 + estimated_peak_memory_range: + min: 16384 + max: 124531408 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 140 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 140 + job_id: jo5m4rn75 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:52:48Z' - torchscript_onnx_qnn: - inference_time: 1086.0 - throughput: 920.8103130755064 + inference_time: 1001.0 + throughput: 999.000999000999 estimated_peak_memory_range: - min: 1765376 - max: 1765376 + min: 528384 + max: 528384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 139 - job_id: jygzvrq6p + job_id: jvgd0wx6p job_status: Passed - torchscript_onnx_ort: - inference_time: 1038.0 - throughput: 963.3911368015414 + torchscript_onnx: + inference_time: 1076.0 + throughput: 929.368029739777 estimated_peak_memory_range: - min: 3256320 - max: 3256320 + min: 241664 + max: 241664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 139 - job_id: jo5mv37q5 + job_id: j1p88o2qp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,15 +256,15 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:46:46Z' + timestamp: '2024-06-22T22:52:54Z' - name: MediaPipePoseLandmarkDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 1205.0 - throughput: 829.8755186721992 + inference_time: 1229.0 + throughput: 813.6696501220505 estimated_peak_memory_range: - min: 200704 - max: 2517320 + min: 12288 + max: 3016072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -232,14 +272,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 230 - job_id: jogkr3wo5 + job_id: j1gl7nv25 job_status: Passed torchscript_onnx_qnn: - inference_time: 1306.0 - throughput: 765.6967840735069 + inference_time: 1340.0 + throughput: 746.2686567164179 estimated_peak_memory_range: - min: 16384 - max: 13996512 + min: 12288 + max: 13332312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -247,14 +287,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 
306 - job_id: j1pvzvymg + job_id: jz5wxoe4p job_status: Passed - torchscript_onnx_ort: - inference_time: 1647.0 - throughput: 607.1645415907711 + torchscript_onnx: + inference_time: 1627.0 + throughput: 614.6281499692686 estimated_peak_memory_range: - min: 12288 - max: 25082496 + min: 16384 + max: 25452864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -262,7 +302,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 304 - job_id: jz57vxmr5 + job_id: jep2j8k65 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -271,13 +311,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:46:43Z' + timestamp: '2024-06-22T22:52:50Z' - torchscript_onnx_tflite: - inference_time: 864.0 - throughput: 1157.4074074074074 + inference_time: 878.0 + throughput: 1138.9521640091116 estimated_peak_memory_range: - min: 12288 - max: 90560000 + min: 16384 + max: 94818192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -285,14 +325,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 230 - job_id: j1gle39lp + job_id: j1p38kjm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 944.0 - throughput: 1059.322033898305 + inference_time: 953.0 + throughput: 1049.3179433368311 estimated_peak_memory_range: min: 802816 - max: 88829488 + max: 78260944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -300,14 +340,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 306 - job_id: jlpe4k005 + job_id: jnp1304n5 job_status: Passed - torchscript_onnx_ort: - inference_time: 1101.0 - throughput: 908.2652134423251 + torchscript_onnx: + inference_time: 1151.0 + throughput: 868.8097306689835 estimated_peak_memory_range: min: 802816 - max: 39260784 + max: 37814912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -315,7 +355,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 304 - job_id: j0pxeyq95 + job_id: j2p0ky405 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -324,13 +364,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:46:45Z' + timestamp: '2024-06-22T22:52:52Z' - torchscript_onnx_tflite: - inference_time: 1214.0 - throughput: 823.7232289950576 + inference_time: 1223.0 + throughput: 817.6614881439084 estimated_peak_memory_range: - min: 24576 - max: 2611056 + min: 28672 + max: 2986752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -338,14 +378,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 230 - job_id: j1p3qelz5 + job_id: j1pv436zp job_status: Passed torchscript_onnx_qnn: - inference_time: 1308.0 - throughput: 764.525993883792 + inference_time: 1304.0 + throughput: 766.8711656441718 estimated_peak_memory_range: - min: 434176 - max: 15229872 + min: 28672 + max: 13222296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -353,7 +393,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 306 - job_id: jnp1qeklg + job_id: j0pxmvk8g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -362,10 +402,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:46:41Z' + timestamp: '2024-06-22T22:52:46Z' + - torchscript_onnx_tflite: + inference_time: 1229.0 + throughput: 813.6696501220505 + estimated_peak_memory_range: + min: 12288 + max: 2315272 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 230 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 230 + job_id: jlpe29d8p + job_status: Passed + 
torchscript_onnx_qnn: + inference_time: 1327.0 + throughput: 753.5795026375282 + estimated_peak_memory_range: + min: 49152 + max: 10385584 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 306 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 306 + job_id: jegnx26j5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:52:48Z' - torchscript_onnx_qnn: - inference_time: 1463.0 - throughput: 683.526999316473 + inference_time: 1431.0 + throughput: 698.8120195667366 estimated_peak_memory_range: min: 786432 max: 786432 @@ -376,14 +454,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 305 - job_id: jz5wmq0jg + job_id: jz576zyng job_status: Passed - torchscript_onnx_ort: - inference_time: 1886.0 - throughput: 530.2226935312831 + torchscript_onnx: + inference_time: 1613.0 + throughput: 619.9628022318661 estimated_peak_memory_range: - min: 19697664 - max: 19697664 + min: 14336000 + max: 14336000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -391,7 +469,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 304 - job_id: jegnr34m5 + job_id: jogkdzvvp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -400,4 +478,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:46:46Z' + timestamp: '2024-06-22T22:52:54Z' diff --git a/qai_hub_models/models/mediapipe_selfie/export.py b/qai_hub_models/models/mediapipe_selfie/export.py index 374affc3..ec25ec48 100644 --- a/qai_hub_models/models/mediapipe_selfie/export.py +++ b/qai_hub_models/models/mediapipe_selfie/export.py @@ -38,7 +38,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,13 +117,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -169,7 +168,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -187,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -209,7 +208,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/mediapipe_selfie/perf.yaml b/qai_hub_models/models/mediapipe_selfie/perf.yaml index 
7f0d5fe1..94172481 100644 --- a/qai_hub_models/models/mediapipe_selfie/perf.yaml +++ b/qai_hub_models/models/mediapipe_selfie/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: MediaPipe-Selfie-Segmentation performance_metrics: - torchscript_onnx_tflite: - inference_time: 806.0 - throughput: 1240.6947890818858 + inference_time: 733.0 + throughput: 1364.256480218281 estimated_peak_memory_range: min: 12288 - max: 2385600 + max: 1795456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jqpyv6l4p + job_id: jw56v62np job_status: Passed torchscript_onnx_qnn: - inference_time: 784.0 - throughput: 1275.5102040816328 + inference_time: 769.0 + throughput: 1300.3901170351105 estimated_peak_memory_range: - min: 2240512 - max: 96205696 + min: 802816 + max: 4266008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jogkr31o5 + job_id: j7gj1xd1g job_status: Passed - torchscript_onnx_ort: - inference_time: 1346.0 - throughput: 742.9420505200594 + torchscript_onnx: + inference_time: 1329.0 + throughput: 752.4454477050414 estimated_peak_memory_range: - min: 786432 - max: 76785816 + min: 749568 + max: 5577256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: j1p3qedz5 + job_id: jnp1302n5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:47:12Z' + timestamp: '2024-06-22T22:53:24Z' - torchscript_onnx_tflite: - inference_time: 537.0 - throughput: 1862.1973929236499 + inference_time: 501.0 + throughput: 1996.007984031936 estimated_peak_memory_range: min: 12288 - max: 24988016 + max: 26270224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: j2p0elwe5 + job_id: j1p38knm5 job_status: Passed torchscript_onnx_qnn: inference_time: 512.0 throughput: 1953.125 estimated_peak_memory_range: - min: 176128 - max: 45965632 + min: 802816 + max: 39868832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jn5q93nmp + job_id: jlpe29o8p job_status: Passed - torchscript_onnx_ort: - inference_time: 904.0 - throughput: 1106.1946902654868 + torchscript_onnx: + inference_time: 924.0 + throughput: 1082.2510822510822 estimated_peak_memory_range: - min: 12288 - max: 20791344 + min: 352256 + max: 19826912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jwgoe3xdp + job_id: jvgd0wn6p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:47:13Z' + timestamp: '2024-06-22T22:53:25Z' - torchscript_onnx_tflite: - inference_time: 803.0 - throughput: 1245.3300124533 + inference_time: 728.0 + 
throughput: 1373.6263736263736 estimated_peak_memory_range: min: 24576 - max: 1606304 + max: 1854160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: j1p8wzn8p + job_id: jwgomyz15 job_status: Passed torchscript_onnx_qnn: - inference_time: 782.0 - throughput: 1278.772378516624 + inference_time: 773.0 + throughput: 1293.6610608020699 estimated_peak_memory_range: - min: 24576 - max: 12402272 + min: 806912 + max: 74241896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jw56qnx7g + job_id: jz5wxow4p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:47:11Z' + timestamp: '2024-06-22T22:53:21Z' + - torchscript_onnx_tflite: + inference_time: 725.0 + throughput: 1379.3103448275863 + estimated_peak_memory_range: + min: 12288 + max: 4593048 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 118 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 118 + job_id: j1pv43qzp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 781.0 + throughput: 1280.4097311139565 + estimated_peak_memory_range: + min: 802816 + max: 8862784 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 138 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 138 + job_id: jmg98v0mp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:53:23Z' - torchscript_onnx_qnn: - inference_time: 920.0 - throughput: 1086.9565217391305 + inference_time: 879.0 + throughput: 1137.6564277588168 estimated_peak_memory_range: min: 786432 max: 786432 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: j1gle3dlp + job_id: jygzwe24g job_status: Passed - torchscript_onnx_ort: - inference_time: 1362.0 - throughput: 734.2143906020558 + torchscript_onnx: + inference_time: 1342.0 + throughput: 745.156482861401 estimated_peak_memory_range: - min: 2674688 - max: 2674688 + min: 1925120 + max: 1925120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: j1pvzv8mg + job_id: jz576z2ng job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:47:14Z' + timestamp: '2024-06-22T22:53:26Z' diff --git a/qai_hub_models/models/midas/app.py b/qai_hub_models/models/midas/app.py index 949c87b1..7de7758e 100644 --- a/qai_hub_models/models/midas/app.py +++ b/qai_hub_models/models/midas/app.py @@ -51,8 +51,7 @@ def estimate_depth( image, (self.input_height, self.input_width) ) image_tensor = transforms.ToTensor()(resized_image).unsqueeze(0) - with torch.no_grad(): - prediction = self.model(image_tensor) + prediction = self.model(image_tensor) prediction = undo_resize_pad( prediction.unsqueeze(0), image.size, scale, padding ) diff --git a/qai_hub_models/models/midas/export.py b/qai_hub_models/models/midas/export.py index adcb9fbe..7d60c6ad 100644 --- a/qai_hub_models/models/midas/export.py +++ b/qai_hub_models/models/midas/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", 
+ device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,7 +116,6 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace( model.to("cpu"), make_torch_inputs(input_spec), check_trace=False ) @@ -124,7 +123,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -170,7 +169,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/midas/model.py b/qai_hub_models/models/midas/model.py index 669ecdd0..c0d9babb 100644 --- a/qai_hub_models/models/midas/model.py +++ b/qai_hub_models/models/midas/model.py @@ -21,7 +21,7 @@ from qai_hub_models.utils.input_spec import InputSpec MODEL_ID = __name__.split(".")[-2] -MODEL_ASSET_VERSION = 2 +MODEL_ASSET_VERSION = 3 SOURCE_REPO = "https://github.com/isl-org/MiDaS/" REPO_COMMIT = "bdc4ed64c095e026dc0a2f17cabb14d58263decb" diff --git a/qai_hub_models/models/midas/perf.yaml b/qai_hub_models/models/midas/perf.yaml index 047b62cc..e961f646 100644 --- a/qai_hub_models/models/midas/perf.yaml +++ b/qai_hub_models/models/midas/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: Midas-V2 performance_metrics: - torchscript_onnx_tflite: - inference_time: 3428.0 - throughput: 291.71528588098016 + inference_time: 3432.0 + throughput: 291.3752913752914 estimated_peak_memory_range: - min: 12288 - max: 2878504 + min: 16384 + max: 1861632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 139 - job_id: jlpe4kq05 + job_id: j0pxmv98g job_status: Passed torchscript_onnx_qnn: - inference_time: 3372.0 - throughput: 296.55990510083035 + inference_time: 3375.0 + throughput: 296.2962962962963 estimated_peak_memory_range: min: 806912 - max: 11534808 + max: 11813464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 199 - job_id: jmg99wrvg + job_id: jep2j8x65 job_status: Passed - torchscript_onnx_ort: - inference_time: 3451.0 - throughput: 289.77108084613155 + torchscript_onnx: + inference_time: 3479.0 + throughput: 287.4389192296637 estimated_peak_memory_range: - min: 12288 - max: 177641176 + min: 0 + max: 128986832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 199 - job_id: jqp4jvzlp + job_id: jn5qw8ee5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:47:43Z' + timestamp: '2024-06-22T22:53:58Z' - torchscript_onnx_tflite: - inference_time: 2407.0 - throughput: 415.45492314083924 + inference_time: 2410.0 + throughput: 414.9377593360996 estimated_peak_memory_range: min: 12288 - max: 82857536 + max: 88013264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 139 - job_id: jygzvr66p + job_id: jo5m4re75 job_status: Passed torchscript_onnx_qnn: - inference_time: 2404.0 - throughput: 415.97337770382694 + inference_time: 2396.0 + throughput: 417.3622704507512 estimated_peak_memory_range: min: 802816 - max: 65062640 + max: 56145456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 199 - job_id: jnp1qe9lg + job_id: jqpynez0g job_status: Passed - torchscript_onnx_ort: - inference_time: 2416.0 - throughput: 413.9072847682119 + torchscript_onnx: + inference_time: 2508.0 + throughput: 398.72408293460927 estimated_peak_memory_range: - min: 389120 - max: 38273760 + min: 483328 + max: 35886496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 199 - job_id: j0pxeyw95 + job_id: j1gl7n625 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:47:44Z' + timestamp: '2024-06-22T22:53:59Z' - torchscript_onnx_tflite: - inference_time: 3435.0 - throughput: 291.1208151382824 + inference_time: 3428.0 + throughput: 291.71528588098016 estimated_peak_memory_range: min: 16384 - max: 2408992 + max: 1655616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 139 - job_id: jz5wmqkjg + job_id: jegnx20j5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3369.0 - throughput: 296.8239833778569 + inference_time: 3383.0 + throughput: 295.5956251847473 estimated_peak_memory_range: - min: 802816 - max: 11302408 + min: 806912 + max: 14656904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 199 - job_id: jz57vxqr5 + job_id: j1p88o0qp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:47:42Z' + timestamp: '2024-06-22T22:53:55Z' + - torchscript_onnx_tflite: + inference_time: 3429.0 + throughput: 291.6302128900554 + estimated_peak_memory_range: + min: 16384 + max: 2350168 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 139 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 139 + job_id: jopr9k6kp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 3377.0 + throughput: 296.12081729345573 + estimated_peak_memory_range: + min: 802816 + max: 14381152 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 199 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 199 + job_id: jogkdz7vp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: 
Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:53:57Z' - torchscript_onnx_qnn: - inference_time: 3529.0 - throughput: 283.36639274582035 + inference_time: 3365.0 + throughput: 297.1768202080238 estimated_peak_memory_range: min: 786432 max: 786432 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 199 - job_id: jvgd7oklg + job_id: j2p0ky305 job_status: Passed - torchscript_onnx_ort: - inference_time: 3447.0 - throughput: 290.1073397156948 + torchscript_onnx: + inference_time: 3439.0 + throughput: 290.7822041291073 estimated_peak_memory_range: - min: 9965568 - max: 9965568 + min: 864256 + max: 864256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 199 - job_id: jo5mv3jq5 + job_id: jw56v6enp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:47:45Z' + timestamp: '2024-06-22T22:54:00Z' diff --git a/qai_hub_models/models/midas_quantized/README.md b/qai_hub_models/models/midas_quantized/README.md new file mode 100644 index 00000000..56c96394 --- /dev/null +++ b/qai_hub_models/models/midas_quantized/README.md @@ -0,0 +1,61 @@ +[![Qualcomm® AI Hub Models](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/quic-logo.jpg)](../../README.md) + + +# [Midas-V2-Quantized: Quantized Deep Convolutional Neural Network model for depth estimation](https://aihub.qualcomm.com/models/midas_quantized) + +Midas is designed for estimating depth at each point in an image. + +This is based on the implementation of Midas-V2-Quantized found +[here](https://github.com/isl-org/MiDaS). This repository contains scripts for optimized on-device +export suitable to run on Qualcomm® devices. More details on model performance +across various devices can be found [here](https://aihub.qualcomm.com/models/midas_quantized). + +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on +a hosted Qualcomm® device. + + + + +## Example & Usage + +Install the package via pip: +```bash +pip install "qai_hub_models[midas_quantized]" +``` + + +Once installed, run the following simple CLI demo: + +```bash +python -m qai_hub_models.models.midas_quantized.demo +``` +More details on the CLI tool can be found with the `--help` option. See +[demo.py](demo.py) for sample usage of the model including pre/post processing +scripts. Please refer to our [general instructions on using +models](../../../#getting-started) for more usage instructions. + +## Export for on-device deployment + +This repository contains export scripts that produce a model optimized for +on-device deployment. This can be run as follows: + +```bash +python -m qai_hub_models.models.midas_quantized.export +``` +Additional options are documented with the `--help` option. Note that the above +script requires access to Deployment instructions for Qualcomm® AI Hub. + +## License +- The license for the original implementation of Midas-V2-Quantized can be found + [here](https://github.com/isl-org/MiDaS/blob/master/LICENSE).
+- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) + +## References +* [Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer](https://arxiv.org/abs/1907.01341v3) +* [Source Model Implementation](https://github.com/isl-org/MiDaS) + +## Community +* Join [our AI Hub Slack community](https://qualcomm-ai-hub.slack.com/join/shared_invite/zt-2d5zsmas3-Sj0Q9TzslueCjS31eXG2UA#/shared-invite/email) to collaborate, post questions and learn more about on-device AI. +* For questions or feedback please [reach out to us](mailto:ai-hub-support@qti.qualcomm.com). + + diff --git a/qai_hub_models/models/midas_quantized/__init__.py b/qai_hub_models/models/midas_quantized/__init__.py new file mode 100644 index 00000000..b9d6a15c --- /dev/null +++ b/qai_hub_models/models/midas_quantized/__init__.py @@ -0,0 +1,8 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models.midas.app import MidasApp as App # noqa: F401 + +from .model import MODEL_ID # noqa: F401 +from .model import MidasQuantizable as Model # noqa: F401 diff --git a/qai_hub_models/models/midas_quantized/conftest.py b/qai_hub_models/models/midas_quantized/conftest.py new file mode 100644 index 00000000..10f869cc --- /dev/null +++ b/qai_hub_models/models/midas_quantized/conftest.py @@ -0,0 +1,39 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +import inspect + +import pytest + +from qai_hub_models.models.midas_quantized import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +# Instantiate the model only once for all tests. +# Mock from_pretrained to always return the initialized model. +# This speeds up tests and limits memory leaks. +@pytest.fixture(scope="module", autouse=True) +def cached_from_pretrained(): + with pytest.MonkeyPatch.context() as mp: + pretrained_cache = {} + from_pretrained = Model.from_pretrained + sig = inspect.signature(from_pretrained) + + @skip_clone_repo_check + def _cached_from_pretrained(*args, **kwargs): + cache_key = str(args) + str(kwargs) + model = pretrained_cache.get(cache_key, None) + if model: + return model + else: + model = from_pretrained(*args, **kwargs) + pretrained_cache[cache_key] = model + return model + + _cached_from_pretrained.__signature__ = sig + + mp.setattr(Model, "from_pretrained", _cached_from_pretrained) + yield mp diff --git a/qai_hub_models/models/midas_quantized/demo.py b/qai_hub_models/models/midas_quantized/demo.py new file mode 100644 index 00000000..0c152370 --- /dev/null +++ b/qai_hub_models/models/midas_quantized/demo.py @@ -0,0 +1,14 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models.midas.demo import midas_demo +from qai_hub_models.models.midas_quantized.model import MidasQuantizable + + +def main(is_test: bool = False): + midas_demo(MidasQuantizable, is_test) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/midas_quantized/export.py b/qai_hub_models/models/midas_quantized/export.py new file mode 100644 index 00000000..74a435bd --- /dev/null +++ b/qai_hub_models/models/midas_quantized/export.py @@ -0,0 +1,232 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import os +import warnings +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple, cast + +import qai_hub as hub + +from qai_hub_models.models.midas_quantized import Model +from qai_hub_models.utils.args import ( + export_parser, + get_input_spec_kwargs, + get_model_kwargs, +) +from qai_hub_models.utils.base_model import TargetRuntime +from qai_hub_models.utils.compare import torch_inference +from qai_hub_models.utils.printing import ( + print_inference_metrics, + print_on_target_demo_cmd, + print_profile_metrics_from_job, +) +from qai_hub_models.utils.qai_hub_helpers import ( + can_access_qualcomm_ai_hub, + export_without_hub_access, + transpose_channel_first_to_last, +) +from qai_hub_models.utils.qnn_helpers import get_qnn_inputs + + +def export_model( + device: str = "Samsung Galaxy S23 (Family)", + chipset: Optional[str] = None, + skip_profiling: bool = False, + skip_inferencing: bool = False, + skip_downloading: bool = False, + skip_summary: bool = False, + output_dir: Optional[str] = None, + target_runtime: TargetRuntime = TargetRuntime.TFLITE, + compile_options: str = "", + profile_options: str = "", + **additional_model_kwargs, +) -> Tuple[hub.CompileJob, Optional[hub.ProfileJob], Optional[hub.InferenceJob]] | List[ + str +]: + """ + This function accomplishes 6 main tasks: + + 1. Instantiates a PyTorch model and converts it to a traced TorchScript format. + 2. Compiles the model to an asset that can be run on device. + 3. Profiles the model performance on real devices. + 4. Inferences the model on sample inputs. + 5. Downloads the model asset to the local directory. + 6. Summarizes the results from profiling and inference. + + Each of the last four steps can be optionally skipped using the input options. + + Parameters: + device: Device for which to export the model. + Full list of available devices can be found by running `hub.get_devices()`. + Defaults to DEFAULT_DEVICE if not specified. + chipset: If set, will choose a random device with this chipset. + Overrides the `device` argument. + skip_profiling: If set, skips profiling of compiled model on real devices. + skip_inferencing: If set, skips computing on-device outputs from sample data. + skip_downloading: If set, skips downloading of compiled model. + skip_summary: If set, skips waiting for and summarizing results + from profiling and inference. + output_dir: Directory to store generated assets (e.g. compiled model). + Defaults to `/build/`. + target_runtime: Which on-device runtime to target. Default is TFLite. 
+ compile_options: Additional options to pass when submitting the compile job. + profile_options: Additional options to pass when submitting the profile job. + **additional_model_kwargs: Additional optional kwargs used to customize + `model_cls.from_pretrained` and `model.get_input_spec` + + Returns: + A 3-tuple of: + * A CompileJob object containing metadata about the compile job submitted to hub. + * A ProfileJob containing metadata about the profile job (None if profiling skipped). + * An InferenceJob containing metadata about the inference job (None if inferencing skipped). + """ + model_name = "midas_quantized" + output_path = Path(output_dir or Path.cwd() / "build" / model_name) + if chipset: + hub_device = hub.Device(attributes=f"chipset:{chipset}") + else: + hub_device = hub.Device(name=device) + if not can_access_qualcomm_ai_hub(): + return export_without_hub_access( + "midas_quantized", + "Midas-V2-Quantized", + device, + skip_profiling, + skip_inferencing, + skip_downloading, + skip_summary, + output_path, + target_runtime, + compile_options, + profile_options, + ) + + # 1. Initialize PyTorch model + model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) + input_spec = model.get_input_spec( + **get_input_spec_kwargs(model, additional_model_kwargs) + ) + + # Trace the model + source_model = model.convert_to_hub_source_model( + target_runtime, output_path, input_spec, check_trace=False + ) + if target_runtime == TargetRuntime.TFLITE: + quant_calibration_data = None + else: + quant_calibration_data = model.get_calibration_data(target_runtime, input_spec) + + # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) + channel_last_flags = ( + " --force_channel_last_input image" + if target_runtime != TargetRuntime.ONNX + else "" + ) + + # 2. Compile the model to an on-device asset + model_compile_options = model.get_hub_compile_options( + target_runtime, compile_options + channel_last_flags, hub_device + ) + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( + model=source_model, + input_specs=input_spec, + device=hub_device, + name=model_name, + calibration_data=quant_calibration_data, + options=model_compile_options, + ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) + + # 3. Profile the model asset on real devices + profile_job: Optional[hub.client.ProfileJob] = None + if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print(f"Profiling model {model_name} on a hosted device.") + submitted_profile_job = hub.submit_profile_job( + model=compile_job.get_target_model(), + device=hub_device, + name=model_name, + options=profile_options_all, + ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) + + # 4. Run inference on-device with sample inputs + inference_job: Optional[hub.client.InferenceJob] = None + if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print( + f"Running inference for {model_name} on a hosted device with example inputs." 
+ ) + sample_inputs = model.sample_inputs(input_spec) + hub_inputs = sample_inputs + if target_runtime == TargetRuntime.QNN: + hub_inputs = get_qnn_inputs(compile_job, sample_inputs) + # Convert inputs from channel first to channel last + hub_inputs = ( + sample_inputs + if target_runtime == TargetRuntime.ONNX + else transpose_channel_first_to_last("image", sample_inputs, target_runtime) + ) + submitted_inference_job = hub.submit_inference_job( + model=compile_job.get_target_model(), + inputs=hub_inputs, + device=hub_device, + name=model_name, + options=profile_options_all, + ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) + + # 5. Download the model asset to a local file + if not skip_downloading: + if target_runtime == TargetRuntime.QNN: + target_runtime_extension = "so" + elif target_runtime == TargetRuntime.TFLITE: + target_runtime_extension = "tflite" + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: + target_runtime_extension = "onnx" + + os.makedirs(output_path, exist_ok=True) + target_model: hub.Model = compile_job.get_target_model() # type: ignore + target_model.download( + str(output_path / f"{model_name}.{target_runtime_extension}") + ) + + # 6. Summarize the results from profiling and inference + if not skip_summary and not skip_profiling: + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore + print_profile_metrics_from_job(profile_job, profile_data) + + if not skip_summary and not skip_inferencing: + torch_out = torch_inference(model, sample_inputs) + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore + print_inference_metrics(inference_job, inference_result, torch_out) + + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + + return (compile_job, profile_job, inference_job) + + +def main(): + warnings.filterwarnings("ignore") + parser = export_parser(model_cls=Model) + args = parser.parse_args() + export_model(**vars(args)) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/midas_quantized/info.yaml b/qai_hub_models/models/midas_quantized/info.yaml new file mode 100644 index 00000000..921215a4 --- /dev/null +++ b/qai_hub_models/models/midas_quantized/info.yaml @@ -0,0 +1,35 @@ +name: Midas-V2-Quantized +# id must match with the model dir name in qai_hub_models +id: midas_quantized +status: public +headline: Quantized Deep Convolutional Neural Network model for depth estimation. +domain: Computer Vision +use_case: Depth Estimation +description: Midas is designed for estimating depth at each point in an image. 
+tags: + - quantized +research_paper: https://arxiv.org/abs/1907.01341v3 +research_paper_title: 'Towards Robust Monocular Depth Estimation: Mixing Datasets + for Zero-shot Cross-dataset Transfer' +license: https://github.com/isl-org/MiDaS/blob/master/LICENSE +deploy_license: + https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +source_repo: https://github.com/isl-org/MiDaS +technical_details: + Model checkpoint: MiDaS_small + Input resolution: 256x256 + Number of parameters: 16.6M + Model size: 16.6 MB +applicable_scenarios: + - Anomaly Detection + - Inventory Management +related_models: [] +form_factors: + - Phone + - Tablet + - IoT +has_static_banner: yes +has_animated_banner: no +license_type: mit +deploy_license_type: AI Model Hub License +dataset: [] diff --git a/qai_hub_models/models/midas_quantized/model.py b/qai_hub_models/models/midas_quantized/model.py new file mode 100644 index 00000000..a26b6fc2 --- /dev/null +++ b/qai_hub_models/models/midas_quantized/model.py @@ -0,0 +1,103 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from __future__ import annotations + +# isort: off +# This verifies aimet is installed, and this must be included first. +from qai_hub_models.utils.quantization_aimet import ( + AIMETQuantizableMixin, + constrain_quantized_inputs_to_image_range, + tie_observers, + convert_all_depthwise_to_per_tensor, +) + +# isort: on + +import torch +from aimet_torch.batch_norm_fold import fold_all_batch_norms +from aimet_torch.cross_layer_equalization import CrossLayerScaling +from aimet_torch.model_preparer import prepare_model +from aimet_torch.quantsim import QuantizationSimModel + +from qai_hub_models.models.midas.model import Midas +from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset + +MODEL_ID = __name__.split(".")[-2] +MODEL_ASSET_VERSION = 1 +DEFAULT_ENCODINGS = "midas_quantized_encodings.json" + + +class MidasQuantizable(AIMETQuantizableMixin, Midas): + """Midas with post train quantization support. + + Supports only 8 bit weights and activations, and only loads pre-quantized checkpoints. + Support for quantizing using your own weights & data will come at a later date.""" + + def __init__( + self, + model: QuantizationSimModel, + ) -> None: + # Input is already normalized by sim_model. Disable it in the wrapper model. + Midas.__init__(self, model.model, normalize_input=False) + AIMETQuantizableMixin.__init__( + self, + model, + ) + + @classmethod + def from_pretrained( + cls, + aimet_encodings: str | None = "DEFAULT", + ) -> "MidasQuantizable": + """ + Parameters: + aimet_encodings: + if "DEFAULT": Loads the model with aimet encodings calibrated on imagenette. + elif None: Doesn't load any encodings. Used when computing encodings. + else: Interprets as a filepath and loads the encodings stored there. 
+ """ + model = Midas.from_pretrained() + input_shape = cls.get_input_spec()["image"][0] + dummy_input = torch.rand(input_shape) + + model = prepare_model(model) + fold_all_batch_norms(model, input_shape, dummy_input) + CrossLayerScaling.scale_model(model, input_shape, dummy_input) + sim = QuantizationSimModel( + model, + quant_scheme="tf_enhanced", + default_param_bw=8, + default_output_bw=8, + config_file=get_default_aimet_config(), + dummy_input=dummy_input, + ) + convert_all_depthwise_to_per_tensor(sim.model) + tie_observers(sim) + constrain_quantized_inputs_to_image_range(sim) + + if aimet_encodings: + if aimet_encodings == "DEFAULT": + aimet_encodings = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, DEFAULT_ENCODINGS + ).fetch() + sim.load_encodings(aimet_encodings, strict=False) + + return cls(sim) + + def forward(self, image): + """ + Runs the model on an image tensor and returns a tensor of depth estimates + + Parameters: + image: A [1, 3, H, W] image. + Pixel values pre-processed for encoder consumption. + Range: float[0, 1] if self.normalize_input, else ~[-2.5, 2.5] + 3-channel Color Space: RGB + + Returns: + Tensor of depth estimates of size [1, H, W]. + """ + return self.model(image) diff --git a/qai_hub_models/models/midas_quantized/perf.yaml b/qai_hub_models/models/midas_quantized/perf.yaml new file mode 100644 index 00000000..c8b8921d --- /dev/null +++ b/qai_hub_models/models/midas_quantized/perf.yaml @@ -0,0 +1,265 @@ +aggregated: + supported_oses: + - Android + supported_devices: + - Google Pixel 3 + - Google Pixel 3a + - Google Pixel 3a XL + - Google Pixel 4 + - Google Pixel 4a + - Google Pixel 5a 5G + - QCS8250 (Proxy) + - QCS8550 (Proxy) + - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) + - Samsung Galaxy S21 + - Samsung Galaxy S21 Ultra + - Samsung Galaxy S21+ + - Samsung Galaxy S22 5G + - Samsung Galaxy S22 Ultra 5G + - Samsung Galaxy S22+ 5G + - Samsung Galaxy S23 + - Samsung Galaxy S23 Ultra + - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ + - Samsung Galaxy Tab S8 + - Snapdragon X Elite CRD + - Xiaomi 12 + - Xiaomi 12 Pro + supported_chipsets: + - Qcs8250 + - Qcs8550 + - Sa8540p + - Sa8775p + - Snapdragon® 8 Gen 1 + - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 + - Snapdragon® 888 + - Snapdragon® X Elite +models: +- name: Midas-V2-Quantized + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 1154.0 + throughput: 866.5511265164645 + estimated_peak_memory_range: + min: 12288 + max: 1987952 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 144 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 144 + job_id: jwgomyk15 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1482.0 + throughput: 674.7638326585695 + estimated_peak_memory_range: + min: 16384 + max: 287985312 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 148 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 148 + job_id: jmg98vjmp + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-06-22T22:55:02Z' + - torchscript_onnx_tflite: + inference_time: 831.0 + throughput: 1203.3694344163657 + estimated_peak_memory_range: + min: 12288 + max: 87642336 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 144 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 144 + job_id: j1pv430zp 
+ job_status: Passed + torchscript_onnx_qnn: + inference_time: 1073.0 + throughput: 931.9664492078285 + estimated_peak_memory_range: + min: 208896 + max: 58965184 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 148 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 148 + job_id: jnp130yn5 + job_status: Passed + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-06-22T22:55:03Z' + - torchscript_onnx_tflite: + inference_time: 1161.0 + throughput: 861.3264427217915 + estimated_peak_memory_range: + min: 12288 + max: 1709936 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 144 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 144 + job_id: j7gj1xz1g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1496.0 + throughput: 668.4491978609626 + estimated_peak_memory_range: + min: 151552 + max: 165684208 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 148 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 148 + job_id: jz5wxo2zp + job_status: Passed + reference_device_info: + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: Qcs8550 + timestamp: '2024-06-22T22:55:05Z' + - torchscript_onnx_tflite: + inference_time: 1160.0 + throughput: 862.0689655172414 + estimated_peak_memory_range: + min: 12288 + max: 1648160 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 144 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 144 + job_id: jlpe29e8p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1493.0 + throughput: 669.7923643670462 + estimated_peak_memory_range: + min: 20480 + max: 11342368 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 148 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 148 + job_id: jmg98vjqp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:55:07Z' + - torchscript_onnx_tflite: + inference_time: 3825.0 + throughput: 261.437908496732 + estimated_peak_memory_range: + min: 12288 + max: 50396464 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 144 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 144 + job_id: jygzweo4g + job_status: Passed + reference_device_info: + name: RB3 Gen 2 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: Qcs6490 + timestamp: '2024-06-22T22:54:59Z' + - torchscript_onnx_tflite: + inference_time: 15476.0 + throughput: 64.61617989144482 + estimated_peak_memory_range: + min: 106496 + max: 3734664 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 144 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 144 + job_id: jz5wxo24p + job_status: Passed + reference_device_info: + name: RB5 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: Qcs8250 + timestamp: '2024-06-22T22:55:00Z' + - torchscript_onnx_qnn: + inference_time: 1521.0 + throughput: 657.4621959237344 + estimated_peak_memory_range: + min: 483328 + max: 483328 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 148 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 148 + job_id: jvgd0we6p + job_status: Passed + reference_device_info: + name: 
Snapdragon X Elite CRD + os: '11' + form_factor: Compute + os_name: Windows + manufacturer: Qualcomm + chipset: Snapdragon® X Elite + timestamp: '2024-06-22T22:55:04Z' diff --git a/qai_hub_models/models/midas_quantized/requirements.txt b/qai_hub_models/models/midas_quantized/requirements.txt new file mode 100644 index 00000000..4942579d --- /dev/null +++ b/qai_hub_models/models/midas_quantized/requirements.txt @@ -0,0 +1,2 @@ +aimet-torch==1.31.2; sys_platform == "linux" +timm==1.0.3 diff --git a/qai_hub_models/models/midas_quantized/test.py b/qai_hub_models/models/midas_quantized/test.py new file mode 100644 index 00000000..032a129e --- /dev/null +++ b/qai_hub_models/models/midas_quantized/test.py @@ -0,0 +1,39 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +import numpy as np + +from qai_hub_models.models.midas.app import MidasApp +from qai_hub_models.models.midas.demo import INPUT_IMAGE_ADDRESS +from qai_hub_models.models.midas_quantized.demo import main as demo_main +from qai_hub_models.models.midas_quantized.model import ( + MODEL_ASSET_VERSION, + MODEL_ID, + MidasQuantizable, +) +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image +from qai_hub_models.utils.testing import skip_clone_repo_check + +OUTPUT_IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, "midas_output.png" +) + + +# Verify that the output from Torch is as expected. +@skip_clone_repo_check +def test_task(): + (_, _, height, width) = MidasQuantizable.get_input_spec()["image"][0] + app = MidasApp(MidasQuantizable.from_pretrained(), height, width) + original_image = load_image(INPUT_IMAGE_ADDRESS) + output_image = app.estimate_depth(original_image) + output_image_oracle = load_image(OUTPUT_IMAGE_ADDRESS) + + np.testing.assert_allclose( + np.asarray(output_image), np.asarray(output_image_oracle), atol=3 + ) + + +@skip_clone_repo_check +def test_demo(): + demo_main(is_test=True) diff --git a/qai_hub_models/models/mnasnet05/export.py b/qai_hub_models/models/mnasnet05/export.py index ed49880c..19e67885 100644 --- a/qai_hub_models/models/mnasnet05/export.py +++ b/qai_hub_models/models/mnasnet05/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime 
in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mnasnet05/perf.yaml b/qai_hub_models/models/mnasnet05/perf.yaml index 0ca342e3..37d95b49 100644 --- a/qai_hub_models/models/mnasnet05/perf.yaml +++ b/qai_hub_models/models/mnasnet05/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: MNASNet05 performance_metrics: - torchscript_onnx_tflite: - inference_time: 782.0 - throughput: 1278.772378516624 + inference_time: 775.0 + throughput: 1290.3225806451612 estimated_peak_memory_range: - min: 20480 - max: 1961704 + min: 24576 + max: 1553160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jlpe4kj05 + job_id: jegnx2mv5 job_status: Passed torchscript_onnx_qnn: - inference_time: 826.0 - throughput: 1210.6537530266344 + inference_time: 823.0 + throughput: 1215.0668286755772 estimated_peak_memory_range: - min: 618496 - max: 5537568 + min: 12288 + max: 35900696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jmg99w6vg + job_id: j2p0ky225 job_status: Passed - torchscript_onnx_ort: - inference_time: 798.0 - throughput: 1253.1328320802006 + torchscript_onnx: + inference_time: 800.0 + throughput: 1250.0 estimated_peak_memory_range: - min: 12288 - max: 155086488 + min: 16384 + max: 19204504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 104 - job_id: jqp4jvqlp + job_id: jw56v6zvp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:49:16Z' + timestamp: '2024-06-22T22:55:41Z' - torchscript_onnx_tflite: - inference_time: 546.0 - throughput: 1831.5018315018315 + inference_time: 521.0 + throughput: 1919.3857965451057 estimated_peak_memory_range: min: 12288 - max: 46076672 + max: 48642336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jygzvr16p + job_id: jopr9k2vp job_status: Passed torchscript_onnx_qnn: - inference_time: 564.0 - throughput: 1773.049645390071 + inference_time: 565.0 + throughput: 1769.9115044247787 estimated_peak_memory_range: - min: 0 - max: 41703392 + min: 626688 + max: 37475152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jnp1qerlg + job_id: j1p88omzp job_status: Passed - torchscript_onnx_ort: - inference_time: 560.0 - throughput: 1785.7142857142858 + torchscript_onnx: + inference_time: 578.0 + throughput: 1730.1038062283737 estimated_peak_memory_range: - min: 31727616 - max: 59957408 + min: 225280 + max: 21866704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 104 - job_id: j0pxeyv95 + job_id: j1p38k1x5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 
+142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:49:17Z' + timestamp: '2024-06-22T22:55:42Z' - torchscript_onnx_tflite: - inference_time: 773.0 - throughput: 1293.6610608020699 + inference_time: 783.0 + throughput: 1277.139208173691 estimated_peak_memory_range: - min: 12288 - max: 159861568 + min: 16384 + max: 1688792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jz5wmqjjg + job_id: jep2j89x5 job_status: Passed torchscript_onnx_qnn: - inference_time: 826.0 - throughput: 1210.6537530266344 + inference_time: 814.0 + throughput: 1228.5012285012285 estimated_peak_memory_range: - min: 16384 - max: 14027976 + min: 618496 + max: 5453992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jz57vxzr5 + job_id: jn5qw8r75 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:49:15Z' + timestamp: '2024-06-22T22:55:39Z' + - torchscript_onnx_tflite: + inference_time: 781.0 + throughput: 1280.4097311139565 + estimated_peak_memory_range: + min: 24576 + max: 1741208 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 71 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 71 + job_id: jqpynejrg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 820.0 + throughput: 1219.5121951219512 + estimated_peak_memory_range: + min: 16384 + max: 98992176 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 103 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 103 + job_id: j1gl7n2e5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:55:40Z' - torchscript_onnx_qnn: - inference_time: 946.0 - throughput: 1057.0824524312895 + inference_time: 942.0 + throughput: 1061.5711252653928 estimated_peak_memory_range: - min: 606208 - max: 606208 + min: 602112 + max: 602112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jvgd7ojlg + job_id: jogkdzqyp job_status: Passed - torchscript_onnx_ort: - inference_time: 807.0 - throughput: 1239.1573729863692 + torchscript_onnx: + inference_time: 810.0 + throughput: 1234.567901234568 estimated_peak_memory_range: - min: 18001920 - max: 18001920 + min: 14974976 + max: 14974976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 104 - job_id: jo5mv3rq5 + job_id: jwgomyn45 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:49:18Z' + timestamp: '2024-06-22T22:55:44Z' diff --git a/qai_hub_models/models/mobilenet_v2/export.py b/qai_hub_models/models/mobilenet_v2/export.py index 3201e32a..23b8df96 100644 --- a/qai_hub_models/models/mobilenet_v2/export.py +++ b/qai_hub_models/models/mobilenet_v2/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = 
False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mobilenet_v2/model.py b/qai_hub_models/models/mobilenet_v2/model.py index e52bd028..16f8cf09 100644 --- a/qai_hub_models/models/mobilenet_v2/model.py +++ b/qai_hub_models/models/mobilenet_v2/model.py @@ -34,7 +34,6 @@ def from_pretrained(cls, weights: str = MOBILENETV2_WEIGHTS) -> MobileNetV2: k.replace("classifier.1", "classifier"): v for k, v in checkpoint.items() } model.load_state_dict(state_dict) - model.eval() return cls(model) diff --git a/qai_hub_models/models/mobilenet_v2/perf.yaml b/qai_hub_models/models/mobilenet_v2/perf.yaml index bdd64620..1a98ff1e 100644 --- a/qai_hub_models/models/mobilenet_v2/perf.yaml +++ b/qai_hub_models/models/mobilenet_v2/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: MobileNet-v2 performance_metrics: - torchscript_onnx_tflite: - inference_time: 940.0 - throughput: 1063.8297872340424 + inference_time: 927.0 + throughput: 1078.7486515641856 estimated_peak_memory_range: - min: 57344 - max: 1721784 + min: 24576 + max: 1350608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jopr1ekeg + job_id: j7gj1x27g job_status: Passed torchscript_onnx_qnn: - inference_time: 1266.0 - throughput: 789.8894154818325 + inference_time: 1247.0 + throughput: 801.924619085806 estimated_peak_memory_range: - min: 622592 - max: 53135336 + min: 16384 + max: 40792520 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: j2p0elye5 + job_id: jmg98vyqp job_status: Passed - torchscript_onnx_ort: - inference_time: 938.0 - throughput: 1066.0980810234541 + torchscript_onnx: + inference_time: 934.0 + throughput: 1070.6638115631692 estimated_peak_memory_range: - min: 16384 - max: 21567360 + min: 12288 + max: 105144288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: j1gle3nlp + job_id: j0pxmv6jg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: 
os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:49:43Z' + timestamp: '2024-06-22T22:56:13Z' - torchscript_onnx_tflite: - inference_time: 643.0 - throughput: 1555.2099533437015 + inference_time: 615.0 + throughput: 1626.0162601626016 estimated_peak_memory_range: - min: 0 - max: 58244480 + min: 16384 + max: 61192112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jep23l8mg + job_id: jlpe29w7p job_status: Passed torchscript_onnx_qnn: - inference_time: 826.0 - throughput: 1210.6537530266344 + inference_time: 840.0 + throughput: 1190.4761904761904 estimated_peak_memory_range: min: 618496 - max: 40424432 + max: 39385632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: j1p8wzo8p + job_id: jnp130wk5 job_status: Passed - torchscript_onnx_ort: - inference_time: 666.0 - throughput: 1501.5015015015015 + torchscript_onnx: + inference_time: 669.0 + throughput: 1494.7683109118086 estimated_peak_memory_range: - min: 487424 - max: 27269952 + min: 540672 + max: 23244304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jw56qn67g + job_id: jo5m4r6y5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:49:44Z' + timestamp: '2024-06-22T22:56:14Z' - torchscript_onnx_tflite: - inference_time: 941.0 - throughput: 1062.6992561105208 + inference_time: 930.0 + throughput: 1075.268817204301 estimated_peak_memory_range: - min: 20480 - max: 1483664 + min: 12288 + max: 11068088 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jqpyv6e4p + job_id: jygzwejzg job_status: Passed torchscript_onnx_qnn: - inference_time: 1272.0 - throughput: 786.1635220125786 + inference_time: 1253.0 + throughput: 798.0845969672786 estimated_peak_memory_range: - min: 618496 - max: 41687968 + min: 86016 + max: 7149840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jn5q938mp + job_id: jz576zlqg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:49:42Z' + timestamp: '2024-06-22T22:56:10Z' + - torchscript_onnx_tflite: + inference_time: 928.0 + throughput: 1077.5862068965516 + estimated_peak_memory_range: + min: 12288 + max: 1683024 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 72 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 72 + job_id: jz5wxo3zp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1256.0 + throughput: 796.1783439490446 + estimated_peak_memory_range: + min: 622592 + max: 5426288 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 105 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 105 + job_id: jqp48qdqg + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:56:11Z' - torchscript_onnx_qnn: - inference_time: 1555.0 - throughput: 
643.0868167202573 + inference_time: 1342.0 + throughput: 745.156482861401 estimated_peak_memory_range: - min: 1355776 - max: 1355776 + min: 602112 + max: 602112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jogkr3zo5 + job_id: jvgd0wqkp job_status: Passed - torchscript_onnx_ort: - inference_time: 987.0 - throughput: 1013.1712259371834 + torchscript_onnx: + inference_time: 992.0 + throughput: 1008.0645161290323 estimated_peak_memory_range: - min: 5607424 - max: 5607424 + min: 3272704 + max: 3272704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: j1p3qekz5 + job_id: jegnx23v5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:49:45Z' + timestamp: '2024-06-22T22:56:15Z' diff --git a/qai_hub_models/models/mobilenet_v2_quantized/export.py b/qai_hub_models/models/mobilenet_v2_quantized/export.py index b025f312..4cafe05d 100644 --- a/qai_hub_models/models/mobilenet_v2_quantized/export.py +++ b/qai_hub_models/models/mobilenet_v2_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mobilenet_v2_quantized/model.py b/qai_hub_models/models/mobilenet_v2_quantized/model.py index f391a9b4..d884a6c7 100644 --- a/qai_hub_models/models/mobilenet_v2_quantized/model.py +++ b/qai_hub_models/models/mobilenet_v2_quantized/model.py @@ -86,5 +86,4 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() return cls(sim) diff --git a/qai_hub_models/models/mobilenet_v2_quantized/perf.yaml b/qai_hub_models/models/mobilenet_v2_quantized/perf.yaml index f7621f54..a3f50962 100644 --- a/qai_hub_models/models/mobilenet_v2_quantized/perf.yaml +++ b/qai_hub_models/models/mobilenet_v2_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: 
MobileNet-v2-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 291.0 - throughput: 3436.426116838488 + inference_time: 288.0 + throughput: 3472.222222222222 estimated_peak_memory_range: - min: 53248 - max: 1718392 + min: 12288 + max: 1528616 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: j1pvzv3mg + job_id: jep2j8lx5 job_status: Passed torchscript_onnx_qnn: - inference_time: 647.0 - throughput: 1545.595054095827 + inference_time: 653.0 + throughput: 1531.3935681470139 estimated_peak_memory_range: - min: 45056 - max: 16933008 + min: 24576 + max: 6402824 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jmg99wvvg - job_status: Passed - torchscript_onnx_ort: - inference_time: 549.0 - throughput: 1821.4936247723133 - estimated_peak_memory_range: - min: 12288 - max: 22837192 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 74 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 74 - job_id: jmg99w1lg + job_id: j1gl7n3e5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:50:23Z' + timestamp: '2024-06-22T22:56:51Z' - torchscript_onnx_tflite: - inference_time: 215.0 - throughput: 4651.162790697675 + inference_time: 234.0 + throughput: 4273.504273504273 estimated_peak_memory_range: min: 12288 - max: 38045216 + max: 39896768 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: j7gjkex85 + job_id: jqpyne6rg job_status: Passed torchscript_onnx_qnn: - inference_time: 474.0 - throughput: 2109.7046413502107 + inference_time: 476.0 + throughput: 2100.840336134454 estimated_peak_memory_range: min: 163840 - max: 38345936 + max: 35351600 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jnp1qe0lg - job_status: Passed - torchscript_onnx_ort: - inference_time: 395.0 - throughput: 2531.6455696202534 - estimated_peak_memory_range: - min: 12288 - max: 23651472 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 74 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 74 - job_id: jnp1qel2g + job_id: jw56v6nvp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +120,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:50:24Z' + timestamp: '2024-06-22T22:56:52Z' - torchscript_onnx_tflite: - inference_time: 301.0 - throughput: 3322.2591362126245 + inference_time: 295.0 + throughput: 3389.830508474576 estimated_peak_memory_range: min: 12288 - max: 1685448 + max: 1336624 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jlpe4k905 + job_id: j2p0kyl25 job_status: Passed torchscript_onnx_qnn: - inference_time: 654.0 - throughput: 1529.051987767584 + inference_time: 655.0 + throughput: 1526.7175572519084 estimated_peak_memory_range: - min: 16384 - max: 123157128 + min: 32768 + max: 6364808 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jz5wmqv6g + job_id: jwgomy345 
job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +158,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:50:22Z' + timestamp: '2024-06-22T22:56:54Z' - torchscript_onnx_tflite: - inference_time: 850.0 - throughput: 1176.4705882352941 + inference_time: 296.0 + throughput: 3378.3783783783783 estimated_peak_memory_range: min: 12288 - max: 24025456 + max: 1915592 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,7 +172,45 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jygzvre6p + job_id: j1p88ozzp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 664.0 + throughput: 1506.0240963855422 + estimated_peak_memory_range: + min: 16384 + max: 119736200 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 71 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 71 + job_id: j1pv43v7p + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:56:56Z' + - torchscript_onnx_tflite: + inference_time: 870.0 + throughput: 1149.4252873563219 + estimated_peak_memory_range: + min: 12288 + max: 25149472 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 72 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 72 + job_id: jogkdz3yp job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -207,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-08T22:50:16Z' + timestamp: '2024-06-22T22:56:48Z' - torchscript_onnx_tflite: - inference_time: 7601.0 - throughput: 131.56163662675965 + inference_time: 7445.0 + throughput: 134.31833445265278 estimated_peak_memory_range: - min: 253952 - max: 8158832 + min: 40960 + max: 7492840 primary_compute_unit: NPU precision: int8 layer_info: @@ -221,7 +233,7 @@ models: layers_on_gpu: 2 layers_on_cpu: 0 total_layers: 72 - job_id: jz5wmqojg + job_id: jn5qw8375 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -230,13 +242,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-08T22:50:17Z' + timestamp: '2024-06-22T22:56:49Z' - torchscript_onnx_qnn: - inference_time: 740.0 - throughput: 1351.3513513513512 + inference_time: 739.0 + throughput: 1353.1799729364006 estimated_peak_memory_range: - min: 696320 - max: 696320 + min: 540672 + max: 540672 primary_compute_unit: NPU precision: int8 layer_info: @@ -244,22 +256,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jvgd7owlg - job_status: Passed - torchscript_onnx_ort: - inference_time: 554.0 - throughput: 1805.0541516245487 - estimated_peak_memory_range: - min: 20283392 - max: 20283392 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 74 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 74 - job_id: jvgd7o9eg + job_id: j1p38kex5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -268,4 +265,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:50:25Z' + timestamp: '2024-06-22T22:56:53Z' diff --git a/qai_hub_models/models/mobilenet_v3_large/export.py b/qai_hub_models/models/mobilenet_v3_large/export.py index da4d660d..934fc5ab 100644 --- a/qai_hub_models/models/mobilenet_v3_large/export.py +++ b/qai_hub_models/models/mobilenet_v3_large/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = 
"Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mobilenet_v3_large/perf.yaml b/qai_hub_models/models/mobilenet_v3_large/perf.yaml index 8ea50f02..de6f07e1 100644 --- a/qai_hub_models/models/mobilenet_v3_large/perf.yaml +++ b/qai_hub_models/models/mobilenet_v3_large/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,26 +38,26 @@ models: - name: MobileNet-v3-Large performance_metrics: - torchscript_onnx_tflite: - inference_time: 999.0 - throughput: 1001.001001001001 + inference_time: 1009.0 + throughput: 991.0802775024777 estimated_peak_memory_range: - min: 16384 - max: 1600024 + min: 12288 + max: 1911392 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 136 + layers_on_npu: 128 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 136 - job_id: jqp4jvovp + total_layers: 128 + job_id: jvgd0wokp job_status: Passed torchscript_onnx_qnn: - inference_time: 1048.0 - throughput: 954.1984732824427 + inference_time: 1045.0 + throughput: 956.9377990430622 estimated_peak_memory_range: - min: 647168 - max: 48048184 + min: 16384 + max: 59446776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jegnr3yr5 + job_id: jo5m4r3y5 job_status: Passed - torchscript_onnx_ort: - inference_time: 1039.0 - throughput: 962.4639076034649 + torchscript_onnx: + inference_time: 1079.0 + throughput: 926.7840593141798 estimated_peak_memory_range: min: 12288 - max: 82696432 + max: 82439152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 162 - job_id: j2p0elq65 + job_id: j2p0kyr25 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,28 +89,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:50:50Z' + timestamp: '2024-06-22T22:57:31Z' - torchscript_onnx_tflite: - inference_time: 703.0 - throughput: 1422.475106685633 + inference_time: 686.0 + throughput: 
1457.725947521866 estimated_peak_memory_range: - min: 12288 - max: 62391952 + min: 16384 + max: 64600528 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 136 + layers_on_npu: 128 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 136 - job_id: j0pxeyj15 + total_layers: 128 + job_id: jz576zxqg job_status: Passed torchscript_onnx_qnn: - inference_time: 718.0 - throughput: 1392.757660167131 + inference_time: 715.0 + throughput: 1398.6013986013986 estimated_peak_memory_range: - min: 618496 - max: 51941056 + min: 0 + max: 47907456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jopr1eq9g + job_id: jegnx2ev5 job_status: Passed - torchscript_onnx_ort: - inference_time: 713.0 - throughput: 1402.5245441795232 + torchscript_onnx: + inference_time: 745.0 + throughput: 1342.2818791946308 estimated_peak_memory_range: - min: 618496 - max: 29120336 + min: 12288 + max: 20541504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 162 - job_id: j1p8wz9xp + job_id: j1p88o7zp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,28 +142,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:50:51Z' + timestamp: '2024-06-22T22:57:32Z' - torchscript_onnx_tflite: - inference_time: 1001.0 - throughput: 999.000999000999 + inference_time: 1006.0 + throughput: 994.0357852882704 estimated_peak_memory_range: - min: 45056 - max: 1507408 + min: 32768 + max: 1979760 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 136 + layers_on_npu: 128 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 136 - job_id: jo5mv32w5 + total_layers: 128 + job_id: jqp48qvqg job_status: Passed torchscript_onnx_qnn: - inference_time: 1042.0 - throughput: 959.6928982725528 + inference_time: 1028.0 + throughput: 972.7626459143969 estimated_peak_memory_range: - min: 626688 - max: 69049656 + min: 217088 + max: 68997232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jqpyv6w7p + job_id: jep2j8mx5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:50:49Z' + timestamp: '2024-06-22T22:57:29Z' + - torchscript_onnx_tflite: + inference_time: 1014.0 + throughput: 986.1932938856016 + estimated_peak_memory_range: + min: 12288 + max: 1724600 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 128 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 128 + job_id: j0pxmvyjg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1046.0 + throughput: 956.0229445506692 + estimated_peak_memory_range: + min: 0 + max: 69030600 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 144 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 144 + job_id: jqpynedrg + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:57:30Z' - torchscript_onnx_qnn: - inference_time: 1199.0 - throughput: 834.0283569641368 + inference_time: 1156.0 + throughput: 865.0519031141869 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 
0 total_layers: 144 - job_id: jep23l64g + job_id: jopr9kyvp job_status: Passed - torchscript_onnx_ort: - inference_time: 1086.0 - throughput: 920.8103130755064 + torchscript_onnx: + inference_time: 1066.0 + throughput: 938.0863039399625 estimated_peak_memory_range: - min: 51040256 - max: 51040256 + min: 51892224 + max: 51892224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 162 - job_id: jogkr3n25 + job_id: jogkdzyyp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:50:52Z' + timestamp: '2024-06-22T22:57:33Z' diff --git a/qai_hub_models/models/mobilenet_v3_large_quantized/export.py b/qai_hub_models/models/mobilenet_v3_large_quantized/export.py index 7948d791..8d733eb7 100644 --- a/qai_hub_models/models/mobilenet_v3_large_quantized/export.py +++ b/qai_hub_models/models/mobilenet_v3_large_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mobilenet_v3_large_quantized/model.py b/qai_hub_models/models/mobilenet_v3_large_quantized/model.py index fdcf83ec..b13a9d4c 100644 --- a/qai_hub_models/models/mobilenet_v3_large_quantized/model.py +++ b/qai_hub_models/models/mobilenet_v3_large_quantized/model.py @@ -78,5 +78,4 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() return cls(sim) diff --git a/qai_hub_models/models/mobilenet_v3_large_quantized/perf.yaml b/qai_hub_models/models/mobilenet_v3_large_quantized/perf.yaml index b5cebb9b..e0a45276 100644 --- a/qai_hub_models/models/mobilenet_v3_large_quantized/perf.yaml +++ b/qai_hub_models/models/mobilenet_v3_large_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: MobileNet-v3-Large-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 371.0 - throughput: 2695.4177897574123 + inference_time: 359.0 + throughput: 2785.515320334262 
estimated_peak_memory_range: - min: 16384 - max: 1268000 + min: 24576 + max: 1507232 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 135 - job_id: j1gle3z8p + job_id: j1gl7nke5 job_status: Passed torchscript_onnx_qnn: inference_time: 622.0 throughput: 1607.717041800643 estimated_peak_memory_range: min: 16384 - max: 12184136 + max: 63969296 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j7gjkemx5 - job_status: Passed - torchscript_onnx_ort: - inference_time: 5186.0 - throughput: 192.8268414963363 - estimated_peak_memory_range: - min: 18886656 - max: 272750360 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 147 - layers_on_gpu: 0 - layers_on_cpu: 24 - total_layers: 171 - job_id: jmg99welg + job_id: jlpe29v7p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:51:35Z' + timestamp: '2024-06-22T22:58:15Z' - torchscript_onnx_tflite: - inference_time: 255.0 - throughput: 3921.5686274509803 + inference_time: 254.0 + throughput: 3937.0078740157483 estimated_peak_memory_range: min: 12288 - max: 48279952 + max: 51307808 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 135 - job_id: jw56qnj0g + job_id: jw56v61vp job_status: Passed torchscript_onnx_qnn: - inference_time: 451.0 - throughput: 2217.2949002217297 + inference_time: 450.0 + throughput: 2222.222222222222 estimated_peak_memory_range: min: 163840 - max: 50970896 + max: 41166592 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jlpe4k115 - job_status: Passed - torchscript_onnx_ort: - inference_time: 4385.0 - throughput: 228.05017103762827 - estimated_peak_memory_range: - min: 17133568 - max: 61050864 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 147 - layers_on_gpu: 0 - layers_on_cpu: 24 - total_layers: 171 - job_id: jnp1qex2g + job_id: jygzwe7zg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +120,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:51:36Z' + timestamp: '2024-06-22T22:58:16Z' - torchscript_onnx_tflite: - inference_time: 353.0 - throughput: 2832.8611898016998 + inference_time: 356.0 + throughput: 2808.9887640449438 estimated_peak_memory_range: min: 12288 - max: 2106960 + max: 1436016 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 135 - job_id: j1p3qe3l5 + job_id: j1p38kmx5 job_status: Passed torchscript_onnx_qnn: inference_time: 626.0 throughput: 1597.444089456869 estimated_peak_memory_range: - min: 184320 - max: 6302512 + min: 12288 + max: 15664936 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jz5wmqn6g + job_id: jmg98v4qp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +158,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:51:34Z' + timestamp: '2024-06-22T22:58:19Z' - torchscript_onnx_tflite: - inference_time: 1170.0 - throughput: 
854.7008547008547 + inference_time: 351.0 + throughput: 2849.002849002849 estimated_peak_memory_range: min: 12288 - max: 28920160 + max: 1946592 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,7 +172,45 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 135 - job_id: jwgoe30xp + job_id: jwgomyv45 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 624.0 + throughput: 1602.5641025641025 + estimated_peak_memory_range: + min: 16384 + max: 7871216 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 126 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 126 + job_id: jnp1308k5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:58:20Z' + - torchscript_onnx_tflite: + inference_time: 1178.0 + throughput: 848.8964346349745 + estimated_peak_memory_range: + min: 12288 + max: 31747312 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 135 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 135 + job_id: j1pv43w7p job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -207,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-08T22:51:29Z' + timestamp: '2024-06-22T22:58:13Z' - torchscript_onnx_tflite: - inference_time: 6878.0 - throughput: 145.39110206455365 + inference_time: 6759.0 + throughput: 147.95088030773783 estimated_peak_memory_range: - min: 45056 - max: 2149272 + min: 16384 + max: 2477520 primary_compute_unit: NPU precision: int8 layer_info: @@ -221,7 +233,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 135 - job_id: j1pvzvojg + job_id: j7gj1xl7g job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -230,13 +242,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-08T22:51:30Z' + timestamp: '2024-06-22T22:58:14Z' - torchscript_onnx_qnn: - inference_time: 716.0 - throughput: 1396.6480446927374 + inference_time: 715.0 + throughput: 1398.6013986013986 estimated_peak_memory_range: - min: 643072 - max: 643072 + min: 602112 + max: 602112 primary_compute_unit: NPU precision: int8 layer_info: @@ -244,22 +256,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jygzvr9kp - job_status: Passed - torchscript_onnx_ort: - inference_time: 4701.0 - throughput: 212.72069772388852 - estimated_peak_memory_range: - min: 26042368 - max: 26042368 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 147 - layers_on_gpu: 0 - layers_on_cpu: 24 - total_layers: 171 - job_id: jvgd7oleg + job_id: jz5wxo9zp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -268,4 +265,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:51:37Z' + timestamp: '2024-06-22T22:58:18Z' diff --git a/qai_hub_models/models/mobilenet_v3_small/export.py b/qai_hub_models/models/mobilenet_v3_small/export.py index f7fe3203..327d69c9 100644 --- a/qai_hub_models/models/mobilenet_v3_small/export.py +++ b/qai_hub_models/models/mobilenet_v3_small/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), 
make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mobilenet_v3_small/perf.yaml b/qai_hub_models/models/mobilenet_v3_small/perf.yaml index 10857f0f..1ac1f53d 100644 --- a/qai_hub_models/models/mobilenet_v3_small/perf.yaml +++ b/qai_hub_models/models/mobilenet_v3_small/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,26 +38,26 @@ models: - name: MobileNet-v3-Small performance_metrics: - torchscript_onnx_tflite: - inference_time: 835.0 - throughput: 1197.6047904191616 + inference_time: 844.0 + throughput: 1184.8341232227488 estimated_peak_memory_range: - min: 16384 - max: 1873408 + min: 28672 + max: 1989392 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 124 + layers_on_npu: 115 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 124 - job_id: jqp4jv0vp + total_layers: 115 + job_id: jopr9k4vp job_status: Passed torchscript_onnx_qnn: - inference_time: 882.0 - throughput: 1133.7868480725624 + inference_time: 879.0 + throughput: 1137.6564277588168 estimated_peak_memory_range: - min: 16384 - max: 13725872 + min: 622592 + max: 4861784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jegnr38r5 + job_id: j1p88o3zp job_status: Passed - torchscript_onnx_ort: - inference_time: 824.0 - throughput: 1213.5922330097087 + torchscript_onnx: + inference_time: 835.0 + throughput: 1197.6047904191616 estimated_peak_memory_range: - min: 12288 - max: 57762312 + min: 81920 + max: 86316544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: j2p0el765 + job_id: j1p38k4x5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,28 +89,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:52:03Z' + timestamp: '2024-06-22T22:58:55Z' - torchscript_onnx_tflite: - inference_time: 547.0 - throughput: 1828.1535648994516 + inference_time: 557.0 + throughput: 1795.3321364452424 estimated_peak_memory_range: - min: 12288 - max: 42129856 + min: 16384 + max: 44831456 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 124 + layers_on_npu: 115 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 124 - job_id: j0pxey215 
+ total_layers: 115 + job_id: jep2j87x5 job_status: Passed torchscript_onnx_qnn: - inference_time: 583.0 - throughput: 1715.2658662092624 + inference_time: 584.0 + throughput: 1712.3287671232877 estimated_peak_memory_range: min: 0 - max: 47338784 + max: 42092592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jopr1ej9g + job_id: jogkdzlyp job_status: Passed - torchscript_onnx_ort: - inference_time: 586.0 - throughput: 1706.4846416382252 + torchscript_onnx: + inference_time: 592.0 + throughput: 1689.1891891891892 estimated_peak_memory_range: - min: 524288 - max: 27846320 + min: 618496 + max: 25216640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: j1p8wzvxp + job_id: jwgomy145 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,28 +142,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:52:04Z' + timestamp: '2024-06-22T22:58:57Z' - torchscript_onnx_tflite: - inference_time: 832.0 - throughput: 1201.923076923077 + inference_time: 841.0 + throughput: 1189.0606420927468 estimated_peak_memory_range: - min: 24576 - max: 2336768 + min: 12288 + max: 1935832 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 124 + layers_on_npu: 115 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 124 - job_id: jo5mv3yw5 + total_layers: 115 + job_id: jqpyne4rg job_status: Passed torchscript_onnx_qnn: - inference_time: 867.0 - throughput: 1153.4025374855826 + inference_time: 865.0 + throughput: 1156.0693641618498 estimated_peak_memory_range: - min: 12288 - max: 35394896 + min: 16384 + max: 156761368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jqpyv607p + job_id: j1gl7n0e5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:52:02Z' + timestamp: '2024-06-22T22:58:53Z' + - torchscript_onnx_tflite: + inference_time: 844.0 + throughput: 1184.8341232227488 + estimated_peak_memory_range: + min: 16384 + max: 1887288 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 115 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 115 + job_id: j2p0ky125 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 871.0 + throughput: 1148.105625717566 + estimated_peak_memory_range: + min: 40960 + max: 35138480 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 126 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 126 + job_id: jw56v63vp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T22:58:54Z' - torchscript_onnx_qnn: - inference_time: 1018.0 - throughput: 982.3182711198428 + inference_time: 979.0 + throughput: 1021.4504596527069 estimated_peak_memory_range: - min: 1249280 - max: 1249280 + min: 602112 + max: 602112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jep23ln4g + job_id: jn5qw8775 job_status: Passed - torchscript_onnx_ort: + torchscript_onnx: inference_time: 879.0 throughput: 1137.6564277588168 
estimated_peak_memory_range: - min: 16596992 - max: 16596992 + min: 16412672 + max: 16412672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jogkr3m25 + job_id: j1pv4317p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:52:05Z' + timestamp: '2024-06-22T22:58:58Z' diff --git a/qai_hub_models/models/openai_clip/app.py b/qai_hub_models/models/openai_clip/app.py index 3df122c1..ebdbd415 100644 --- a/qai_hub_models/models/openai_clip/app.py +++ b/qai_hub_models/models/openai_clip/app.py @@ -60,10 +60,9 @@ def predict_similarity( by doing a transpose. """ - with torch.no_grad(): - image_features = self.image_encoder(image) - text_features = self.text_encoder(text) - logits_per_image = image_features @ text_features.t() + image_features = self.image_encoder(image) + text_features = self.text_encoder(text) + logits_per_image = image_features @ text_features.t() return logits_per_image.cpu().numpy() def process_image(self, image: Image) -> torch.Tensor: diff --git a/qai_hub_models/models/openai_clip/export.py b/qai_hub_models/models/openai_clip/export.py index 08ce7d6a..29a5701b 100644 --- a/qai_hub_models/models/openai_clip/export.py +++ b/qai_hub_models/models/openai_clip/export.py @@ -33,7 +33,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, components: Optional[List[str]] = None, skip_profiling: bool = False, @@ -127,7 +127,6 @@ def export_model( for component_name, component in components_dict.items(): # Trace the model input_spec = component.get_input_spec() - component.eval() source_model = torch.jit.trace( component.to("cpu"), make_torch_inputs(input_spec) ) @@ -194,7 +193,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/openai_clip/model.py b/qai_hub_models/models/openai_clip/model.py index 003ae533..a3664c47 100644 --- a/qai_hub_models/models/openai_clip/model.py +++ b/qai_hub_models/models/openai_clip/model.py @@ -59,7 +59,6 @@ def from_pretrained(): @staticmethod def from_source_model(net, preprocess, tokenizer_func): - net = net.eval() text_encoder = ClipTextEncoder(net) image_encoder = ClipImageEncoder(net) return Clip(text_encoder, image_encoder, preprocess, tokenizer_func) diff --git a/qai_hub_models/models/openai_clip/perf.yaml b/qai_hub_models/models/openai_clip/perf.yaml index 785f83c0..0665b1af 100644 --- a/qai_hub_models/models/openai_clip/perf.yaml +++ b/qai_hub_models/models/openai_clip/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: CLIPTextEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 13293.0 - throughput: 75.22756337922215 + inference_time: 10955.0 + 
throughput: 91.28251939753537 estimated_peak_memory_range: - min: 20480 - max: 3340864 + min: 65536 + max: 3179896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 576 - job_id: j1gle3r8p + job_id: jlpe29r7p job_status: Passed torchscript_onnx_qnn: - inference_time: 7810.0 - throughput: 128.04097311139566 + inference_time: 7761.0 + throughput: 128.84937508053085 estimated_peak_memory_range: - min: 24576 - max: 31351376 + min: 12288 + max: 18107232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 377 - job_id: jlpe4k315 + job_id: jnp130dl5 job_status: Passed - torchscript_onnx_ort: - inference_time: 31397.0 - throughput: 31.850176768481067 + torchscript_onnx: + inference_time: 31659.0 + throughput: 31.586594649230868 estimated_peak_memory_range: - min: 57344 - max: 324810128 + min: 16384 + max: 323261448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 389 - job_id: j0pxe1o15 + job_id: j2p0kyee5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:53:04Z' + timestamp: '2024-06-22T23:00:04Z' - torchscript_onnx_tflite: - inference_time: 9408.0 - throughput: 106.29251700680273 + inference_time: 7727.0 + throughput: 129.41633234114144 estimated_peak_memory_range: - min: 36864 - max: 211531120 + min: 16384 + max: 217754880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 576 - job_id: j1p3qe2l5 + job_id: jz5wxodzp job_status: Passed torchscript_onnx_qnn: - inference_time: 5496.0 - throughput: 181.9505094614265 + inference_time: 5506.0 + throughput: 181.62005085361423 estimated_peak_memory_range: min: 12288 - max: 143518544 + max: 128093120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 377 - job_id: jz5wm966g + job_id: jz576zvrg job_status: Passed - torchscript_onnx_ort: - inference_time: 22333.0 - throughput: 44.776787713249455 + torchscript_onnx: + inference_time: 22386.0 + throughput: 44.67077637809345 estimated_peak_memory_range: - min: 36864 - max: 188583968 + min: 53248 + max: 177699248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 389 - job_id: jegnrevr5 + job_id: jogkdzrop job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:53:06Z' + timestamp: '2024-06-22T23:00:06Z' - torchscript_onnx_tflite: - inference_time: 13221.0 - throughput: 75.6372437788367 + inference_time: 10895.0 + throughput: 91.78522257916475 estimated_peak_memory_range: - min: 40960 - max: 2903592 + min: 45056 + max: 3257704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 576 - job_id: j1pvzvxjg + job_id: jnp130dk5 job_status: Passed torchscript_onnx_qnn: - inference_time: 7775.0 - throughput: 128.61736334405145 + inference_time: 7779.0 + throughput: 128.5512276642242 estimated_peak_memory_range: - min: 16384 - max: 18711280 + min: 28672 + max: 17653280 primary_compute_unit: NPU 
precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 377 - job_id: jz57vdrl5 + job_id: jegnx2rm5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:53:02Z' + timestamp: '2024-06-22T22:59:59Z' + - torchscript_onnx_tflite: + inference_time: 10968.0 + throughput: 91.17432530999271 + estimated_peak_memory_range: + min: 36864 + max: 3506584 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 574 + layers_on_gpu: 0 + layers_on_cpu: 2 + total_layers: 576 + job_id: jz5wxodjp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 7788.0 + throughput: 128.40267077555214 + estimated_peak_memory_range: + min: 49152 + max: 25359208 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 377 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 377 + job_id: jep2j83m5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:00:02Z' - torchscript_onnx_qnn: - inference_time: 8431.0 - throughput: 118.60989206499822 + inference_time: 8248.0 + throughput: 121.24151309408342 estimated_peak_memory_range: - min: 159744 - max: 159744 + min: 155648 + max: 155648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 377 - job_id: jnp1q8z2g + job_id: j0pxmve9g job_status: Passed - torchscript_onnx_ort: - inference_time: 32547.0 - throughput: 30.724797984453254 + torchscript_onnx: + inference_time: 32528.0 + throughput: 30.74274471224791 estimated_peak_memory_range: - min: 40755200 - max: 40755200 + min: 37928960 + max: 37928960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 389 - job_id: jep23my4g + job_id: j1gl7nel5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,15 +256,15 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:53:08Z' + timestamp: '2024-06-22T23:00:08Z' - name: CLIPImageEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 126539.0 - throughput: 7.902701933791163 + inference_time: 66863.0 + throughput: 14.955954713369128 estimated_peak_memory_range: - min: 0 - max: 273708336 + min: 16384 + max: 3191736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -232,14 +272,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 576 - job_id: jw56qnl0g + job_id: jygzwexzg job_status: Passed torchscript_onnx_qnn: - inference_time: 50274.0 - throughput: 19.890997334606357 + inference_time: 50137.0 + throughput: 19.94534974170772 estimated_peak_memory_range: - min: 126976 - max: 66170792 + min: 90112 + max: 65745680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -247,7 +287,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 371 - job_id: jygzvrkkp + job_id: jvgd0wrlp + job_status: Passed + torchscript_onnx: + inference_time: 170413.0 + throughput: 5.868096917488689 + estimated_peak_memory_range: + min: 126976 + max: 546526336 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 382 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 382 + job_id: j1p88ow8p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ 
-256,13 +311,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:52:57Z' + timestamp: '2024-06-22T23:00:05Z' - torchscript_onnx_tflite: - inference_time: 96320.0 - throughput: 10.382059800664452 + inference_time: 49213.0 + throughput: 20.31983419015301 estimated_peak_memory_range: - min: 188416 - max: 752672896 + min: 32768 + max: 741033104 primary_compute_unit: NPU precision: fp16 layer_info: @@ -270,14 +325,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 576 - job_id: jwgoe3qxp + job_id: jmg98v3qp job_status: Passed torchscript_onnx_qnn: - inference_time: 37784.0 - throughput: 26.46622909167902 + inference_time: 37559.0 + throughput: 26.62477701749248 estimated_peak_memory_range: - min: 634880 - max: 197848448 + min: 659456 + max: 177316416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -285,14 +340,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 371 - job_id: jmg994nlg + job_id: jqp48qjlg job_status: Passed - torchscript_onnx_ort: - inference_time: 129578.0 - throughput: 7.717359428298013 + torchscript_onnx: + inference_time: 127814.0 + throughput: 7.823869059727416 estimated_peak_memory_range: - min: 659456 - max: 1273480192 + min: 0 + max: 1299304416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -300,7 +355,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 382 - job_id: jopr1y39g + job_id: jn5qw89m5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -309,13 +364,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:53:07Z' + timestamp: '2024-06-22T23:00:07Z' - torchscript_onnx_tflite: - inference_time: 125864.0 - throughput: 7.945083582279286 + inference_time: 65049.0 + throughput: 15.373026487724639 estimated_peak_memory_range: - min: 143360 - max: 4010376 + min: 102400 + max: 3521536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -323,14 +378,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 576 - job_id: j7gjke4x5 + job_id: jvgd0wrkp job_status: Passed torchscript_onnx_qnn: - inference_time: 50577.0 - throughput: 19.771833046641753 + inference_time: 50151.0 + throughput: 19.939781858786464 estimated_peak_memory_range: - min: 77824 - max: 66028648 + min: 0 + max: 56606256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -338,7 +393,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 371 - job_id: jqp4jwrvp + job_id: jopr9k1ep job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -347,28 +402,66 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:53:03Z' + timestamp: '2024-06-22T23:00:00Z' + - torchscript_onnx_tflite: + inference_time: 65877.0 + throughput: 15.17980478771043 + estimated_peak_memory_range: + min: 122880 + max: 3769616 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 576 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 576 + job_id: jmg98v3vp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 50614.0 + throughput: 19.757379381198877 + estimated_peak_memory_range: + min: 81920 + max: 65787296 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 371 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 371 + job_id: jqpynev4g + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: 
'2024-06-22T23:00:02Z' - torchscript_onnx_qnn: - inference_time: 48611.0 - throughput: 20.57147559194421 + inference_time: 36053.0 + throughput: 27.736942834160818 estimated_peak_memory_range: min: 602112 max: 602112 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 369 + layers_on_npu: 370 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 369 - job_id: jvgd7v1eg + total_layers: 370 + job_id: jo5m4rvq5 job_status: Passed - torchscript_onnx_ort: - inference_time: 168455.0 - throughput: 5.936303463833071 + torchscript_onnx: + inference_time: 169928.0 + throughput: 5.8848453462643 estimated_peak_memory_range: - min: 468086784 - max: 468086784 + min: 536092672 + max: 536092672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -376,7 +469,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 382 - job_id: jqpyvd37p + job_id: jw56v6q7p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -385,4 +478,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:53:08Z' + timestamp: '2024-06-22T23:00:09Z' diff --git a/qai_hub_models/models/openpose/export.py b/qai_hub_models/models/openpose/export.py index 18061fc5..96527162 100644 --- a/qai_hub_models/models/openpose/export.py +++ b/qai_hub_models/models/openpose/export.py @@ -38,7 +38,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,14 +117,13 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0,output_1" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -170,7 +169,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -210,7 +209,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0,output_1", inference_result, target_runtime ) diff --git a/qai_hub_models/models/openpose/model.py b/qai_hub_models/models/openpose/model.py index 6379a270..80835074 100644 --- a/qai_hub_models/models/openpose/model.py +++ b/qai_hub_models/models/openpose/model.py @@ -8,7 +8,11 @@ import torch -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, SourceAsRoot +from qai_hub_models.utils.asset_loaders import ( + CachedWebModelAsset, + SourceAsRoot, + wipe_sys_modules, +) from qai_hub_models.utils.base_model import BaseModel from 
qai_hub_models.utils.input_spec import InputSpec @@ -93,8 +97,7 @@ def forward(self, image: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: im = img_padded.permute(2, 0, 1).unsqueeze(0) - 0.5 # Run the model - with torch.no_grad(): - paf, heatmap = self.model(im) + paf, heatmap = self.model(im) return paf, heatmap @@ -131,9 +134,13 @@ def _load_openpose_source_model_from_weights( MODEL_ID, MODEL_ASSET_VERSION, DEFAULT_WEIGHTS ).fetch() + import src + + wipe_sys_modules(src) + # Import model files from pytorch openpose repo from src.body import Body body_estimation = Body(weights_path_body) - return body_estimation.model.eval() + return body_estimation.model diff --git a/qai_hub_models/models/openpose/perf.yaml b/qai_hub_models/models/openpose/perf.yaml index 30e2f47e..64964053 100644 --- a/qai_hub_models/models/openpose/perf.yaml +++ b/qai_hub_models/models/openpose/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: OpenPose performance_metrics: - torchscript_onnx_tflite: - inference_time: 12008.0 - throughput: 83.27781479013991 + inference_time: 11731.0 + throughput: 85.24422470377633 estimated_peak_memory_range: - min: 217088 - max: 2747920 + min: 196608 + max: 120661344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jogkryx25 + job_id: j1pv43zmp job_status: Passed torchscript_onnx_qnn: - inference_time: 11771.0 - throughput: 84.95454931611587 + inference_time: 11790.0 + throughput: 84.81764206955047 estimated_peak_memory_range: min: 45056 - max: 240267896 + max: 230031400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jw56q140g + job_id: jz5wxomjp job_status: Passed - torchscript_onnx_ort: - inference_time: 11936.0 - throughput: 83.78016085790885 + torchscript_onnx: + inference_time: 11939.0 + throughput: 83.75910880308234 estimated_peak_memory_range: - min: 0 - max: 374382256 + min: 12288 + max: 448015016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: j7gjklnx5 + job_id: jqp48q8lg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:55:03Z' + timestamp: '2024-06-22T23:02:13Z' - torchscript_onnx_tflite: - inference_time: 8742.0 - throughput: 114.39029970258522 + inference_time: 8755.0 + throughput: 114.22044545973729 estimated_peak_memory_range: - min: 12288 - max: 33837760 + min: 212992 + max: 38756416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jn5q92q4p + job_id: j7gj1xk8g job_status: Passed torchscript_onnx_qnn: - inference_time: 8755.0 - throughput: 114.22044545973729 + inference_time: 8767.0 + throughput: 114.06410402646287 estimated_peak_memory_range: - min: 618496 - max: 53012064 + min: 724992 + max: 44317808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 
186 - job_id: j1p3qm0l5 + job_id: jmg98v9vp job_status: Passed - torchscript_onnx_ort: - inference_time: 9006.0 - throughput: 111.0370863868532 + torchscript_onnx: + inference_time: 8927.0 + throughput: 112.0197154699227 estimated_peak_memory_range: - min: 700416 - max: 31196368 + min: 618496 + max: 25483424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jlpe4vm15 + job_id: j0pxmvm9g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:55:04Z' + timestamp: '2024-06-22T23:02:14Z' - torchscript_onnx_tflite: - inference_time: 11695.0 - throughput: 85.50662676357418 + inference_time: 11730.0 + throughput: 85.25149190110827 estimated_peak_memory_range: - min: 196608 - max: 2975008 + min: 217088 + max: 2218456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: j1glekm8p + job_id: jlpe2940p job_status: Passed torchscript_onnx_qnn: - inference_time: 11765.0 - throughput: 84.99787505312368 + inference_time: 11826.0 + throughput: 84.5594452900389 estimated_peak_memory_range: min: 12288 - max: 229599440 + max: 229489592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: j1pvzwkjg + job_id: jvgd0w7lp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:55:02Z' + timestamp: '2024-06-22T23:02:10Z' + - torchscript_onnx_tflite: + inference_time: 11710.0 + throughput: 85.39709649871904 + estimated_peak_memory_range: + min: 225280 + max: 2674184 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 103 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 103 + job_id: jygzwev6g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 11788.0 + throughput: 84.8320325755005 + estimated_peak_memory_range: + min: 36864 + max: 240637648 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 186 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 186 + job_id: jz576z6rg + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:02:11Z' - torchscript_onnx_qnn: - inference_time: 14100.0 - throughput: 70.92198581560284 + inference_time: 12303.0 + throughput: 81.28098837681866 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jwgoev6xp + job_id: jnp130ql5 job_status: Passed - torchscript_onnx_ort: - inference_time: 12365.0 - throughput: 80.87343307723413 + torchscript_onnx: + inference_time: 12373.0 + throughput: 80.82114281095934 estimated_peak_memory_range: - min: 88932352 - max: 88932352 + min: 93327360 + max: 93327360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jygzv7dkp + job_id: jo5m4r4q5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:55:05Z' + 
timestamp: '2024-06-22T23:02:15Z' diff --git a/qai_hub_models/models/posenet_mobilenet/app.py b/qai_hub_models/models/posenet_mobilenet/app.py index 72933415..b0f03508 100644 --- a/qai_hub_models/models/posenet_mobilenet/app.py +++ b/qai_hub_models/models/posenet_mobilenet/app.py @@ -556,14 +556,13 @@ def predict_pose_keypoints( tensor = tensor.reshape(1, 3, self.input_height, self.input_width) np.save("build/posenet_inputs", tensor.numpy()) - with torch.no_grad(): - ( - heatmaps_result, - offsets_result, - displacement_fwd_result, - displacement_bwd_result, - max_vals, - ) = self.model(tensor) + ( + heatmaps_result, + offsets_result, + displacement_fwd_result, + displacement_bwd_result, + max_vals, + ) = self.model(tensor) pose_scores, keypoint_scores, keypoint_coords = decode_multiple_poses( heatmaps_result.squeeze(0), offsets_result.squeeze(0), diff --git a/qai_hub_models/models/posenet_mobilenet/export.py b/qai_hub_models/models/posenet_mobilenet/export.py index e178937d..9e1a33bd 100644 --- a/qai_hub_models/models/posenet_mobilenet/export.py +++ b/qai_hub_models/models/posenet_mobilenet/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -186,7 +185,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/posenet_mobilenet/model.py b/qai_hub_models/models/posenet_mobilenet/model.py index c4f17782..aec731c2 100644 --- a/qai_hub_models/models/posenet_mobilenet/model.py +++ b/qai_hub_models/models/posenet_mobilenet/model.py @@ -60,7 +60,7 @@ def from_pretrained( model = posenet.load_model(model_id) - return cls(model).eval() + return cls(model) def forward(self, image): """ diff --git a/qai_hub_models/models/posenet_mobilenet/perf.yaml b/qai_hub_models/models/posenet_mobilenet/perf.yaml index e405cdc0..d56b85f5 100644 --- a/qai_hub_models/models/posenet_mobilenet/perf.yaml +++ b/qai_hub_models/models/posenet_mobilenet/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: Posenet-Mobilenet performance_metrics: - 
torchscript_onnx_tflite: - inference_time: 1387.0 - throughput: 720.9805335255949 + inference_time: 1412.0 + throughput: 708.2152974504249 estimated_peak_memory_range: - min: 12288 - max: 1654968 + min: 16384 + max: 1532880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jmg994llg + job_id: jopr9k9ep job_status: Passed torchscript_onnx_qnn: - inference_time: 1439.0 - throughput: 694.9270326615705 + inference_time: 1450.0 + throughput: 689.6551724137931 estimated_peak_memory_range: - min: 20480 - max: 24010176 + min: 16384 + max: 13707448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jz57vdyl5 + job_id: j1p88o88p job_status: Passed - torchscript_onnx_ort: - inference_time: 2086.0 - throughput: 479.3863854266539 + torchscript_onnx: + inference_time: 2131.0 + throughput: 469.2632566870014 estimated_peak_memory_range: - min: 12288 - max: 25676680 + min: 8192 + max: 30695984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jegnre6r5 + job_id: j1p38k8z5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:55:33Z' + timestamp: '2024-06-22T23:02:46Z' - torchscript_onnx_tflite: - inference_time: 977.0 - throughput: 1023.5414534288639 + inference_time: 970.0 + throughput: 1030.9278350515465 estimated_peak_memory_range: min: 12288 - max: 36616768 + max: 38499008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jnp1q842g + job_id: jep2j8jm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1010.0 - throughput: 990.0990099009902 + inference_time: 1013.0 + throughput: 987.1668311944719 estimated_peak_memory_range: min: 1597440 - max: 36578000 + max: 34127872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jqp4jwlvp + job_id: jogkdzdop job_status: Passed - torchscript_onnx_ort: - inference_time: 1404.0 - throughput: 712.2507122507122 + torchscript_onnx: + inference_time: 1455.0 + throughput: 687.2852233676975 estimated_peak_memory_range: - min: 1597440 - max: 24142448 + min: 1175552 + max: 23896144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jopr1yv9g + job_id: jwgomymd5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:55:34Z' + timestamp: '2024-06-22T23:02:47Z' - torchscript_onnx_tflite: - inference_time: 1388.0 - throughput: 720.4610951008646 + inference_time: 1389.0 + throughput: 719.9424046076314 estimated_peak_memory_range: min: 12288 - max: 1476976 + max: 3217944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jvgd7vxeg + job_id: jqpynen4g job_status: Passed torchscript_onnx_qnn: - inference_time: 1447.0 - throughput: 691.0850034554251 + inference_time: 1446.0 + throughput: 691.5629322268327 estimated_peak_memory_range: min: 16384 - max: 13954296 + max: 
149700328 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jo5mvznw5 + job_id: j1gl7n7l5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:55:32Z' + timestamp: '2024-06-22T23:02:43Z' + - torchscript_onnx_tflite: + inference_time: 1393.0 + throughput: 717.8750897343862 + estimated_peak_memory_range: + min: 12288 + max: 2044968 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 41 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 41 + job_id: j2p0kyke5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1449.0 + throughput: 690.1311249137336 + estimated_peak_memory_range: + min: 16384 + max: 159650832 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 69 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 69 + job_id: jw56v6v7p + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:02:44Z' - torchscript_onnx_qnn: - inference_time: 1751.0 - throughput: 571.1022272986864 + inference_time: 1549.0 + throughput: 645.577792123951 estimated_peak_memory_range: min: 1589248 max: 1589248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: j0pxe1k15 + job_id: jn5qw8wm5 job_status: Passed - torchscript_onnx_ort: - inference_time: 2129.0 - throughput: 469.7040864255519 + torchscript_onnx: + inference_time: 2165.0 + throughput: 461.8937644341801 estimated_peak_memory_range: - min: 151552 - max: 151552 + min: 229376 + max: 229376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jep23mk4g + job_id: j1pv434mp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:55:35Z' + timestamp: '2024-06-22T23:02:48Z' diff --git a/qai_hub_models/models/posenet_mobilenet_quantized/README.md b/qai_hub_models/models/posenet_mobilenet_quantized/README.md new file mode 100644 index 00000000..00394618 --- /dev/null +++ b/qai_hub_models/models/posenet_mobilenet_quantized/README.md @@ -0,0 +1,56 @@ +[![Qualcomm® AI Hub Models](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/quic-logo.jpg)](../../README.md) + + +# [Posenet-Mobilenet-Quantized: Quantized human pose estimator](https://aihub.qualcomm.com/models/posenet_mobilenet_quantized) + +Posenet performs pose estimation on human images. + +This is based on the implementation of Posenet-Mobilenet-Quantized found +[here](https://github.com/rwightman/posenet-pytorch). This repository contains scripts for optimized on-device +export suitable to run on Qualcomm® devices. More details on model performance +across various devices can be found [here](https://aihub.qualcomm.com/models/posenet_mobilenet_quantized). + +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on +a hosted Qualcomm® device. + + + + +## Example & Usage + + +Once installed, run the following simple CLI demo: + +```bash +python -m qai_hub_models.models.posenet_mobilenet_quantized.demo +``` +More details on the CLI tool can be found with the `--help` option.
See +[demo.py](demo.py) for sample usage of the model including pre/post processing +scripts. Please refer to our [general instructions on using +models](../../../#getting-started) for more usage instructions. + +## Export for on-device deployment + +This repository contains export scripts that produce a model optimized for +on-device deployment. This can be run as follows: + +```bash +python -m qai_hub_models.models.posenet_mobilenet_quantized.export +``` +Additional options are documented with the `--help` option. Note that the above +script requires access to Qualcomm® AI Hub. + +## License +- The license for the original implementation of Posenet-Mobilenet-Quantized can be found + [here](https://github.com/rwightman/posenet-pytorch/blob/master/LICENSE.txt). +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). + +## References +* [PersonLab: Person Pose Estimation and Instance Segmentation with a Bottom-Up, Part-Based, Geometric Embedding Model](https://arxiv.org/abs/1803.08225) +* [Source Model Implementation](https://github.com/rwightman/posenet-pytorch) + +## Community +* Join [our AI Hub Slack community](https://qualcomm-ai-hub.slack.com/join/shared_invite/zt-2d5zsmas3-Sj0Q9TzslueCjS31eXG2UA#/shared-invite/email) to collaborate, post questions and learn more about on-device AI. +* For questions or feedback please [reach out to us](mailto:ai-hub-support@qti.qualcomm.com). + + diff --git a/qai_hub_models/models/posenet_mobilenet_quantized/__init__.py b/qai_hub_models/models/posenet_mobilenet_quantized/__init__.py new file mode 100644 index 00000000..9d489997 --- /dev/null +++ b/qai_hub_models/models/posenet_mobilenet_quantized/__init__.py @@ -0,0 +1,8 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models.posenet_mobilenet.app import PosenetApp # noqa: F401 + +from .model import MODEL_ID # noqa: F401 +from .model import PosenetMobilenetQuantizable as Model # noqa: F401 diff --git a/qai_hub_models/models/posenet_mobilenet_quantized/conftest.py b/qai_hub_models/models/posenet_mobilenet_quantized/conftest.py new file mode 100644 index 00000000..c3a05810 --- /dev/null +++ b/qai_hub_models/models/posenet_mobilenet_quantized/conftest.py @@ -0,0 +1,39 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +import inspect + +import pytest + +from qai_hub_models.models.posenet_mobilenet_quantized import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +# Instantiate the model only once for all tests. +# Mock from_pretrained to always return the initialized model. +# This speeds up tests and limits memory leaks.
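+# The cache key is built from str(args) + str(kwargs), so repeated calls with
+# identical arguments reuse the same cached model instance across these tests.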
+@pytest.fixture(scope="module", autouse=True) +def cached_from_pretrained(): + with pytest.MonkeyPatch.context() as mp: + pretrained_cache = {} + from_pretrained = Model.from_pretrained + sig = inspect.signature(from_pretrained) + + @skip_clone_repo_check + def _cached_from_pretrained(*args, **kwargs): + cache_key = str(args) + str(kwargs) + model = pretrained_cache.get(cache_key, None) + if model: + return model + else: + model = from_pretrained(*args, **kwargs) + pretrained_cache[cache_key] = model + return model + + _cached_from_pretrained.__signature__ = sig + + mp.setattr(Model, "from_pretrained", _cached_from_pretrained) + yield mp diff --git a/qai_hub_models/models/posenet_mobilenet_quantized/demo.py b/qai_hub_models/models/posenet_mobilenet_quantized/demo.py new file mode 100644 index 00000000..0bd949f0 --- /dev/null +++ b/qai_hub_models/models/posenet_mobilenet_quantized/demo.py @@ -0,0 +1,18 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from __future__ import annotations + +from qai_hub_models.models.posenet_mobilenet.demo import posenet_demo +from qai_hub_models.models.posenet_mobilenet_quantized.model import ( + PosenetMobilenetQuantizable, +) + + +def main(is_test: bool = False): + return posenet_demo(PosenetMobilenetQuantizable, is_test) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/posenet_mobilenet_quantized/export.py b/qai_hub_models/models/posenet_mobilenet_quantized/export.py new file mode 100644 index 00000000..25b5f26b --- /dev/null +++ b/qai_hub_models/models/posenet_mobilenet_quantized/export.py @@ -0,0 +1,232 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import os +import warnings +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple, cast + +import qai_hub as hub + +from qai_hub_models.models.posenet_mobilenet_quantized import Model +from qai_hub_models.utils.args import ( + export_parser, + get_input_spec_kwargs, + get_model_kwargs, +) +from qai_hub_models.utils.base_model import TargetRuntime +from qai_hub_models.utils.compare import torch_inference +from qai_hub_models.utils.printing import ( + print_inference_metrics, + print_on_target_demo_cmd, + print_profile_metrics_from_job, +) +from qai_hub_models.utils.qai_hub_helpers import ( + can_access_qualcomm_ai_hub, + export_without_hub_access, + transpose_channel_first_to_last, +) +from qai_hub_models.utils.qnn_helpers import get_qnn_inputs + + +def export_model( + device: str = "Samsung Galaxy S23 (Family)", + chipset: Optional[str] = None, + skip_profiling: bool = False, + skip_inferencing: bool = False, + skip_downloading: bool = False, + skip_summary: bool = False, + output_dir: Optional[str] = None, + target_runtime: TargetRuntime = TargetRuntime.TFLITE, + compile_options: str = "", + profile_options: str = "", + **additional_model_kwargs, +) -> Tuple[hub.CompileJob, Optional[hub.ProfileJob], Optional[hub.InferenceJob]] | List[ + str +]: + """ + This function accomplishes 6 main tasks: + + 1. 
Instantiates a PyTorch model and converts it to a traced TorchScript format. + 2. Compiles the model to an asset that can be run on device. + 3. Profiles the model performance on real devices. + 4. Inferences the model on sample inputs. + 5. Downloads the model asset to the local directory. + 6. Summarizes the results from profiling and inference. + + Each of the last four steps can be optionally skipped using the input options. + + Parameters: + device: Device for which to export the model. + Full list of available devices can be found by running `hub.get_devices()`. + Defaults to DEFAULT_DEVICE if not specified. + chipset: If set, will choose a random device with this chipset. + Overrides the `device` argument. + skip_profiling: If set, skips profiling of compiled model on real devices. + skip_inferencing: If set, skips computing on-device outputs from sample data. + skip_downloading: If set, skips downloading of compiled model. + skip_summary: If set, skips waiting for and summarizing results + from profiling and inference. + output_dir: Directory to store generated assets (e.g. compiled model). + Defaults to `<cwd>/build/<model_name>`. + target_runtime: Which on-device runtime to target. Default is TFLite. + compile_options: Additional options to pass when submitting the compile job. + profile_options: Additional options to pass when submitting the profile job. + **additional_model_kwargs: Additional optional kwargs used to customize + `model_cls.from_pretrained` and `model.get_input_spec` + + Returns: + A 3-tuple of: + * A CompileJob object containing metadata about the compile job submitted to hub. + * A ProfileJob containing metadata about the profile job (None if profiling skipped). + * An InferenceJob containing metadata about the inference job (None if inferencing skipped). + """ + model_name = "posenet_mobilenet_quantized" + output_path = Path(output_dir or Path.cwd() / "build" / model_name) + if chipset: + hub_device = hub.Device(attributes=f"chipset:{chipset}") + else: + hub_device = hub.Device(name=device) + if not can_access_qualcomm_ai_hub(): + return export_without_hub_access( + "posenet_mobilenet_quantized", + "Posenet-Mobilenet-Quantized", + device, + skip_profiling, + skip_inferencing, + skip_downloading, + skip_summary, + output_path, + target_runtime, + compile_options, + profile_options, + ) + + # 1. Initialize PyTorch model + model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) + input_spec = model.get_input_spec( + **get_input_spec_kwargs(model, additional_model_kwargs) + ) + + # Trace the model + source_model = model.convert_to_hub_source_model( + target_runtime, output_path, input_spec + ) + if target_runtime == TargetRuntime.TFLITE: + quant_calibration_data = None + else: + quant_calibration_data = model.get_calibration_data(target_runtime, input_spec) + + # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) + channel_last_flags = ( + " --force_channel_last_input image" + if target_runtime != TargetRuntime.ONNX + else "" + ) + + # 2.
Compile the model to an on-device asset + model_compile_options = model.get_hub_compile_options( + target_runtime, compile_options + channel_last_flags, hub_device + ) + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( + model=source_model, + input_specs=input_spec, + device=hub_device, + name=model_name, + calibration_data=quant_calibration_data, + options=model_compile_options, + ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) + + # 3. Profile the model asset on real devices + profile_job: Optional[hub.client.ProfileJob] = None + if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print(f"Profiling model {model_name} on a hosted device.") + submitted_profile_job = hub.submit_profile_job( + model=compile_job.get_target_model(), + device=hub_device, + name=model_name, + options=profile_options_all, + ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) + + # 4. Run inference on-device with sample inputs + inference_job: Optional[hub.client.InferenceJob] = None + if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print( + f"Running inference for {model_name} on a hosted device with example inputs." + ) + sample_inputs = model.sample_inputs(input_spec) + hub_inputs = sample_inputs + if target_runtime == TargetRuntime.QNN: + hub_inputs = get_qnn_inputs(compile_job, sample_inputs) + # Convert inputs from channel first to channel last + hub_inputs = ( + sample_inputs + if target_runtime == TargetRuntime.ONNX + else transpose_channel_first_to_last("image", sample_inputs, target_runtime) + ) + submitted_inference_job = hub.submit_inference_job( + model=compile_job.get_target_model(), + inputs=hub_inputs, + device=hub_device, + name=model_name, + options=profile_options_all, + ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) + + # 5. Download the model asset to a local file + if not skip_downloading: + if target_runtime == TargetRuntime.QNN: + target_runtime_extension = "so" + elif target_runtime == TargetRuntime.TFLITE: + target_runtime_extension = "tflite" + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: + target_runtime_extension = "onnx" + + os.makedirs(output_path, exist_ok=True) + target_model: hub.Model = compile_job.get_target_model() # type: ignore + target_model.download( + str(output_path / f"{model_name}.{target_runtime_extension}") + ) + + # 6. 
Summarize the results from profiling and inference + if not skip_summary and not skip_profiling: + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore + print_profile_metrics_from_job(profile_job, profile_data) + + if not skip_summary and not skip_inferencing: + torch_out = torch_inference(model, sample_inputs) + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore + print_inference_metrics(inference_job, inference_result, torch_out) + + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + + return (compile_job, profile_job, inference_job) + + +def main(): + warnings.filterwarnings("ignore") + parser = export_parser(model_cls=Model) + args = parser.parse_args() + export_model(**vars(args)) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/posenet_mobilenet_quantized/info.yaml b/qai_hub_models/models/posenet_mobilenet_quantized/info.yaml new file mode 100644 index 00000000..3124e297 --- /dev/null +++ b/qai_hub_models/models/posenet_mobilenet_quantized/info.yaml @@ -0,0 +1,40 @@ +name: Posenet-Mobilenet-Quantized +# id must match with the model dir name in qai_hub_models +id: posenet_mobilenet_quantized +status: public +headline: Quantized human pose estimator. +domain: Computer Vision +use_case: Pose Estimation +description: Posenet performs pose estimation on human images. +tags: + - quantized +research_paper: https://arxiv.org/abs/1803.08225 +research_paper_title: 'PersonLab: Person Pose Estimation and Instance Segmentation + with a Bottom-Up, Part-Based, Geometric Embedding Model' +license: https://github.com/rwightman/posenet-pytorch/blob/master/LICENSE.txt +deploy_license: + https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +source_repo: https://github.com/rwightman/posenet-pytorch +technical_details: + Model checkpoint: mobilenet_v1_101 + Input resolution: 513x257 + Number of parameters: 3.31M + Model size: 3.47 MB +applicable_scenarios: + - Injury prevention training + - Sports performance analysis + - Posture recognition +form_factors: + - Phone + - Tablet + - IoT +related_models: + - litehrnet + - openpose + - hrnet_pose +has_static_banner: yes +has_animated_banner: yes +license_type: apache-2.0 +deploy_license_type: AI Model Hub License +dataset: + - coco diff --git a/qai_hub_models/models/posenet_mobilenet_quantized/model.py b/qai_hub_models/models/posenet_mobilenet_quantized/model.py new file mode 100644 index 00000000..d7a25275 --- /dev/null +++ b/qai_hub_models/models/posenet_mobilenet_quantized/model.py @@ -0,0 +1,87 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from __future__ import annotations + +# isort: off +# This verifies aimet is installed, and this must be included first. 
+from qai_hub_models.utils.quantization_aimet import ( + AIMETQuantizableMixin, + tie_observers, + constrain_quantized_inputs_to_image_range, +) + +# isort: on + +import torch +from aimet_torch.batch_norm_fold import fold_all_batch_norms +from aimet_torch.cross_layer_equalization import CrossLayerScaling +from aimet_torch.model_preparer import prepare_model +from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim + +from qai_hub_models.models.posenet_mobilenet.model import PosenetMobilenet +from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset + +MODEL_ID = __name__.split(".")[-2] +MODEL_ASSET_VERSION = 1 +DEFAULT_ENCODINGS = "posenet_mobilenet_quantized_encodings.json" + + +class PosenetMobilenetQuantizable(AIMETQuantizableMixin, PosenetMobilenet): + """ + PosenetMobilenet with post training quantization support + + Supports only 8 bit weights and activations, and only loads pre-quantized checkpoints. + Support for quantizing using your own weights & data will come at a later date. + """ + + def __init__( + self, + model: QuantizationSimModel, + ) -> None: + PosenetMobilenet.__init__(self, model.model) + AIMETQuantizableMixin.__init__(self, model) + + @classmethod + def from_pretrained( + cls, aimet_encodings: str | None = "DEFAULT" + ) -> PosenetMobilenetQuantizable: + model = PosenetMobilenet.from_pretrained() + input_shape = PosenetMobilenet.get_input_spec()["image"][0] + dummy_input = torch.rand(input_shape) + + model = prepare_model(model) + fold_all_batch_norms(model, input_shape, dummy_input) + CrossLayerScaling.scale_model(model, input_shape, dummy_input) + + sim = QuantizationSimModel( + model, + quant_scheme="tf_enhanced", + default_param_bw=8, + default_output_bw=8, + config_file=get_default_aimet_config(), + dummy_input=dummy_input, + ) + tie_observers(sim) + constrain_quantized_inputs_to_image_range(sim) + + if aimet_encodings: + if aimet_encodings == "DEFAULT": + aimet_encodings = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, DEFAULT_ENCODINGS + ).fetch() + load_encodings_to_sim(sim, aimet_encodings) + + final_model = cls(sim) + return final_model + + def forward(self, image: torch.Tensor): + """ + Run PosenetMobilenetQuantizable on `image`, and produce a + predicted set of keypoints. + + See PosenetMobilenet model for details. 
+ """ + return self.model(image) diff --git a/qai_hub_models/models/posenet_mobilenet_quantized/perf.yaml b/qai_hub_models/models/posenet_mobilenet_quantized/perf.yaml new file mode 100644 index 00000000..4b215629 --- /dev/null +++ b/qai_hub_models/models/posenet_mobilenet_quantized/perf.yaml @@ -0,0 +1,265 @@ +aggregated: + supported_oses: + - Android + supported_devices: + - Google Pixel 3 + - Google Pixel 3a + - Google Pixel 3a XL + - Google Pixel 4 + - Google Pixel 4a + - Google Pixel 5a 5G + - QCS8250 (Proxy) + - QCS8550 (Proxy) + - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) + - Samsung Galaxy S21 + - Samsung Galaxy S21 Ultra + - Samsung Galaxy S21+ + - Samsung Galaxy S22 5G + - Samsung Galaxy S22 Ultra 5G + - Samsung Galaxy S22+ 5G + - Samsung Galaxy S23 + - Samsung Galaxy S23 Ultra + - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ + - Samsung Galaxy Tab S8 + - Snapdragon X Elite CRD + - Xiaomi 12 + - Xiaomi 12 Pro + supported_chipsets: + - Qcs8250 + - Qcs8550 + - Sa8540p + - Sa8775p + - Snapdragon® 8 Gen 1 + - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 + - Snapdragon® 888 + - Snapdragon® X Elite +models: +- name: Posenet-Mobilenet-Quantized + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 591.0 + throughput: 1692.047377326565 + estimated_peak_memory_range: + min: 12288 + max: 1938584 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 42 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 42 + job_id: jlpe2920p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 622.0 + throughput: 1607.717041800643 + estimated_peak_memory_range: + min: 16384 + max: 9093504 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 42 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 42 + job_id: jz576zkrg + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-06-22T23:03:19Z' + - torchscript_onnx_tflite: + inference_time: 403.0 + throughput: 2481.3895781637716 + estimated_peak_memory_range: + min: 12288 + max: 47502864 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 42 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 42 + job_id: jygzwew6g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 433.0 + throughput: 2309.4688221709007 + estimated_peak_memory_range: + min: 409600 + max: 31954752 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 42 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 42 + job_id: jqp48qmlg + job_status: Passed + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-06-22T23:03:21Z' + - torchscript_onnx_tflite: + inference_time: 594.0 + throughput: 1683.5016835016836 + estimated_peak_memory_range: + min: 12288 + max: 1688024 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 42 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 42 + job_id: jz5wxoxjp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 616.0 + throughput: 1623.3766233766235 + estimated_peak_memory_range: + min: 12288 + max: 15322296 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 42 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 42 + job_id: jo5m4roq5 + job_status: 
Passed + reference_device_info: + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: Qcs8550 + timestamp: '2024-06-22T23:03:23Z' + - torchscript_onnx_tflite: + inference_time: 573.0 + throughput: 1745.2006980802792 + estimated_peak_memory_range: + min: 12288 + max: 1835216 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 42 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 42 + job_id: jmg98v8vp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 624.0 + throughput: 1602.5641025641025 + estimated_peak_memory_range: + min: 413696 + max: 12761480 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 42 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 42 + job_id: jegnx2om5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:03:24Z' + - torchscript_onnx_tflite: + inference_time: 2251.0 + throughput: 444.247001332741 + estimated_peak_memory_range: + min: 12288 + max: 26289056 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 42 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 42 + job_id: jnp1303l5 + job_status: Passed + reference_device_info: + name: RB3 Gen 2 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: Qcs6490 + timestamp: '2024-06-22T23:03:17Z' + - torchscript_onnx_tflite: + inference_time: 11775.0 + throughput: 84.92569002123142 + estimated_peak_memory_range: + min: 528384 + max: 7493424 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 39 + layers_on_gpu: 3 + layers_on_cpu: 0 + total_layers: 42 + job_id: jvgd0w0lp + job_status: Passed + reference_device_info: + name: RB5 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: Qcs8250 + timestamp: '2024-06-22T23:03:18Z' + - torchscript_onnx_qnn: + inference_time: 690.0 + throughput: 1449.2753623188405 + estimated_peak_memory_range: + min: 397312 + max: 397312 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 42 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 42 + job_id: j0pxmv39g + job_status: Passed + reference_device_info: + name: Snapdragon X Elite CRD + os: '11' + form_factor: Compute + os_name: Windows + manufacturer: Qualcomm + chipset: Snapdragon® X Elite + timestamp: '2024-06-22T23:03:22Z' diff --git a/qai_hub_models/models/posenet_mobilenet_quantized/test.py b/qai_hub_models/models/posenet_mobilenet_quantized/test.py new file mode 100644 index 00000000..84567ec1 --- /dev/null +++ b/qai_hub_models/models/posenet_mobilenet_quantized/test.py @@ -0,0 +1,53 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +import numpy as np + +from qai_hub_models.models.posenet_mobilenet.app import PosenetApp +from qai_hub_models.models.posenet_mobilenet.demo import IMAGE_ADDRESS +from qai_hub_models.models.posenet_mobilenet_quantized.demo import main as demo_main +from qai_hub_models.models.posenet_mobilenet_quantized.model import ( + MODEL_ASSET_VERSION, + MODEL_ID, + PosenetMobilenetQuantizable, +) +from qai_hub_models.utils.asset_loaders import ( + CachedWebModelAsset, + load_image, + load_numpy, +) +from qai_hub_models.utils.testing import skip_clone_repo_check + +KEYPOINT_SCORES_GT = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, "keypoint_scores_gt.npy" +) +KEYPOINT_COORDS_GT = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, "keypoint_coords_gt.npy" +) + + +@skip_clone_repo_check +def test_task(): + image = load_image(IMAGE_ADDRESS) + model = PosenetMobilenetQuantizable.from_pretrained() + h, w = PosenetMobilenetQuantizable.get_input_spec()["image"][0][2:] + app = PosenetApp(model, h, w) + pose_scores, keypoint_scores, keypoint_coords = app.predict(image, raw_output=True) + + assert pose_scores[0] >= 0.5 + assert pose_scores[1] >= 0.5 + for score in pose_scores[2:]: + assert score < 1e-4 + + np.testing.assert_allclose( + keypoint_scores, load_numpy(KEYPOINT_SCORES_GT), atol=1e-3, rtol=0.05 + ) + np.testing.assert_allclose( + keypoint_coords, load_numpy(KEYPOINT_COORDS_GT), atol=1e-3, rtol=0.05 + ) + + +@skip_clone_repo_check +def test_demo(): + demo_main(is_test=True) diff --git a/qai_hub_models/models/quicksrnetlarge/export.py b/qai_hub_models/models/quicksrnetlarge/export.py index 5f3ec808..50258050 100644 --- a/qai_hub_models/models/quicksrnetlarge/export.py +++ b/qai_hub_models/models/quicksrnetlarge/export.py @@ -38,7 +38,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,13 +117,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -169,7 +168,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -187,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -209,7 +208,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", 
inference_result, target_runtime ) diff --git a/qai_hub_models/models/quicksrnetlarge/info.yaml b/qai_hub_models/models/quicksrnetlarge/info.yaml index b139e358..5d42ebdf 100644 --- a/qai_hub_models/models/quicksrnetlarge/info.yaml +++ b/qai_hub_models/models/quicksrnetlarge/info.yaml @@ -17,7 +17,7 @@ deploy_license: source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet technical_details: Model checkpoint: quicksrnet_large_3x_checkpoint - Input resolution: 640x360 + Input resolution: 128x128 Number of parameters: 424K Model size: 1.63 MB applicable_scenarios: diff --git a/qai_hub_models/models/quicksrnetlarge/model.py b/qai_hub_models/models/quicksrnetlarge/model.py index 6a83e660..2f4fa63d 100644 --- a/qai_hub_models/models/quicksrnetlarge/model.py +++ b/qai_hub_models/models/quicksrnetlarge/model.py @@ -46,6 +46,5 @@ def from_pretrained( ) checkpoint = load_torch(checkpoint_asset) model.load_state_dict(checkpoint["state_dict"]) - model.eval() return cls(model, scale_factor) diff --git a/qai_hub_models/models/quicksrnetlarge/perf.yaml b/qai_hub_models/models/quicksrnetlarge/perf.yaml index 4ca0e8f0..4c10c066 100644 --- a/qai_hub_models/models/quicksrnetlarge/perf.yaml +++ b/qai_hub_models/models/quicksrnetlarge/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: QuickSRNetLarge performance_metrics: - torchscript_onnx_tflite: - inference_time: 2412.0 - throughput: 414.5936981757877 + inference_time: 2439.0 + throughput: 410.0041000410004 estimated_peak_memory_range: - min: 28672 - max: 1429016 + min: 24576 + max: 1394872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: j1gle1ojp + job_id: jn5qw8zm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2108.0 - throughput: 474.3833017077799 + inference_time: 2109.0 + throughput: 474.158368895211 estimated_peak_memory_range: - min: 229376 - max: 5466776 + min: 217088 + max: 65739352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jwgoe4oqp + job_id: jwgomyod5 job_status: Passed - torchscript_onnx_ort: - inference_time: 2712.0 - throughput: 368.7315634218289 + torchscript_onnx: + inference_time: 2642.0 + throughput: 378.5011355034065 estimated_peak_memory_range: - min: 16384 - max: 20834136 + min: 12288 + max: 6581808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: jygzv44op + job_id: jz5wxo8jp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-11T11:56:08Z' + timestamp: '2024-06-22T23:03:59Z' - torchscript_onnx_tflite: - inference_time: 1740.0 - throughput: 574.7126436781609 + inference_time: 1779.0 + throughput: 562.1135469364812 estimated_peak_memory_range: min: 16384 - max: 29572928 + max: 30262592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jw56qdr6g + job_id: j1gl7nol5 
job_status: Passed torchscript_onnx_qnn: - inference_time: 1500.0 - throughput: 666.6666666666666 + inference_time: 1498.0 + throughput: 667.5567423230974 estimated_peak_memory_range: min: 204800 - max: 21850576 + max: 20871824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: j1pvz99kg + job_id: j1pv43emp job_status: Passed - torchscript_onnx_ort: - inference_time: 1855.0 - throughput: 539.0835579514825 + torchscript_onnx: + inference_time: 1812.0 + throughput: 551.8763796909492 estimated_peak_memory_range: min: 212992 - max: 19290704 + max: 18814624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: jz5wm113g + job_id: jmg98vkvp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-11T11:56:09Z' + timestamp: '2024-06-22T23:04:00Z' - torchscript_onnx_tflite: - inference_time: 2478.0 - throughput: 403.5512510088781 + inference_time: 2437.0 + throughput: 410.3405826836274 estimated_peak_memory_range: - min: 24576 - max: 1690672 + min: 28672 + max: 2322816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: j1p3qwx35 + job_id: jw56v6r7p job_status: Passed torchscript_onnx_qnn: inference_time: 2101.0 throughput: 475.9638267491671 estimated_peak_memory_range: - min: 221184 - max: 5373456 + min: 12288 + max: 72339216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jlpe4llo5 + job_id: jlpe2980p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-11T11:56:07Z' + timestamp: '2024-06-22T23:03:56Z' + - torchscript_onnx_tflite: + inference_time: 2451.0 + throughput: 407.9967360261118 + estimated_peak_memory_range: + min: 24576 + max: 17913792 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 28 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 31 + job_id: j1p38kxz5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 2101.0 + throughput: 475.9638267491671 + estimated_peak_memory_range: + min: 212992 + max: 21803552 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 31 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 31 + job_id: jygzwe86g + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:03:57Z' - torchscript_onnx_qnn: - inference_time: 2949.0 - throughput: 339.097999321804 + inference_time: 2272.0 + throughput: 440.14084507042253 estimated_peak_memory_range: - min: 204800 - max: 204800 + min: 212992 + max: 212992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: j7gjkwwv5 + job_id: j7gj1xo8g job_status: Passed - torchscript_onnx_ort: + torchscript_onnx: inference_time: 2692.0 throughput: 371.4710252600297 estimated_peak_memory_range: - min: 13115392 - max: 13115392 + min: 12689408 + max: 12689408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: jmg99xxwg + job_id: jnp1307l5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-11T11:56:10Z' + timestamp: '2024-06-22T23:04:01Z' diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/export.py b/qai_hub_models/models/quicksrnetlarge_quantized/export.py index 4b832a72..15495fbd 100644 --- a/qai_hub_models/models/quicksrnetlarge_quantized/export.py +++ b/qai_hub_models/models/quicksrnetlarge_quantized/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -127,7 +127,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -177,7 +177,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -195,7 +195,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -217,7 +217,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/info.yaml b/qai_hub_models/models/quicksrnetlarge_quantized/info.yaml index 897f7e5f..613f8381 100644 --- a/qai_hub_models/models/quicksrnetlarge_quantized/info.yaml +++ b/qai_hub_models/models/quicksrnetlarge_quantized/info.yaml @@ -18,7 +18,7 @@ deploy_license: source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet technical_details: Model checkpoint: quicksrnet_large_3x_checkpoint - Input resolution: 640x360 + Input resolution: 128x128 Number of parameters: 424K Model size: 449 KB applicable_scenarios: diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/model.py b/qai_hub_models/models/quicksrnetlarge_quantized/model.py index 4767a779..6a5a8a69 100644 --- a/qai_hub_models/models/quicksrnetlarge_quantized/model.py +++ b/qai_hub_models/models/quicksrnetlarge_quantized/model.py @@ -76,6 +76,4 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() - return cls(sim, scale_factor) diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/perf.yaml b/qai_hub_models/models/quicksrnetlarge_quantized/perf.yaml index 6fc4336f..1ca35c3b 100644 --- a/qai_hub_models/models/quicksrnetlarge_quantized/perf.yaml +++ b/qai_hub_models/models/quicksrnetlarge_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - 
QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: QuickSRNetLarge-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1324.0 - throughput: 755.2870090634441 + inference_time: 1329.0 + throughput: 752.4454477050414 estimated_peak_memory_range: - min: 12288 - max: 2457016 + min: 24576 + max: 69497376 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jvgd7zzrg + job_id: jz576z7rg job_status: Passed torchscript_onnx_qnn: - inference_time: 1159.0 - throughput: 862.8127696289905 + inference_time: 1155.0 + throughput: 865.8008658008658 estimated_peak_memory_range: - min: 77824 - max: 3860912 + min: 28672 + max: 3835016 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 19 - job_id: jz57v7795 - job_status: Passed - torchscript_onnx_ort: - inference_time: 1039.0 - throughput: 962.4639076034649 - estimated_peak_memory_range: - min: 69632 - max: 4717016 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 22 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 22 - job_id: jegnr77q5 + job_id: jep2j8vm5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-11T11:56:37Z' + timestamp: '2024-06-22T23:04:27Z' - torchscript_onnx_tflite: - inference_time: 1024.0 - throughput: 976.5625 + inference_time: 1071.0 + throughput: 933.7068160597572 estimated_peak_memory_range: - min: 49152 - max: 25834320 + min: 16384 + max: 26377568 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jz5wm11mg + job_id: jqp48q9lg job_status: Passed torchscript_onnx_qnn: - inference_time: 812.0 - throughput: 1231.527093596059 + inference_time: 806.0 + throughput: 1240.6947890818858 estimated_peak_memory_range: - min: 7340032 - max: 27038272 + min: 12288 + max: 18885968 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 19 - job_id: jqp4j991p - job_status: Passed - torchscript_onnx_ort: - inference_time: 776.0 - throughput: 1288.659793814433 - estimated_peak_memory_range: - min: 36864 - max: 17135056 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 22 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 22 - job_id: jopr1nn7g + job_id: jqpyne74g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +120,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-11T11:56:37Z' + timestamp: '2024-06-22T23:04:29Z' - torchscript_onnx_tflite: - inference_time: 1364.0 - throughput: 733.1378299120234 + inference_time: 1330.0 + throughput: 751.8796992481203 estimated_peak_memory_range: min: 16384 - max: 1375064 + max: 1486272 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jmg99xx8g + job_id: j0pxmvd9g job_status: Passed torchscript_onnx_qnn: - 
inference_time: 1156.0 - throughput: 865.0519031141869 + inference_time: 1154.0 + throughput: 866.5511265164645 estimated_peak_memory_range: - min: 94208 - max: 9070680 + min: 20480 + max: 3931656 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 19 - job_id: jo5mvdd95 + job_id: j1p88o48p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +158,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-11T11:56:36Z' + timestamp: '2024-06-22T23:04:32Z' - torchscript_onnx_tflite: - inference_time: 3979.0 - throughput: 251.31942699170645 + inference_time: 1398.0 + throughput: 715.307582260372 estimated_peak_memory_range: - min: 12288 - max: 18592624 + min: 28672 + max: 63302504 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,7 +172,45 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jnp1qvv7g + job_id: jo5m4rdq5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1158.0 + throughput: 863.5578583765113 + estimated_peak_memory_range: + min: 73728 + max: 8456712 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 19 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 19 + job_id: jogkdz9op + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:04:33Z' + - torchscript_onnx_tflite: + inference_time: 3627.0 + throughput: 275.70995312930796 + estimated_peak_memory_range: + min: 36864 + max: 19522368 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 28 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 31 + job_id: jegnx27m5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -207,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-11T11:56:31Z' + timestamp: '2024-06-22T23:04:25Z' - torchscript_onnx_tflite: - inference_time: 32895.0 - throughput: 30.399756801945585 + inference_time: 34026.0 + throughput: 29.389290542526304 estimated_peak_memory_range: - min: 4079616 - max: 6087016 + min: 258048 + max: 7087952 primary_compute_unit: NPU precision: int8 layer_info: @@ -221,7 +233,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jvgd7zzzg + job_id: jopr9knep job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -230,13 +242,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-11T11:56:32Z' + timestamp: '2024-06-22T23:04:26Z' - torchscript_onnx_qnn: - inference_time: 1008.0 - throughput: 992.063492063492 + inference_time: 1065.0 + throughput: 938.9671361502348 estimated_peak_memory_range: - min: 90112 - max: 90112 + min: 57344 + max: 57344 primary_compute_unit: NPU precision: int8 layer_info: @@ -244,22 +256,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 19 - job_id: j0pxeddl5 - job_status: Passed - torchscript_onnx_ort: - inference_time: 1090.0 - throughput: 917.4311926605504 - estimated_peak_memory_range: - min: 4714496 - max: 4714496 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 22 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 22 - job_id: jep23vvqg + job_id: j2p0kyve5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -268,4 +265,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - 
timestamp: '2024-06-11T11:56:38Z' + timestamp: '2024-06-22T23:04:31Z' diff --git a/qai_hub_models/models/quicksrnetmedium/export.py b/qai_hub_models/models/quicksrnetmedium/export.py index 20dca067..5cae8684 100644 --- a/qai_hub_models/models/quicksrnetmedium/export.py +++ b/qai_hub_models/models/quicksrnetmedium/export.py @@ -38,7 +38,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,13 +117,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -169,7 +168,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -187,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -209,7 +208,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/quicksrnetmedium/info.yaml b/qai_hub_models/models/quicksrnetmedium/info.yaml index 9f0a95c6..88490a4f 100644 --- a/qai_hub_models/models/quicksrnetmedium/info.yaml +++ b/qai_hub_models/models/quicksrnetmedium/info.yaml @@ -17,7 +17,7 @@ deploy_license: source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet technical_details: Model checkpoint: quicksrnet_medium_3x_checkpoint - Input resolution: 640x360 + Input resolution: 128x128 Number of parameters: 55.0K Model size: 220 KB applicable_scenarios: diff --git a/qai_hub_models/models/quicksrnetmedium/model.py b/qai_hub_models/models/quicksrnetmedium/model.py index 65c91c46..0b35d777 100644 --- a/qai_hub_models/models/quicksrnetmedium/model.py +++ b/qai_hub_models/models/quicksrnetmedium/model.py @@ -46,6 +46,5 @@ def from_pretrained( ) checkpoint = load_torch(checkpoint_asset) model.load_state_dict(checkpoint["state_dict"]) - model.eval() return cls(model, scale_factor) diff --git a/qai_hub_models/models/quicksrnetmedium/perf.yaml b/qai_hub_models/models/quicksrnetmedium/perf.yaml index a70a9366..b5095b48 100644 --- a/qai_hub_models/models/quicksrnetmedium/perf.yaml +++ b/qai_hub_models/models/quicksrnetmedium/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - 
Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: QuickSRNetMedium performance_metrics: - torchscript_onnx_tflite: - inference_time: 1343.0 - throughput: 744.6016381236038 + inference_time: 1382.0 + throughput: 723.589001447178 estimated_peak_memory_range: - min: 16384 - max: 1439320 + min: 24576 + max: 1576320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: j2p0evvn5 + job_id: j7gj1xw8g job_status: Passed torchscript_onnx_qnn: - inference_time: 988.0 - throughput: 1012.1457489878543 + inference_time: 998.0 + throughput: 1002.0040080160321 estimated_peak_memory_range: - min: 12288 - max: 2409584 + min: 212992 + max: 67862072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jw56qdwyg + job_id: jmg98vxvp job_status: Passed - torchscript_onnx_ort: - inference_time: 1506.0 - throughput: 664.0106241699867 + torchscript_onnx: + inference_time: 1576.0 + throughput: 634.5177664974619 estimated_peak_memory_range: min: 217088 - max: 3451560 + max: 68562816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 19 - job_id: jygzv4nxp + job_id: jnp130625 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-11T11:57:01Z' + timestamp: '2024-06-22T23:05:07Z' - torchscript_onnx_tflite: - inference_time: 898.0 - throughput: 1113.5857461024498 + inference_time: 1004.0 + throughput: 996.01593625498 estimated_peak_memory_range: - min: 20480 - max: 20940320 + min: 16384 + max: 21443360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: j1p8w44op + job_id: jlpe29l0p job_status: Passed torchscript_onnx_qnn: - inference_time: 645.0 - throughput: 1550.3875968992247 + inference_time: 654.0 + throughput: 1529.051987767584 estimated_peak_memory_range: min: 208896 - max: 17163888 + max: 16681856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jwgoe48kp + job_id: jnp130vl5 job_status: Passed - torchscript_onnx_ort: - inference_time: 1070.0 - throughput: 934.5794392523364 + torchscript_onnx: + inference_time: 1040.0 + throughput: 961.5384615384615 estimated_peak_memory_range: min: 212992 - max: 13764384 + max: 13277408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 19 - job_id: jz5wm14mg + job_id: jvgd0w2ep job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-11T11:57:02Z' + timestamp: '2024-06-22T23:05:08Z' - torchscript_onnx_tflite: - inference_time: 1369.0 - throughput: 730.4601899196493 + inference_time: 1390.0 + throughput: 719.4244604316547 estimated_peak_memory_range: - min: 24576 - max: 1342320 + min: 12673024 + max: 14455856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jn5q9mmop + job_id: jygzwe46g job_status: Passed torchscript_onnx_qnn: - inference_time: 1010.0 - throughput: 990.0990099009902 + 
inference_time: 999.0 + throughput: 1001.001001001001 estimated_peak_memory_range: - min: 221184 - max: 7892152 + min: 28672 + max: 12587064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jlpe4lyv5 + job_id: jz5wxo46p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-11T11:57:00Z' + timestamp: '2024-06-22T23:05:05Z' + - torchscript_onnx_tflite: + inference_time: 1467.0 + throughput: 681.6632583503749 + estimated_peak_memory_range: + min: 28672 + max: 16733008 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 14 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 17 + job_id: jz5wxo1jp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1008.0 + throughput: 992.063492063492 + estimated_peak_memory_range: + min: 212992 + max: 67601112 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 17 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 17 + job_id: jmg98vdlp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:05:06Z' - torchscript_onnx_qnn: - inference_time: 1066.0 - throughput: 938.0863039399625 + inference_time: 1136.0 + throughput: 880.2816901408451 estimated_peak_memory_range: min: 204800 max: 204800 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: j7gjkwqe5 + job_id: jvgd0wzlp job_status: Passed - torchscript_onnx_ort: - inference_time: 1498.0 - throughput: 667.5567423230974 + torchscript_onnx: + inference_time: 1507.0 + throughput: 663.5700066357001 estimated_peak_memory_range: - min: 9003008 - max: 9003008 + min: 9035776 + max: 9035776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 19 - job_id: jmg99xd8g + job_id: jz576z9lg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-11T11:57:03Z' + timestamp: '2024-06-22T23:05:09Z' diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/export.py b/qai_hub_models/models/quicksrnetmedium_quantized/export.py index fa37875f..da366568 100644 --- a/qai_hub_models/models/quicksrnetmedium_quantized/export.py +++ b/qai_hub_models/models/quicksrnetmedium_quantized/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -127,7 +127,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -177,7 +177,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -195,7 +195,7 @@ def 
export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -217,7 +217,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/info.yaml b/qai_hub_models/models/quicksrnetmedium_quantized/info.yaml index e17071f4..f0d86e74 100644 --- a/qai_hub_models/models/quicksrnetmedium_quantized/info.yaml +++ b/qai_hub_models/models/quicksrnetmedium_quantized/info.yaml @@ -18,7 +18,7 @@ deploy_license: source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet technical_details: Model checkpoint: quicksrnet_medium_3x_checkpoint - Input resolution: 640x360 + Input resolution: 128x128 Number of parameters: 55.0K Model size: 67.2 KB applicable_scenarios: diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/model.py b/qai_hub_models/models/quicksrnetmedium_quantized/model.py index e16d87c5..72457952 100644 --- a/qai_hub_models/models/quicksrnetmedium_quantized/model.py +++ b/qai_hub_models/models/quicksrnetmedium_quantized/model.py @@ -76,6 +76,4 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() - return cls(sim, scale_factor) diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/perf.yaml b/qai_hub_models/models/quicksrnetmedium_quantized/perf.yaml index 72df875e..ce4d297d 100644 --- a/qai_hub_models/models/quicksrnetmedium_quantized/perf.yaml +++ b/qai_hub_models/models/quicksrnetmedium_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: QuickSRNetMedium-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1000.0 - throughput: 1000.0 + inference_time: 997.0 + throughput: 1003.0090270812437 estimated_peak_memory_range: - min: 12288 - max: 5493824 + min: 16384 + max: 1924136 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jvgd7z2zg + job_id: j0pxmvx1g job_status: Passed torchscript_onnx_qnn: - inference_time: 803.0 - throughput: 1245.3300124533 + inference_time: 804.0 + throughput: 1243.7810945273632 estimated_peak_memory_range: - min: 16384 - max: 10291792 + min: 20480 + max: 2668672 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jegnr7kq5 - job_status: Passed - torchscript_onnx_ort: - inference_time: 757.0 - throughput: 1321.003963011889 - estimated_peak_memory_range: - min: 65536 - max: 19746264 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 14 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 14 - job_id: j2p0ev6n5 + job_id: j2p0ky665 
job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-11T11:57:28Z' + timestamp: '2024-06-22T23:05:34Z' - torchscript_onnx_tflite: - inference_time: 814.0 - throughput: 1228.5012285012285 + inference_time: 949.0 + throughput: 1053.740779768177 estimated_peak_memory_range: - min: 12288 - max: 20707552 + min: 16384 + max: 21043280 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jz57v7995 + job_id: jo5m4r8w5 job_status: Passed torchscript_onnx_qnn: - inference_time: 546.0 - throughput: 1831.5018315018315 + inference_time: 548.0 + throughput: 1824.8175182481752 estimated_peak_memory_range: min: 65536 - max: 14574352 + max: 15605120 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jopr1nw7g - job_status: Passed - torchscript_onnx_ort: - inference_time: 558.0 - throughput: 1792.1146953405018 - estimated_peak_memory_range: - min: 65536 - max: 12140448 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 14 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 14 - job_id: j1p8w41op + job_id: j1p88o1xp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +120,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-11T11:57:29Z' + timestamp: '2024-06-22T23:05:35Z' - torchscript_onnx_tflite: - inference_time: 995.0 - throughput: 1005.0251256281407 + inference_time: 1019.0 + throughput: 981.3542688910696 estimated_peak_memory_range: min: 24576 - max: 3118760 + max: 1393312 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jqp4j931p + job_id: jegnx2kr5 job_status: Passed torchscript_onnx_qnn: - inference_time: 800.0 - throughput: 1250.0 + inference_time: 801.0 + throughput: 1248.4394506866417 estimated_peak_memory_range: - min: 16384 - max: 18363240 + min: 65536 + max: 10272704 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jqpyv7mlp + job_id: jn5qw8v45 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +158,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-11T11:57:27Z' + timestamp: '2024-06-22T23:05:38Z' + - torchscript_onnx_tflite: + inference_time: 1011.0 + throughput: 989.1196834817013 + estimated_peak_memory_range: + min: 36864 + max: 3142544 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 14 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 17 + job_id: jopr9kw9p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 802.0 + throughput: 1246.8827930174564 + estimated_peak_memory_range: + min: 12288 + max: 69736248 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 11 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 11 + job_id: j1gl7nl85 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:05:39Z' - torchscript_onnx_tflite: - inference_time: 1968.0 - throughput: 508.130081300813 + inference_time: 1906.0 + throughput: 
524.6589716684156 estimated_peak_memory_range: min: 12288 - max: 14747456 + max: 15189776 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,7 +210,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: j0pxedxl5 + job_id: jep2j8e45 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -207,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-11T11:57:22Z' + timestamp: '2024-06-22T23:05:32Z' - torchscript_onnx_tflite: - inference_time: 9155.0 - throughput: 109.22992900054615 + inference_time: 7949.0 + throughput: 125.80198767140521 estimated_peak_memory_range: - min: 3342336 - max: 7015776 + min: 3489792 + max: 6727064 primary_compute_unit: NPU precision: int8 layer_info: @@ -221,7 +233,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jo5mvd895 + job_id: jqpynem7g job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -230,13 +242,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-11T11:57:23Z' + timestamp: '2024-06-22T23:05:33Z' - torchscript_onnx_qnn: - inference_time: 764.0 - throughput: 1308.9005235602094 + inference_time: 712.0 + throughput: 1404.4943820224719 estimated_peak_memory_range: - min: 1196032 - max: 1196032 + min: 98304 + max: 98304 primary_compute_unit: NPU precision: int8 layer_info: @@ -244,22 +256,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jep23veqg - job_status: Passed - torchscript_onnx_ort: - inference_time: 781.0 - throughput: 1280.4097311139565 - estimated_peak_memory_range: - min: 7262208 - max: 7262208 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 14 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 14 - job_id: jogkr98n5 + job_id: jogkdz82p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -268,4 +265,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-11T11:57:30Z' + timestamp: '2024-06-22T23:05:36Z' diff --git a/qai_hub_models/models/quicksrnetsmall/export.py b/qai_hub_models/models/quicksrnetsmall/export.py index 7bf13e43..941bd800 100644 --- a/qai_hub_models/models/quicksrnetsmall/export.py +++ b/qai_hub_models/models/quicksrnetsmall/export.py @@ -38,7 +38,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,13 +117,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -169,7 +168,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -187,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif 
target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -209,7 +208,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/quicksrnetsmall/info.yaml b/qai_hub_models/models/quicksrnetsmall/info.yaml index 94d58142..2ccfe754 100644 --- a/qai_hub_models/models/quicksrnetsmall/info.yaml +++ b/qai_hub_models/models/quicksrnetsmall/info.yaml @@ -17,7 +17,7 @@ deploy_license: source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet technical_details: Model checkpoint: quicksrnet_small_3x_checkpoint - Input resolution: 640x360 + Input resolution: 128x128 Number of parameters: 27.2K Model size: 110 KB applicable_scenarios: diff --git a/qai_hub_models/models/quicksrnetsmall/model.py b/qai_hub_models/models/quicksrnetsmall/model.py index 9b21851e..83d53858 100644 --- a/qai_hub_models/models/quicksrnetsmall/model.py +++ b/qai_hub_models/models/quicksrnetsmall/model.py @@ -46,6 +46,5 @@ def from_pretrained( ) checkpoint = load_torch(checkpoint_asset) model.load_state_dict(checkpoint["state_dict"]) - model.eval() return cls(model, scale_factor) diff --git a/qai_hub_models/models/quicksrnetsmall/perf.yaml b/qai_hub_models/models/quicksrnetsmall/perf.yaml index 16c8950f..6364aa18 100644 --- a/qai_hub_models/models/quicksrnetsmall/perf.yaml +++ b/qai_hub_models/models/quicksrnetsmall/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: QuickSRNetSmall performance_metrics: - torchscript_onnx_tflite: - inference_time: 1334.0 - throughput: 749.6251874062968 + inference_time: 1328.0 + throughput: 753.0120481927711 estimated_peak_memory_range: - min: 28672 - max: 1646912 + min: 32768 + max: 4162488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: j1gle1xmp + job_id: jygzwenkg job_status: Passed torchscript_onnx_qnn: - inference_time: 1004.0 - throughput: 996.01593625498 + inference_time: 1017.0 + throughput: 983.284169124877 estimated_peak_memory_range: min: 221184 - max: 10711856 + max: 2807024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jwgoe4rkp + job_id: jvgd0w3ep job_status: Passed - torchscript_onnx_ort: - inference_time: 1405.0 - throughput: 711.7437722419929 + torchscript_onnx: + inference_time: 1440.0 + throughput: 694.4444444444445 estimated_peak_memory_range: - min: 212992 - max: 2559280 + min: 217088 + max: 3113208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 13 - job_id: jygzv4mxp + job_id: jegnx2nr5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-11T11:57:52Z' + timestamp: 
'2024-06-22T23:06:13Z' - torchscript_onnx_tflite: - inference_time: 936.0 - throughput: 1068.3760683760684 + inference_time: 915.0 + throughput: 1092.896174863388 estimated_peak_memory_range: min: 16384 - max: 19633600 + max: 19974256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jw56qd7yg + job_id: jz5wxo76p job_status: Passed torchscript_onnx_qnn: - inference_time: 624.0 - throughput: 1602.5641025641025 + inference_time: 627.0 + throughput: 1594.896331738437 estimated_peak_memory_range: min: 208896 - max: 13403568 + max: 12686432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: j1pvz9drg + job_id: jz576z4lg job_status: Passed - torchscript_onnx_ort: - inference_time: 949.0 - throughput: 1053.740779768177 + torchscript_onnx: + inference_time: 996.0 + throughput: 1004.0160642570281 estimated_peak_memory_range: - min: 212992 - max: 12509200 + min: 339968 + max: 12452704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 13 - job_id: jz5wm17mg + job_id: jopr9k09p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-11T11:57:53Z' + timestamp: '2024-06-22T23:06:14Z' - torchscript_onnx_tflite: - inference_time: 1319.0 - throughput: 758.1501137225171 + inference_time: 1379.0 + throughput: 725.1631617113851 estimated_peak_memory_range: - min: 20480 - max: 7876136 + min: 24576 + max: 1325592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: j1p3qw9n5 + job_id: jmg98vmlp job_status: Passed torchscript_onnx_qnn: - inference_time: 992.0 - throughput: 1008.0645161290323 + inference_time: 1020.0 + throughput: 980.3921568627451 estimated_peak_memory_range: min: 229376 - max: 12485448 + max: 2190712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jlpe4lzv5 + job_id: j0pxmv41g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,7 +180,45 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-11T11:57:51Z' + timestamp: '2024-06-22T23:06:10Z' + - torchscript_onnx_tflite: + inference_time: 1430.0 + throughput: 699.3006993006993 + estimated_peak_memory_range: + min: 6361088 + max: 8078024 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 8 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 11 + job_id: jnp130j25 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1003.0 + throughput: 997.0089730807578 + estimated_peak_memory_range: + min: 229376 + max: 9373504 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 11 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 11 + job_id: jo5m4rmw5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:06:12Z' - torchscript_onnx_qnn: inference_time: 1112.0 throughput: 899.2805755395683 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: j7gjkw7e5 + job_id: jqp48q1vg job_status: Passed - 
torchscript_onnx_ort: - inference_time: 1419.0 - throughput: 704.7216349541931 + torchscript_onnx: + inference_time: 1464.0 + throughput: 683.0601092896175 estimated_peak_memory_range: - min: 8966144 - max: 8966144 + min: 8876032 + max: 8876032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 13 - job_id: jmg99xm8g + job_id: jep2j8w45 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-11T11:57:53Z' + timestamp: '2024-06-22T23:06:15Z' diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/export.py b/qai_hub_models/models/quicksrnetsmall_quantized/export.py index 48cf6baf..5145913e 100644 --- a/qai_hub_models/models/quicksrnetsmall_quantized/export.py +++ b/qai_hub_models/models/quicksrnetsmall_quantized/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -127,7 +127,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -177,7 +177,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -195,7 +195,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -217,7 +217,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/info.yaml b/qai_hub_models/models/quicksrnetsmall_quantized/info.yaml index 37b32e7f..eefceadb 100644 --- a/qai_hub_models/models/quicksrnetsmall_quantized/info.yaml +++ b/qai_hub_models/models/quicksrnetsmall_quantized/info.yaml @@ -18,7 +18,7 @@ deploy_license: source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet technical_details: Model checkpoint: quicksrnet_small_3x_checkpoint - Input resolution: 640x360 + Input resolution: 128x128 Number of parameters: 27.2K Model size: 34.9 KB applicable_scenarios: diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/model.py b/qai_hub_models/models/quicksrnetsmall_quantized/model.py index 9b1c83a6..3491e3c0 100644 --- a/qai_hub_models/models/quicksrnetsmall_quantized/model.py +++ b/qai_hub_models/models/quicksrnetsmall_quantized/model.py @@ -76,6 +76,4 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() - return cls(sim, 
scale_factor) diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/perf.yaml b/qai_hub_models/models/quicksrnetsmall_quantized/perf.yaml index babfa73c..d9d93e62 100644 --- a/qai_hub_models/models/quicksrnetsmall_quantized/perf.yaml +++ b/qai_hub_models/models/quicksrnetsmall_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: QuickSRNetSmall-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 974.0 - throughput: 1026.694045174538 + inference_time: 958.0 + throughput: 1043.8413361169103 estimated_peak_memory_range: - min: 12288 - max: 2677152 + min: 24576 + max: 1721664 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jvgd7z3zg + job_id: j2p0kyj65 job_status: Passed torchscript_onnx_qnn: - inference_time: 671.0 - throughput: 1490.312965722802 + inference_time: 665.0 + throughput: 1503.7593984962407 estimated_peak_memory_range: - min: 65536 - max: 3287624 + min: 16384 + max: 56667584 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 8 - job_id: jegnr7nq5 - job_status: Passed - torchscript_onnx_ort: - inference_time: 691.0 - throughput: 1447.178002894356 - estimated_peak_memory_range: - min: 53248 - max: 3206304 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 12 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 12 - job_id: j2p0evjn5 + job_id: j1p38k9l5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-11T11:58:18Z' + timestamp: '2024-06-22T23:06:39Z' - torchscript_onnx_tflite: - inference_time: 793.0 - throughput: 1261.034047919294 + inference_time: 831.0 + throughput: 1203.3694344163657 estimated_peak_memory_range: - min: 16384 - max: 19014000 + min: 20480 + max: 19530352 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jz57v7495 + job_id: j1p88oxxp job_status: Passed torchscript_onnx_qnn: - inference_time: 458.0 - throughput: 2183.406113537118 + inference_time: 452.0 + throughput: 2212.3893805309735 estimated_peak_memory_range: - min: 81920 - max: 14588544 + min: 65536 + max: 14002832 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 8 - job_id: jopr1n07g - job_status: Passed - torchscript_onnx_ort: - inference_time: 553.0 - throughput: 1808.3182640144666 - estimated_peak_memory_range: - min: 65536 - max: 10727936 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 12 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 12 - job_id: j1p8w4xop + job_id: jwgomyrx5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +120,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-11T11:58:19Z' + timestamp: '2024-06-22T23:06:40Z' - torchscript_onnx_tflite: - inference_time: 960.0 - throughput: 1041.6666666666667 + 
inference_time: 964.0 + throughput: 1037.344398340249 estimated_peak_memory_range: - min: 20480 - max: 1503368 + min: 24576 + max: 3058040 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jqp4j911p + job_id: jogkdz42p job_status: Passed torchscript_onnx_qnn: - inference_time: 672.0 - throughput: 1488.095238095238 + inference_time: 661.0 + throughput: 1512.8593040847202 estimated_peak_memory_range: - min: 12288 - max: 47001808 + min: 16384 + max: 18934480 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 8 - job_id: jqpyv7xlp + job_id: j7gj1x7xg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +158,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-11T11:58:17Z' + timestamp: '2024-06-22T23:06:43Z' + - torchscript_onnx_tflite: + inference_time: 1136.0 + throughput: 880.2816901408451 + estimated_peak_memory_range: + min: 28672 + max: 1437824 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 8 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 11 + job_id: jn5qw8y45 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 665.0 + throughput: 1503.7593984962407 + estimated_peak_memory_range: + min: 12288 + max: 10731904 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 8 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 8 + job_id: jlpe29z1p + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:06:44Z' - torchscript_onnx_tflite: - inference_time: 1754.0 - throughput: 570.1254275940707 + inference_time: 2549.0 + throughput: 392.31071008238524 estimated_peak_memory_range: min: 12288 - max: 13580528 + max: 14671248 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,7 +210,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: j0pxed4l5 + job_id: j1gl7nx85 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -207,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-11T11:58:12Z' + timestamp: '2024-06-22T23:06:36Z' - torchscript_onnx_tflite: - inference_time: 5837.0 - throughput: 171.32088401576152 + inference_time: 5757.0 + throughput: 173.70158068438423 estimated_peak_memory_range: - min: 249856 - max: 7133040 + min: 172032 + max: 7165888 primary_compute_unit: NPU precision: int8 layer_info: @@ -221,7 +233,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jo5mvdm95 + job_id: jw56v670p job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -230,13 +242,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-11T11:58:13Z' + timestamp: '2024-06-22T23:06:38Z' - torchscript_onnx_qnn: - inference_time: 718.0 - throughput: 1392.757660167131 + inference_time: 689.0 + throughput: 1451.3788098693758 estimated_peak_memory_range: - min: 1077248 - max: 1077248 + min: 57344 + max: 57344 primary_compute_unit: NPU precision: int8 layer_info: @@ -244,22 +256,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 8 - job_id: jep23vwqg - job_status: Passed - torchscript_onnx_ort: - inference_time: 698.0 - throughput: 1432.6647564469913 - estimated_peak_memory_range: - min: 7000064 - max: 7000064 - 
primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 12 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 12 - job_id: jogkr94n5 + job_id: j1pv43djp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -268,4 +265,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-11T11:58:20Z' + timestamp: '2024-06-22T23:06:41Z' diff --git a/qai_hub_models/models/real_esrgan_general_x4v3/export.py b/qai_hub_models/models/real_esrgan_general_x4v3/export.py index 4aa9e1fd..0e7bf90f 100644 --- a/qai_hub_models/models/real_esrgan_general_x4v3/export.py +++ b/qai_hub_models/models/real_esrgan_general_x4v3/export.py @@ -38,7 +38,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,13 +117,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -169,7 +168,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -187,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -209,7 +208,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/real_esrgan_general_x4v3/perf.yaml b/qai_hub_models/models/real_esrgan_general_x4v3/perf.yaml index 6f9afa6c..726c3bb8 100644 --- a/qai_hub_models/models/real_esrgan_general_x4v3/perf.yaml +++ b/qai_hub_models/models/real_esrgan_general_x4v3/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: Real-ESRGAN-General-x4v3 performance_metrics: - torchscript_onnx_tflite: - inference_time: 7261.0 - throughput: 137.72207684891887 + inference_time: 7246.0 + throughput: 138.0071763731714 estimated_peak_memory_range: - min: 17604608 - max: 25105264 + min: 6352896 + max: 7871664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 72 - job_id: jmg994wwg + job_id: jqp48qyvg job_status: Passed torchscript_onnx_qnn: - inference_time: 6295.0 - 
throughput: 158.85623510722795 + inference_time: 6271.0 + throughput: 159.46420028703557 estimated_peak_memory_range: - min: 221184 - max: 4921640 + min: 16384 + max: 21425416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jz57vdxv5 + job_id: jopr9kl9p job_status: Passed - torchscript_onnx_ort: - inference_time: 6938.0 - throughput: 144.13375612568464 + torchscript_onnx: + inference_time: 6849.0 + throughput: 146.00671630895022 estimated_peak_memory_range: - min: 6332416 - max: 55155560 + min: 8425472 + max: 21248128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 74 - job_id: jegnre3k5 + job_id: jogkdz22p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T22:59:37Z' + timestamp: '2024-06-22T23:07:19Z' - torchscript_onnx_tflite: - inference_time: 5588.0 - throughput: 178.9549033643522 + inference_time: 5415.0 + throughput: 184.67220683287167 estimated_peak_memory_range: min: 20480 - max: 56093568 + max: 59196832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 72 - job_id: jnp1q8e8g + job_id: j0pxmvl1g job_status: Passed torchscript_onnx_qnn: - inference_time: 4604.0 - throughput: 217.2024326672459 + inference_time: 4603.0 + throughput: 217.24961981316534 estimated_peak_memory_range: - min: 208896 - max: 37726496 + min: 0 + max: 29466816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jqp4jwv8p + job_id: jep2j8r45 job_status: Passed - torchscript_onnx_ort: - inference_time: 5181.0 - throughput: 193.01293186643505 + torchscript_onnx: + inference_time: 5194.0 + throughput: 192.52984212552946 estimated_peak_memory_range: - min: 2310144 - max: 36289552 + min: 8531968 + max: 46212560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 74 - job_id: jopr1ye0g + job_id: jn5qw8l45 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T22:59:38Z' + timestamp: '2024-06-22T23:07:20Z' - torchscript_onnx_tflite: - inference_time: 7376.0 - throughput: 135.57483731019522 + inference_time: 7329.0 + throughput: 136.4442625187611 estimated_peak_memory_range: - min: 6377472 - max: 7904672 + min: 15773696 + max: 18993704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 72 - job_id: jvgd7vorg + job_id: jo5m4r0w5 job_status: Passed torchscript_onnx_qnn: - inference_time: 6271.0 - throughput: 159.46420028703557 + inference_time: 6252.0 + throughput: 159.9488163787588 estimated_peak_memory_range: - min: 131072 - max: 5213032 + min: 20480 + max: 9802976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jo5mvz3d5 + job_id: j2p0kym65 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T22:59:36Z' + timestamp: '2024-06-22T23:07:17Z' 
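A note on reading these perf.yaml entries: the throughput field is simply 1e6 / inference_time, i.e. inferences per second with inference_time reported in microseconds. A quick illustrative check (not part of the patch):

```python
# Sanity check of the perf.yaml arithmetic (illustrative only, not part of the patch):
# throughput == 1e6 / inference_time, i.e. inferences per second with
# inference_time expressed in microseconds.
inference_time_us = 7246.0               # Samsung Galaxy S23 TFLite entry above
throughput_per_sec = 1_000_000 / inference_time_us
print(throughput_per_sec)                # 138.0071763731714, matching the YAML
```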
+ - torchscript_onnx_tflite: + inference_time: 7428.0 + throughput: 134.62574044157242 + estimated_peak_memory_range: + min: 15761408 + max: 17170032 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 69 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 72 + job_id: jegnx2zr5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 6297.0 + throughput: 158.8057805304113 + estimated_peak_memory_range: + min: 16384 + max: 6527512 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 72 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 72 + job_id: j1p88oexp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:07:18Z' - torchscript_onnx_qnn: - inference_time: 8670.0 - throughput: 115.34025374855824 + inference_time: 6159.0 + throughput: 162.3640201331385 estimated_peak_memory_range: - min: 208896 - max: 208896 + min: 212992 + max: 212992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: j0pxe1y35 + job_id: jqpyneo7g job_status: Passed - torchscript_onnx_ort: - inference_time: 7041.0 - throughput: 142.02528049992898 + torchscript_onnx: + inference_time: 7065.0 + throughput: 141.54281670205236 estimated_peak_memory_range: - min: 8646656 - max: 8646656 + min: 8929280 + max: 8929280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 74 - job_id: jep23mlrg + job_id: j1gl7ny85 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T22:59:39Z' + timestamp: '2024-06-22T23:07:22Z' diff --git a/qai_hub_models/models/real_esrgan_x4plus/export.py b/qai_hub_models/models/real_esrgan_x4plus/export.py index ab0454e1..14243e7b 100644 --- a/qai_hub_models/models/real_esrgan_x4plus/export.py +++ b/qai_hub_models/models/real_esrgan_x4plus/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -115,7 +115,6 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # 2. 
Compile the model to an on-device asset @@ -172,7 +171,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/real_esrgan_x4plus/model.py b/qai_hub_models/models/real_esrgan_x4plus/model.py index 965ce49b..758f6172 100644 --- a/qai_hub_models/models/real_esrgan_x4plus/model.py +++ b/qai_hub_models/models/real_esrgan_x4plus/model.py @@ -50,9 +50,7 @@ def from_pretrained( """Load RealESRGAN from a weightfile created by the source RealESRGAN repository.""" # Load PyTorch model from disk - realesrgan_model = _load_realesrgan_source_model_from_weights( - weight_path - ).eval() + realesrgan_model = _load_realesrgan_source_model_from_weights(weight_path) return cls(realesrgan_model) @@ -74,11 +72,10 @@ def forward(self, image: torch.Tensor) -> torch.Tensor: 3-channel Color Space: RGB """ - with torch.no_grad(): - # upscale - output = self.model(image) + # upscale + output = self.model(image) - output_img = output.squeeze().float().cpu().clamp_(0, 1) + output_img = output.squeeze().float().cpu().clamp_(0, 1) return output_img diff --git a/qai_hub_models/models/real_esrgan_x4plus/perf.yaml b/qai_hub_models/models/real_esrgan_x4plus/perf.yaml index c2ee5598..ceafb54d 100644 --- a/qai_hub_models/models/real_esrgan_x4plus/perf.yaml +++ b/qai_hub_models/models/real_esrgan_x4plus/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: Real-ESRGAN-x4plus performance_metrics: - torchscript_onnx_tflite: - inference_time: 71761.0 - throughput: 13.935145831301124 + inference_time: 69551.0 + throughput: 14.377938491179135 estimated_peak_memory_range: - min: 4210688 - max: 13102152 + min: 16384 + max: 5486376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: j2p0erl95 + job_id: j1p38kzl5 job_status: Passed torchscript_onnx_qnn: - inference_time: 70398.0 - throughput: 14.204949004233075 + inference_time: 67063.0 + throughput: 14.911352012286955 estimated_peak_memory_range: - min: 12288 - max: 106397920 + min: 94208 + max: 57757240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1031 - job_id: jn5q923np + job_id: jlpe2971p job_status: Passed - torchscript_onnx_ort: - inference_time: 65953.0 - throughput: 15.162312555911027 + torchscript_onnx: + inference_time: 68220.0 + throughput: 14.65845793022574 estimated_peak_memory_range: - min: 6344704 - max: 155593192 + min: 0 + max: 157156904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1030 - job_id: jwgoev3qp + job_id: jvgd0w6ep job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:00:20Z' + timestamp: 
'2024-06-22T23:08:06Z' - torchscript_onnx_tflite: - inference_time: 52163.0 - throughput: 19.170676533174856 + inference_time: 54479.0 + throughput: 18.355696690467887 estimated_peak_memory_range: - min: 77824 - max: 586842272 + min: 3272704 + max: 609931104 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: j1p8w7zkp + job_id: jwgomylx5 job_status: Passed torchscript_onnx_qnn: - inference_time: 50801.0 - throughput: 19.684651876931557 + inference_time: 50583.0 + throughput: 19.769487772571814 estimated_peak_memory_range: - min: 102400 - max: 264449376 + min: 73728 + max: 223617072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1031 - job_id: j1glek3jp + job_id: jygzwelkg job_status: Passed - torchscript_onnx_ort: - inference_time: 51691.0 - throughput: 19.34572749608249 + torchscript_onnx: + inference_time: 51083.0 + throughput: 19.57598418260478 estimated_peak_memory_range: - min: 6029312 - max: 190175536 + min: 6291456 + max: 172334736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1030 - job_id: j1pvzwwkg + job_id: jz576zolg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:00:21Z' + timestamp: '2024-06-22T23:08:07Z' - torchscript_onnx_tflite: - inference_time: 67995.0 - throughput: 14.706963747334363 + inference_time: 67790.0 + throughput: 14.75143826523086 estimated_peak_memory_range: - min: 1552384 - max: 4034000 + min: 20480 + max: 2510496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jogkry3w5 + job_id: j1pv43ljp job_status: Passed torchscript_onnx_qnn: - inference_time: 69414.0 - throughput: 14.406315728815512 + inference_time: 67113.0 + throughput: 14.900242873958845 estimated_peak_memory_range: - min: 0 - max: 56605216 + min: 49152 + max: 107219464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1031 - job_id: j1p3qme35 + job_id: jmg98volp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:00:19Z' + timestamp: '2024-06-22T23:08:04Z' + - torchscript_onnx_tflite: + inference_time: 71518.0 + throughput: 13.982493917615146 + estimated_peak_memory_range: + min: 3284992 + max: 7008096 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 1028 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 1028 + job_id: j7gj1xrxg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 66977.0 + throughput: 14.930498529345895 + estimated_peak_memory_range: + min: 118784 + max: 107384672 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 1031 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 1031 + job_id: jnp130o25 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:08:05Z' - torchscript_onnx_qnn: - inference_time: 73906.0 - throughput: 13.530701160934159 + inference_time: 65488.0 + throughput: 
15.2699731248473 estimated_peak_memory_range: - min: 212992 - max: 212992 + min: 204800 + max: 204800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1030 - job_id: jw56q1n6g + job_id: jz5wxoy6p job_status: Passed - torchscript_onnx_ort: - inference_time: 65787.0 - throughput: 15.20057154148996 + torchscript_onnx: + inference_time: 65810.0 + throughput: 15.1952590791673 estimated_peak_memory_range: - min: 233472 - max: 233472 + min: 270336 + max: 270336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1030 - job_id: j7gjkllv5 + job_id: jqp48qevg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:00:22Z' + timestamp: '2024-06-22T23:08:08Z' diff --git a/qai_hub_models/models/regnet/export.py b/qai_hub_models/models/regnet/export.py index 48731313..0954dcb2 100644 --- a/qai_hub_models/models/regnet/export.py +++ b/qai_hub_models/models/regnet/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/regnet/perf.yaml b/qai_hub_models/models/regnet/perf.yaml index 6dbdb072..627462fa 100644 --- a/qai_hub_models/models/regnet/perf.yaml +++ b/qai_hub_models/models/regnet/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: RegNet performance_metrics: - torchscript_onnx_tflite: - inference_time: 2344.0 - throughput: 426.6211604095563 + inference_time: 2041.0 + throughput: 489.9559039686428 estimated_peak_memory_range: - min: 40960 - max: 2564000 + min: 20480 + max: 2516576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 114 - job_id: jygzv77op + job_id: jo5m4r9w5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2105.0 - 
throughput: 475.05938242280286 + inference_time: 2111.0 + throughput: 473.70914258645195 estimated_peak_memory_range: - min: 16384 - max: 66214464 + min: 622592 + max: 11175952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - job_id: jnp1q888g + job_id: jqpyne87g job_status: Passed - torchscript_onnx_ort: - inference_time: 2313.0 - throughput: 432.33895373973195 + torchscript_onnx: + inference_time: 2362.0 + throughput: 423.3700254022015 estimated_peak_memory_range: - min: 16384 - max: 109504192 + min: 12288 + max: 110151488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 190 - job_id: jnp1q887g + job_id: j1gl7nw85 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:00:51Z' + timestamp: '2024-06-22T23:08:40Z' - torchscript_onnx_tflite: - inference_time: 1623.0 - throughput: 616.1429451632779 + inference_time: 1413.0 + throughput: 707.7140835102618 estimated_peak_memory_range: min: 16384 - max: 137911392 + max: 142891136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 114 - job_id: jz5wm993g + job_id: jegnx21r5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1494.0 - throughput: 669.3440428380187 + inference_time: 1485.0 + throughput: 673.4006734006734 estimated_peak_memory_range: min: 618496 - max: 75619760 + max: 68646048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - job_id: jvgd7vvrg + job_id: j2p0kyo65 job_status: Passed - torchscript_onnx_ort: - inference_time: 1637.0 - throughput: 610.8735491753207 + torchscript_onnx: + inference_time: 1562.0 + throughput: 640.2048655569782 estimated_peak_memory_range: - min: 0 - max: 37581584 + min: 618496 + max: 35955472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 190 - job_id: jvgd7vvzg + job_id: jw56v6o0p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:00:52Z' + timestamp: '2024-06-22T23:08:41Z' - torchscript_onnx_tflite: - inference_time: 2318.0 - throughput: 431.40638481449525 + inference_time: 2008.0 + throughput: 498.00796812749 estimated_peak_memory_range: - min: 16384 - max: 2479152 + min: 24576 + max: 1706424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 114 - job_id: jmg9944wg + job_id: jopr9kx9p job_status: Passed torchscript_onnx_qnn: - inference_time: 2102.0 - throughput: 475.7373929590866 + inference_time: 2109.0 + throughput: 474.158368895211 estimated_peak_memory_range: min: 12288 - max: 14056768 + max: 66527232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - job_id: jmg99448g + job_id: jogkdz62p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:00:50Z' + timestamp: '2024-06-22T23:08:38Z' + - torchscript_onnx_tflite: + 
inference_time: 2028.0 + throughput: 493.0966469428008 + estimated_peak_memory_range: + min: 16384 + max: 2181800 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 114 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 114 + job_id: jep2j8o45 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 2108.0 + throughput: 474.3833017077799 + estimated_peak_memory_range: + min: 24576 + max: 16473816 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 188 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 188 + job_id: jn5qw8445 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:08:39Z' - torchscript_onnx_qnn: - inference_time: 2475.0 - throughput: 404.04040404040404 + inference_time: 2204.0 + throughput: 453.7205081669691 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - job_id: jz5wm99mg + job_id: j1p88ojxp job_status: Passed - torchscript_onnx_ort: - inference_time: 2215.0 - throughput: 451.46726862302484 + torchscript_onnx: + inference_time: 2182.0 + throughput: 458.29514207149407 estimated_peak_memory_range: - min: 651264 - max: 651264 + min: 69373952 + max: 69373952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 190 - job_id: jz57vdd95 + job_id: j1p38kol5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:00:53Z' + timestamp: '2024-06-22T23:08:42Z' diff --git a/qai_hub_models/models/regnet_quantized/README.md b/qai_hub_models/models/regnet_quantized/README.md new file mode 100644 index 00000000..c20388d0 --- /dev/null +++ b/qai_hub_models/models/regnet_quantized/README.md @@ -0,0 +1,61 @@ +[![Qualcomm® AI Hub Models](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/quic-logo.jpg)](../../README.md) + + +# [RegNetQuantized: Imagenet classifier and general purpose backbone](https://aihub.qualcomm.com/models/regnet_quantized) + +RegNet is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. + +This is based on the implementation of RegNetQuantized found +[here](https://github.com/pytorch/vision/blob/main/torchvision/models/regnet.py). This repository contains scripts for optimized on-device +export suitable to run on Qualcomm® devices. More details on model performance +across various devices can be found [here](https://aihub.qualcomm.com/models/regnet_quantized). + +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on +a hosted Qualcomm® device. + + + + +## Example & Usage + +Install the package via pip: +```bash +pip install "qai_hub_models[regnet_quantized]" +``` + + +Once installed, run the following simple CLI demo: + +```bash +python -m qai_hub_models.models.regnet_quantized.demo +``` +More details on the CLI tool can be found with the `--help` option. See +[demo.py](demo.py) for sample usage of the model including pre/post processing +scripts. Please refer to our [general instructions on using +models](../../../#getting-started) for more usage instructions.
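For readers who would rather drive the model from Python than the CLI demo, a minimal sketch is below (illustrative only, not part of the new README; it assumes the default pretrained weights and the `Model` alias exported by this package's `__init__.py`):

```python
# Minimal sketch, assuming default pretrained weights/encodings.
# Model is the RegNetQuantizable alias exported by
# qai_hub_models.models.regnet_quantized.__init__ (shown below in this diff).
from qai_hub_models.models.regnet_quantized import Model

model = Model.from_pretrained()       # same entry point used by demo.py and export.py
input_spec = model.get_input_spec()   # the spec the export script compiles against
print(input_spec)                     # expected to describe an "image_tensor" input
```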
+ +## Export for on-device deployment + +This repository contains export scripts that produce a model optimized for +on-device deployment. This can be run as follows: + +```bash +python -m qai_hub_models.models.regnet_quantized.export +``` +Additional options are documented with the `--help` option. Note that the above +script requires access to Qualcomm® AI Hub. See the Qualcomm® AI Hub deployment instructions for details. + +## License +- The license for the original implementation of RegNetQuantized can be found + [here](https://github.com/pytorch/vision/blob/main/LICENSE). +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) + +## References +* [Designing Network Design Spaces](https://arxiv.org/abs/2003.13678) +* [Source Model Implementation](https://github.com/pytorch/vision/blob/main/torchvision/models/regnet.py) + +## Community +* Join [our AI Hub Slack community](https://qualcomm-ai-hub.slack.com/join/shared_invite/zt-2d5zsmas3-Sj0Q9TzslueCjS31eXG2UA#/shared-invite/email) to collaborate, post questions and learn more about on-device AI. +* For questions or feedback please [reach out to us](mailto:ai-hub-support@qti.qualcomm.com). + + diff --git a/qai_hub_models/models/regnet_quantized/__init__.py b/qai_hub_models/models/regnet_quantized/__init__.py new file mode 100644 index 00000000..5803cf7b --- /dev/null +++ b/qai_hub_models/models/regnet_quantized/__init__.py @@ -0,0 +1,10 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.imagenet_classifier.app import ( # noqa: F401 + ImagenetClassifierApp as App, +) + +from .model import MODEL_ID # noqa: F401 +from .model import RegNetQuantizable as Model # noqa: F401 diff --git a/qai_hub_models/models/regnet_quantized/conftest.py b/qai_hub_models/models/regnet_quantized/conftest.py new file mode 100644 index 00000000..692d0feb --- /dev/null +++ b/qai_hub_models/models/regnet_quantized/conftest.py @@ -0,0 +1,37 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +import inspect + +import pytest + +from qai_hub_models.models.regnet_quantized import Model + + +# Instantiate the model only once for all tests. +# Mock from_pretrained to always return the initialized model. +# This speeds up tests and limits memory leaks.
+@pytest.fixture(scope="module", autouse=True) +def cached_from_pretrained(): + with pytest.MonkeyPatch.context() as mp: + pretrained_cache = {} + from_pretrained = Model.from_pretrained + sig = inspect.signature(from_pretrained) + + def _cached_from_pretrained(*args, **kwargs): + cache_key = str(args) + str(kwargs) + model = pretrained_cache.get(cache_key, None) + if model: + return model + else: + model = from_pretrained(*args, **kwargs) + pretrained_cache[cache_key] = model + return model + + _cached_from_pretrained.__signature__ = sig + + mp.setattr(Model, "from_pretrained", _cached_from_pretrained) + yield mp diff --git a/qai_hub_models/models/regnet_quantized/demo.py b/qai_hub_models/models/regnet_quantized/demo.py new file mode 100644 index 00000000..d7513945 --- /dev/null +++ b/qai_hub_models/models/regnet_quantized/demo.py @@ -0,0 +1,14 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo +from qai_hub_models.models.regnet_quantized.model import MODEL_ID, RegNetQuantizable + + +def main(is_test: bool = False): + imagenet_demo(RegNetQuantizable, MODEL_ID, is_test) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/regnet_quantized/evaluate.py b/qai_hub_models/models/regnet_quantized/evaluate.py new file mode 100644 index 00000000..4eb83eec --- /dev/null +++ b/qai_hub_models/models/regnet_quantized/evaluate.py @@ -0,0 +1,62 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.regnet_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args +from qai_hub_models.utils.quantization_aimet import AIMETQuantizableMixin + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + + # Use Fp16 model for torch inference + for cls in Model.__mro__: + if issubclass(cls, BaseModel) and not issubclass(cls, AIMETQuantizableMixin): + torch_cls = cls + break + torch_model = torch_cls.from_pretrained(**get_model_kwargs(torch_cls, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/regnet_quantized/export.py b/qai_hub_models/models/regnet_quantized/export.py new file mode 100644 index 00000000..9a3d5b70 --- /dev/null +++ b/qai_hub_models/models/regnet_quantized/export.py @@ -0,0 +1,236 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import os +import warnings +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple, cast + +import qai_hub as hub + +from qai_hub_models.models.regnet_quantized import Model +from qai_hub_models.utils.args import ( + export_parser, + get_input_spec_kwargs, + get_model_kwargs, +) +from qai_hub_models.utils.base_model import TargetRuntime +from qai_hub_models.utils.compare import torch_inference +from qai_hub_models.utils.printing import ( + print_inference_metrics, + print_on_target_demo_cmd, + print_profile_metrics_from_job, +) +from qai_hub_models.utils.qai_hub_helpers import ( + can_access_qualcomm_ai_hub, + export_without_hub_access, + transpose_channel_first_to_last, +) +from qai_hub_models.utils.qnn_helpers import get_qnn_inputs + + +def export_model( + device: str = "Samsung Galaxy S23 (Family)", + chipset: Optional[str] = None, + skip_profiling: bool = False, + skip_inferencing: bool = False, + skip_downloading: bool = False, + skip_summary: bool = False, + output_dir: Optional[str] = None, + target_runtime: TargetRuntime = TargetRuntime.TFLITE, + compile_options: str = "", + profile_options: str = "", + **additional_model_kwargs, +) -> Tuple[hub.CompileJob, Optional[hub.ProfileJob], Optional[hub.InferenceJob]] | List[ + str +]: + """ + This function accomplishes 6 main tasks: + + 1. Instantiates a PyTorch model and converts it to a traced TorchScript format. + 2. 
Compiles the model to an asset that can be run on device. + 3. Profiles the model performance on real devices. + 4. Inferences the model on sample inputs. + 5. Downloads the model asset to the local directory. + 6. Summarizes the results from profiling and inference. + + Each of the last four steps can be optionally skipped using the input options. + + Parameters: + device: Device for which to export the model. + Full list of available devices can be found by running `hub.get_devices()`. + Defaults to DEFAULT_DEVICE if not specified. + chipset: If set, will choose a random device with this chipset. + Overrides the `device` argument. + skip_profiling: If set, skips profiling of compiled model on real devices. + skip_inferencing: If set, skips computing on-device outputs from sample data. + skip_downloading: If set, skips downloading of compiled model. + skip_summary: If set, skips waiting for and summarizing results + from profiling and inference. + output_dir: Directory to store generated assets (e.g. compiled model). + Defaults to `/build/`. + target_runtime: Which on-device runtime to target. Default is TFLite. + compile_options: Additional options to pass when submitting the compile job. + profile_options: Additional options to pass when submitting the profile job. + **additional_model_kwargs: Additional optional kwargs used to customize + `model_cls.from_pretrained` and `model.get_input_spec` + + Returns: + A 3-tuple of: + * A CompileJob object containing metadata about the compile job submitted to hub. + * A ProfileJob containing metadata about the profile job (None if profiling skipped). + * An InferenceJob containing metadata about the inference job (None if inferencing skipped). + """ + model_name = "regnet_quantized" + output_path = Path(output_dir or Path.cwd() / "build" / model_name) + if chipset: + hub_device = hub.Device(attributes=f"chipset:{chipset}") + else: + hub_device = hub.Device(name=device) + if not can_access_qualcomm_ai_hub(): + return export_without_hub_access( + "regnet_quantized", + "RegNetQuantized", + device, + skip_profiling, + skip_inferencing, + skip_downloading, + skip_summary, + output_path, + target_runtime, + compile_options, + profile_options, + ) + + # 1. Initialize PyTorch model + model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) + input_spec = model.get_input_spec( + **get_input_spec_kwargs(model, additional_model_kwargs) + ) + + # Trace the model + source_model = model.convert_to_hub_source_model( + target_runtime, output_path, input_spec + ) + if target_runtime == TargetRuntime.TFLITE: + quant_calibration_data = None + else: + quant_calibration_data = model.get_calibration_data(target_runtime, input_spec) + + # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) + channel_last_flags = ( + " --force_channel_last_input image_tensor" + if target_runtime != TargetRuntime.ONNX + else "" + ) + + # 2. Compile the model to an on-device asset + model_compile_options = model.get_hub_compile_options( + target_runtime, compile_options + channel_last_flags, hub_device + ) + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( + model=source_model, + input_specs=input_spec, + device=hub_device, + name=model_name, + calibration_data=quant_calibration_data, + options=model_compile_options, + ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) + + # 3. 
Profile the model asset on real devices + profile_job: Optional[hub.client.ProfileJob] = None + if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print(f"Profiling model {model_name} on a hosted device.") + submitted_profile_job = hub.submit_profile_job( + model=compile_job.get_target_model(), + device=hub_device, + name=model_name, + options=profile_options_all, + ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) + + # 4. Run inference on-device with sample inputs + inference_job: Optional[hub.client.InferenceJob] = None + if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print( + f"Running inference for {model_name} on a hosted device with example inputs." + ) + sample_inputs = model.sample_inputs(input_spec) + hub_inputs = sample_inputs + if target_runtime == TargetRuntime.QNN: + hub_inputs = get_qnn_inputs(compile_job, sample_inputs) + # Convert inputs from channel first to channel last + hub_inputs = ( + sample_inputs + if target_runtime == TargetRuntime.ONNX + else transpose_channel_first_to_last( + "image_tensor", sample_inputs, target_runtime + ) + ) + submitted_inference_job = hub.submit_inference_job( + model=compile_job.get_target_model(), + inputs=hub_inputs, + device=hub_device, + name=model_name, + options=profile_options_all, + ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) + + # 5. Download the model asset to a local file + if not skip_downloading: + if target_runtime == TargetRuntime.QNN: + target_runtime_extension = "so" + elif target_runtime == TargetRuntime.TFLITE: + target_runtime_extension = "tflite" + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: + target_runtime_extension = "onnx" + + os.makedirs(output_path, exist_ok=True) + target_model: hub.Model = compile_job.get_target_model() # type: ignore + target_model.download( + str(output_path / f"{model_name}.{target_runtime_extension}") + ) + + # 6. Summarize the results from profiling and inference + if not skip_summary and not skip_profiling: + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore + print_profile_metrics_from_job(profile_job, profile_data) + + if not skip_summary and not skip_inferencing: + torch_out = torch_inference(model, sample_inputs) + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore + print_inference_metrics( + inference_job, inference_result, torch_out, metrics="psnr,top1,top5" + ) + + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + + return (compile_job, profile_job, inference_job) + + +def main(): + warnings.filterwarnings("ignore") + parser = export_parser(model_cls=Model) + args = parser.parse_args() + export_model(**vars(args)) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/regnet_quantized/info.yaml b/qai_hub_models/models/regnet_quantized/info.yaml new file mode 100644 index 00000000..170239e5 --- /dev/null +++ b/qai_hub_models/models/regnet_quantized/info.yaml @@ -0,0 +1,44 @@ +name: RegNetQuantized +# id must match with the model dir name in qai_hub_models +id: regnet_quantized +status: public +headline: Imagenet classifier and general purpose backbone. 
+domain: Computer Vision +description: RegNet is a machine learning model that can classify images from the + Imagenet dataset. It can also be used as a backbone in building more complex models + for specific use cases. +use_case: Image Classification +tags: + - backbone + - quantized +research_paper: https://arxiv.org/abs/2003.13678 +research_paper_title: Designing Network Design Spaces +license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/regnet.py +technical_details: + Model checkpoint: Imagenet + Input resolution: 224x224 + Number of parameters: 15.3M + Model size: 15.4 MB +applicable_scenarios: + - Medical Imaging + - Anomaly Detection + - Inventory Management +related_models: + - mobilenet_v2 + - densenet121 + - googlenet +form_factors: + - Phone + - Tablet + - IoT + - XR +has_static_banner: yes +has_animated_banner: yes +license_type: bsd-3-clause +deploy_license_type: AI Model Hub License +dataset: + - imagenet-1k + - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/regnet_quantized/model.py b/qai_hub_models/models/regnet_quantized/model.py new file mode 100644 index 00000000..47e79fed --- /dev/null +++ b/qai_hub_models/models/regnet_quantized/model.py @@ -0,0 +1,86 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from __future__ import annotations + +# isort: off +# This verifies aimet is installed, and this must be included first. +from qai_hub_models.utils.quantization_aimet import ( + AIMETQuantizableMixin, + constrain_quantized_inputs_to_image_range, +) + +# isort: on + +import torch +from aimet_torch.cross_layer_equalization import ( + equalize_bn_folded_model, + fold_all_batch_norms, +) +from aimet_torch.model_preparer import prepare_model +from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim + +from qai_hub_models.models.regnet.model import RegNet +from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset + +MODEL_ID = __name__.split(".")[-2] +MODEL_ASSET_VERSION = 4 +DEFAULT_ENCODINGS = "regnet_quantized_encodings.json" + + +class RegNetQuantizable(AIMETQuantizableMixin, RegNet): + """RegNet with post train quantization support. + + Supports only 8 bit weights and activations, and only loads pre-quantized checkpoints. + Support for quantizing using your own weights & data will come at a later date.""" + + def __init__( + self, + sim_model: QuantizationSimModel, + ) -> None: + # Input is already normalized by sim_model. Disable it in the wrapper model. + RegNet.__init__(self, sim_model.model, normalize_input=False) + AIMETQuantizableMixin.__init__( + self, + sim_model, + ) + + @classmethod + def from_pretrained( + cls, + aimet_encodings: str | None = "DEFAULT", + ) -> "RegNetQuantizable": + """ + Parameters: + aimet_encodings: + if "DEFAULT": Loads the model with aimet encodings calibrated on imagenette. + elif None: Doesn't load any encodings. Used when computing encodings. + else: Interprets as a filepath and loads the encodings stored there. 
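+
+            For example, RegNetQuantizable.from_pretrained() fetches the default
+            encodings JSON from the asset store, while
+            from_pretrained(aimet_encodings="/path/to/my_encodings.json") loads
+            a locally produced AIMET encodings file instead.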
+ """ + model = RegNet.from_pretrained() + input_shape = cls.get_input_spec()["image_tensor"][0] + model = prepare_model(model) + dummy_input = torch.rand(input_shape) + + pairs = fold_all_batch_norms(model, input_shape, dummy_input) + equalize_bn_folded_model(model, input_shape, pairs, dummy_input) + sim = QuantizationSimModel( + model, + quant_scheme="tf_enhanced", + default_param_bw=8, + default_output_bw=8, + config_file=get_default_aimet_config(), + dummy_input=dummy_input, + ) + constrain_quantized_inputs_to_image_range(sim) + + if aimet_encodings: + if aimet_encodings == "DEFAULT": + aimet_encodings = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, DEFAULT_ENCODINGS + ).fetch() + load_encodings_to_sim(sim, aimet_encodings) + + return cls(sim) diff --git a/qai_hub_models/models/regnet_quantized/perf.yaml b/qai_hub_models/models/regnet_quantized/perf.yaml new file mode 100644 index 00000000..fa000bc3 --- /dev/null +++ b/qai_hub_models/models/regnet_quantized/perf.yaml @@ -0,0 +1,265 @@ +aggregated: + supported_oses: + - Android + supported_devices: + - Google Pixel 3 + - Google Pixel 3a + - Google Pixel 3a XL + - Google Pixel 4 + - Google Pixel 4a + - Google Pixel 5a 5G + - QCS8250 (Proxy) + - QCS8550 (Proxy) + - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) + - Samsung Galaxy S21 + - Samsung Galaxy S21 Ultra + - Samsung Galaxy S21+ + - Samsung Galaxy S22 5G + - Samsung Galaxy S22 Ultra 5G + - Samsung Galaxy S22+ 5G + - Samsung Galaxy S23 + - Samsung Galaxy S23 Ultra + - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ + - Samsung Galaxy Tab S8 + - Snapdragon X Elite CRD + - Xiaomi 12 + - Xiaomi 12 Pro + supported_chipsets: + - Qcs8250 + - Qcs8550 + - Sa8540p + - Sa8775p + - Snapdragon® 8 Gen 1 + - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 + - Snapdragon® 888 + - Snapdragon® X Elite +models: +- name: RegNetQuantized + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 898.0 + throughput: 1113.5857461024498 + estimated_peak_memory_range: + min: 28672 + max: 1533712 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 114 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 114 + job_id: j1pv432jp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1009.0 + throughput: 991.0802775024777 + estimated_peak_memory_range: + min: 16384 + max: 63537136 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 113 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 113 + job_id: jnp130125 + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-06-22T23:09:40Z' + - torchscript_onnx_tflite: + inference_time: 639.0 + throughput: 1564.9452269170579 + estimated_peak_memory_range: + min: 16384 + max: 131360192 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 114 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 114 + job_id: j7gj1x3xg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 735.0 + throughput: 1360.544217687075 + estimated_peak_memory_range: + min: 163840 + max: 64946208 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 113 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 113 + job_id: jvgd0w4ep + job_status: Passed + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + 
manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-06-22T23:09:41Z' + - torchscript_onnx_tflite: + inference_time: 887.0 + throughput: 1127.3957158962796 + estimated_peak_memory_range: + min: 12288 + max: 1452496 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 114 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 114 + job_id: jlpe2961p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1006.0 + throughput: 994.0357852882704 + estimated_peak_memory_range: + min: 24576 + max: 52560944 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 113 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 113 + job_id: jmg98v2wp + job_status: Passed + reference_device_info: + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: Qcs8550 + timestamp: '2024-06-22T23:09:44Z' + - torchscript_onnx_tflite: + inference_time: 892.0 + throughput: 1121.0762331838564 + estimated_peak_memory_range: + min: 12288 + max: 1588400 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 114 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 114 + job_id: jygzwezkg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1011.0 + throughput: 989.1196834817013 + estimated_peak_memory_range: + min: 180224 + max: 11914200 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 113 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 113 + job_id: jnp130185 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:09:45Z' + - torchscript_onnx_tflite: + inference_time: 29271.0 + throughput: 34.1635065423115 + estimated_peak_memory_range: + min: 114688 + max: 75067024 + primary_compute_unit: GPU + precision: int8 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 114 + layers_on_cpu: 0 + total_layers: 114 + job_id: jz5wxoz6p + job_status: Passed + reference_device_info: + name: RB3 Gen 2 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: Qcs6490 + timestamp: '2024-06-22T23:09:37Z' + - torchscript_onnx_tflite: + inference_time: 42180.0 + throughput: 23.70791844476055 + estimated_peak_memory_range: + min: 299008 + max: 67927560 + primary_compute_unit: GPU + precision: int8 + layer_info: + layers_on_npu: 10 + layers_on_gpu: 91 + layers_on_cpu: 13 + total_layers: 114 + job_id: jmg98v2lp + job_status: Passed + reference_device_info: + name: RB5 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: Qcs8250 + timestamp: '2024-06-22T23:09:38Z' + - torchscript_onnx_qnn: + inference_time: 1104.0 + throughput: 905.7971014492754 + estimated_peak_memory_range: + min: 495616 + max: 495616 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 113 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 113 + job_id: jz5wxoz3p + job_status: Passed + reference_device_info: + name: Snapdragon X Elite CRD + os: '11' + form_factor: Compute + os_name: Windows + manufacturer: Qualcomm + chipset: Snapdragon® X Elite + timestamp: '2024-06-22T23:09:42Z' diff --git a/qai_hub_models/models/regnet_quantized/requirements.txt b/qai_hub_models/models/regnet_quantized/requirements.txt new file mode 100644 index 00000000..e3567f29 --- /dev/null +++ b/qai_hub_models/models/regnet_quantized/requirements.txt @@ -0,0 +1 @@ 
+aimet-torch==1.31.2; sys_platform == "linux" diff --git a/qai_hub_models/models/regnet_quantized/test.py b/qai_hub_models/models/regnet_quantized/test.py new file mode 100644 index 00000000..6018cb2a --- /dev/null +++ b/qai_hub_models/models/regnet_quantized/test.py @@ -0,0 +1,30 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( + run_imagenet_classifier_test, +) +from qai_hub_models.models.regnet_quantized.demo import main as demo_main +from qai_hub_models.models.regnet_quantized.model import ( + MODEL_ASSET_VERSION, + MODEL_ID, + RegNetQuantizable, +) + + +def test_task(): + run_imagenet_classifier_test( + RegNetQuantizable.from_pretrained(), + MODEL_ID, + probability_threshold=0.45, + diff_tol=0.005, + atol=0.2, + rtol=0.02, + asset_version=MODEL_ASSET_VERSION, + ) + + +def test_demo(): + # Verify demo does not crash + demo_main(is_test=True) diff --git a/qai_hub_models/models/resnet101/export.py b/qai_hub_models/models/resnet101/export.py index 337029e4..6bc47ee7 100644 --- a/qai_hub_models/models/resnet101/export.py +++ b/qai_hub_models/models/resnet101/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/resnet101/perf.yaml b/qai_hub_models/models/resnet101/perf.yaml index c30b049f..61225b6f 100644 --- a/qai_hub_models/models/resnet101/perf.yaml +++ b/qai_hub_models/models/resnet101/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: ResNet101 performance_metrics: - torchscript_onnx_tflite: - inference_time: 3383.0 - throughput: 295.5956251847473 + inference_time: 3404.0 + throughput: 293.7720329024677 estimated_peak_memory_range: - min: 16384 - max: 2493664 + min: 204800 + max: 2170488 primary_compute_unit: 
NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: j1pvzw1rg + job_id: jopr9kd0p job_status: Passed torchscript_onnx_qnn: - inference_time: 3448.0 - throughput: 290.0232018561485 + inference_time: 3471.0 + throughput: 288.1014116969173 estimated_peak_memory_range: - min: 16384 - max: 173843416 + min: 622592 + max: 174493464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jygzv7xxp + job_id: j1p88orkp job_status: Passed - torchscript_onnx_ort: - inference_time: 3614.0 - throughput: 276.70171555063644 + torchscript_onnx: + inference_time: 3563.0 + throughput: 280.662363177098 estimated_peak_memory_range: - min: 45056 - max: 355647168 + min: 12288 + max: 322812304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: jvgd7vrzg + job_id: j1p38k735 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:02:27Z' + timestamp: '2024-06-22T23:10:25Z' - torchscript_onnx_tflite: - inference_time: 2440.0 - throughput: 409.8360655737705 + inference_time: 2446.0 + throughput: 408.8307440719542 estimated_peak_memory_range: min: 16384 - max: 109471344 + max: 116075088 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: j7gjkl0e5 + job_id: jep2j8dr5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2514.0 - throughput: 397.77247414478916 + inference_time: 2511.0 + throughput: 398.24771007566704 estimated_peak_memory_range: min: 618496 - max: 81083536 + max: 73670352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jz5wm9dmg + job_id: jogkdz0wp job_status: Passed - torchscript_onnx_ort: - inference_time: 2575.0 - throughput: 388.3495145631068 + torchscript_onnx: + inference_time: 2592.0 + throughput: 385.8024691358025 estimated_peak_memory_range: min: 618496 - max: 46866960 + max: 41407440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: jz57vdj95 + job_id: jwgomywq5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:02:28Z' + timestamp: '2024-06-22T23:10:26Z' - torchscript_onnx_tflite: - inference_time: 3388.0 - throughput: 295.159386068477 + inference_time: 3411.0 + throughput: 293.1691586045148 estimated_peak_memory_range: - min: 28672 - max: 1888064 + min: 24576 + max: 2249240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jlpe4vrv5 + job_id: jqpyne28g job_status: Passed torchscript_onnx_qnn: - inference_time: 3458.0 - throughput: 289.1844997108155 + inference_time: 3460.0 + throughput: 289.01734104046244 estimated_peak_memory_range: - min: 626688 - max: 163514888 + min: 618496 + max: 152968824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jnp1q8d7g + job_id: j1gl7n8j5 job_status: Passed 
reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:02:26Z' + timestamp: '2024-06-22T23:10:23Z' + - torchscript_onnx_tflite: + inference_time: 3389.0 + throughput: 295.0722927117144 + estimated_peak_memory_range: + min: 53248 + max: 2091288 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 147 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 147 + job_id: j2p0ky995 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 3449.0 + throughput: 289.93911278631487 + estimated_peak_memory_range: + min: 618496 + max: 163506848 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 245 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 245 + job_id: jw56v6m6p + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:10:24Z' - torchscript_onnx_qnn: - inference_time: 4024.0 - throughput: 248.5089463220676 + inference_time: 3490.0 + throughput: 286.5329512893983 estimated_peak_memory_range: - min: 1011712 - max: 1011712 + min: 602112 + max: 602112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jmg99438g + job_id: jn5qw81n5 job_status: Passed - torchscript_onnx_ort: - inference_time: 3536.0 - throughput: 282.80542986425337 + torchscript_onnx: + inference_time: 3502.0 + throughput: 285.5511136493432 estimated_peak_memory_range: - min: 43122688 - max: 43122688 + min: 48291840 + max: 48291840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: jqp4jwx1p + job_id: j1pv43nkp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:02:29Z' + timestamp: '2024-06-22T23:10:27Z' diff --git a/qai_hub_models/models/resnet101_quantized/export.py b/qai_hub_models/models/resnet101_quantized/export.py index 109a3790..88067ea0 100644 --- a/qai_hub_models/models/resnet101_quantized/export.py +++ b/qai_hub_models/models/resnet101_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" 
os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/resnet101_quantized/model.py b/qai_hub_models/models/resnet101_quantized/model.py index f8475316..c4cfa229 100644 --- a/qai_hub_models/models/resnet101_quantized/model.py +++ b/qai_hub_models/models/resnet101_quantized/model.py @@ -86,5 +86,4 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() return cls(sim) diff --git a/qai_hub_models/models/resnet101_quantized/perf.yaml b/qai_hub_models/models/resnet101_quantized/perf.yaml index 9bf5bafe..2c0ca710 100644 --- a/qai_hub_models/models/resnet101_quantized/perf.yaml +++ b/qai_hub_models/models/resnet101_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -45,8 +49,8 @@ models: inference_time: 1188.0 throughput: 841.7508417508418 estimated_peak_memory_range: - min: 0 - max: 1614400 + min: 12288 + max: 3233240 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jo5mvzw95 + job_id: jlpe29nop job_status: Passed torchscript_onnx_qnn: - inference_time: 1377.0 - throughput: 726.2164124909223 + inference_time: 1371.0 + throughput: 729.3946024799417 estimated_peak_memory_range: min: 12288 - max: 58349752 + max: 58942072 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: j2p0er1n5 - job_status: Passed - torchscript_onnx_ort: - inference_time: 1486.0 - throughput: 672.9475100942127 - estimated_peak_memory_range: - min: 12288 - max: 87121872 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 151 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 151 - job_id: j1glekemp + job_id: jz576z8vg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:04:23Z' + timestamp: '2024-06-22T23:12:17Z' - torchscript_onnx_tflite: - inference_time: 927.0 - throughput: 1078.7486515641856 + inference_time: 912.0 + throughput: 1096.4912280701753 estimated_peak_memory_range: min: 12288 - max: 93411600 + max: 98535088 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jegnre9q5 + job_id: jygzwe0og job_status: Passed torchscript_onnx_qnn: - inference_time: 1078.0 - throughput: 927.643784786642 + inference_time: 1051.0 + throughput: 951.4747859181732 estimated_peak_memory_range: min: 163840 - max: 66249856 + max: 58236944 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: j1p8w73op - job_status: Passed - torchscript_onnx_ort: - inference_time: 1162.0 - throughput: 860.5851979345955 - estimated_peak_memory_range: - min: 0 - max: 47460512 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 151 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 151 - job_id: jw56q1qyg + job_id: jqp48q28g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +120,13 @@ models: os_name: Android 
manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:04:24Z' + timestamp: '2024-06-22T23:12:18Z' - torchscript_onnx_tflite: - inference_time: 1171.0 - throughput: 853.9709649871904 + inference_time: 1183.0 + throughput: 845.30853761623 estimated_peak_memory_range: min: 12288 - max: 1692848 + max: 1566288 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jopr1y47g + job_id: jz5wxor3p job_status: Passed torchscript_onnx_qnn: - inference_time: 1379.0 - throughput: 725.1631617113851 + inference_time: 1377.0 + throughput: 726.2164124909223 estimated_peak_memory_range: - min: 12288 - max: 47947408 + min: 16384 + max: 398643256 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jn5q927op + job_id: jo5m4rld5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +158,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:04:22Z' + timestamp: '2024-06-22T23:12:21Z' - torchscript_onnx_tflite: - inference_time: 4690.0 - throughput: 213.21961620469082 + inference_time: 1187.0 + throughput: 842.4599831508003 + estimated_peak_memory_range: + min: 20480 + max: 54362504 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 148 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 148 + job_id: jmg98vqwp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1376.0 + throughput: 726.7441860465116 estimated_peak_memory_range: min: 12288 - max: 30183472 + max: 49478392 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 146 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 146 + job_id: jegnx2wk5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:12:22Z' + - torchscript_onnx_tflite: + inference_time: 4848.0 + throughput: 206.27062706270627 + estimated_peak_memory_range: + min: 28672 + max: 33426016 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,7 +210,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jep23m7qg + job_id: jnp130m85 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -207,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-08T23:04:17Z' + timestamp: '2024-06-22T23:12:15Z' - torchscript_onnx_tflite: - inference_time: 17058.0 - throughput: 58.62351975612616 + inference_time: 17263.0 + throughput: 57.92735909169901 estimated_peak_memory_range: - min: 40960 - max: 1956688 + min: 32768 + max: 1987040 primary_compute_unit: NPU precision: int8 layer_info: @@ -221,7 +233,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jqpyvd4lp + job_id: jvgd0wmrp job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -230,13 +242,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-08T23:04:18Z' + timestamp: '2024-06-22T23:12:16Z' - torchscript_onnx_qnn: - inference_time: 1381.0 - throughput: 724.112961622013 + inference_time: 1308.0 + throughput: 764.525993883792 estimated_peak_memory_range: - min: 270336 - max: 270336 + min: 491520 + max: 491520 primary_compute_unit: NPU precision: int8 layer_info: @@ -244,22 +256,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 146 - job_id: jogkryln5 - job_status: Passed - torchscript_onnx_ort: - inference_time: 1313.0 - throughput: 761.6146230007616 - estimated_peak_memory_range: - min: 24576 - max: 24576 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 151 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 151 - job_id: j1p3qmqn5 + job_id: j0pxmvz3g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -268,4 +265,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:04:25Z' + timestamp: '2024-06-22T23:12:20Z' diff --git a/qai_hub_models/models/resnet18/export.py b/qai_hub_models/models/resnet18/export.py index b4ca9324..7a8b8692 100644 --- a/qai_hub_models/models/resnet18/export.py +++ b/qai_hub_models/models/resnet18/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/resnet18/perf.yaml b/qai_hub_models/models/resnet18/perf.yaml index ff6f698a..522c5a5c 100644 --- a/qai_hub_models/models/resnet18/perf.yaml +++ b/qai_hub_models/models/resnet18/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: ResNet18 performance_metrics: - torchscript_onnx_tflite: - inference_time: 1416.0 - throughput: 706.2146892655368 + inference_time: 1414.0 + throughput: 707.2135785007072 estimated_peak_memory_range: - min: 61440 - max: 1999640 + min: 32768 + max: 1539816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: j1pvzwzrg + job_id: jn5qw8dn5 job_status: Passed torchscript_onnx_qnn: inference_time: 1446.0 throughput: 691.5629322268327 estimated_peak_memory_range: - min: 86016 - max: 83516488 + min: 16384 + max: 72842240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jygzv7vxp + job_id: jwgomy9q5 job_status: Passed - 
torchscript_onnx_ort: - inference_time: 1350.0 - throughput: 740.7407407407408 + torchscript_onnx: + inference_time: 1330.0 + throughput: 751.8796992481203 estimated_peak_memory_range: - min: 24576 - max: 88328320 + min: 12288 + max: 98529504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 55 - job_id: jvgd7v7zg + job_id: jz5wxo03p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:04:50Z' + timestamp: '2024-06-22T23:12:56Z' - torchscript_onnx_tflite: - inference_time: 989.0 - throughput: 1011.1223458038422 + inference_time: 986.0 + throughput: 1014.1987829614604 estimated_peak_memory_range: - min: 12288 - max: 25458704 + min: 16384 + max: 26232304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: j7gjklke5 + job_id: j1gl7nqj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1017.0 - throughput: 983.284169124877 + inference_time: 1021.0 + throughput: 979.4319294809011 estimated_peak_memory_range: - min: 618496 - max: 29899792 + min: 0 + max: 28437120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jz5wm9mmg + job_id: j1pv43ykp job_status: Passed - torchscript_onnx_ort: - inference_time: 978.0 - throughput: 1022.4948875255624 + torchscript_onnx: + inference_time: 946.0 + throughput: 1057.0824524312895 estimated_peak_memory_range: min: 0 - max: 16899936 + max: 18534672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 55 - job_id: jz57vdv95 + job_id: jmg98v7wp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:04:51Z' + timestamp: '2024-06-22T23:12:58Z' - torchscript_onnx_tflite: - inference_time: 1412.0 - throughput: 708.2152974504249 + inference_time: 1414.0 + throughput: 707.2135785007072 estimated_peak_memory_range: - min: 28672 - max: 154269408 + min: 45056 + max: 1517840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: jlpe4v4v5 + job_id: jw56v606p job_status: Passed torchscript_onnx_qnn: - inference_time: 1473.0 - throughput: 678.8866259334691 + inference_time: 1467.0 + throughput: 681.6632583503749 estimated_peak_memory_range: min: 16384 - max: 72911032 + max: 83495696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jnp1q8q7g + job_id: jlpe290op job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:04:49Z' + timestamp: '2024-06-22T23:12:54Z' + - torchscript_onnx_tflite: + inference_time: 1413.0 + throughput: 707.7140835102618 + estimated_peak_memory_range: + min: 28672 + max: 27219144 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 38 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 38 + job_id: j1p38kr35 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1461.0 + throughput: 684.4626967830253 + 
estimated_peak_memory_range: + min: 12288 + max: 73021240 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 53 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 53 + job_id: jygzweqog + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:12:55Z' - torchscript_onnx_qnn: - inference_time: 1575.0 - throughput: 634.9206349206349 + inference_time: 1561.0 + throughput: 640.6149903907751 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jmg99498g + job_id: j7gj1x6vg job_status: Passed - torchscript_onnx_ort: - inference_time: 1324.0 - throughput: 755.2870090634441 + torchscript_onnx: + inference_time: 1308.0 + throughput: 764.525993883792 estimated_peak_memory_range: - min: 28278784 - max: 28278784 + min: 28839936 + max: 28839936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 55 - job_id: jqp4jwj1p + job_id: jnp130k85 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:04:52Z' + timestamp: '2024-06-22T23:12:59Z' diff --git a/qai_hub_models/models/resnet18_quantized/export.py b/qai_hub_models/models/resnet18_quantized/export.py index 4ed971cc..9cb2dcd2 100644 --- a/qai_hub_models/models/resnet18_quantized/export.py +++ b/qai_hub_models/models/resnet18_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/resnet18_quantized/model.py b/qai_hub_models/models/resnet18_quantized/model.py index a6c87335..c0c56598 100644 --- a/qai_hub_models/models/resnet18_quantized/model.py +++ b/qai_hub_models/models/resnet18_quantized/model.py @@ -78,5 +78,4 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() return cls(sim) diff --git a/qai_hub_models/models/resnet18_quantized/perf.yaml b/qai_hub_models/models/resnet18_quantized/perf.yaml index a76fba6f..85546c98 100644 --- a/qai_hub_models/models/resnet18_quantized/perf.yaml +++ b/qai_hub_models/models/resnet18_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 
(Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: ResNet18Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 420.0 - throughput: 2380.9523809523807 + inference_time: 419.0 + throughput: 2386.634844868735 estimated_peak_memory_range: min: 12288 - max: 1492608 + max: 1704432 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 39 - job_id: jo5mvzv95 + job_id: jz576z1vg job_status: Passed torchscript_onnx_qnn: - inference_time: 639.0 - throughput: 1564.9452269170579 + inference_time: 637.0 + throughput: 1569.8587127158555 estimated_peak_memory_range: - min: 24576 - max: 9441728 + min: 16384 + max: 132534952 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 37 - job_id: j2p0eren5 - job_status: Passed - torchscript_onnx_ort: - inference_time: 641.0 - throughput: 1560.0624024960998 - estimated_peak_memory_range: - min: 12288 - max: 25595784 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 42 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 42 - job_id: j1glek7mp + job_id: jep2j8qr5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:05:34Z' + timestamp: '2024-06-22T23:13:39Z' - torchscript_onnx_tflite: - inference_time: 352.0 - throughput: 2840.909090909091 + inference_time: 347.0 + throughput: 2881.844380403458 estimated_peak_memory_range: min: 16384 - max: 24707232 + max: 26805328 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 39 - job_id: jegnrerq5 + job_id: jqp48q68g job_status: Passed torchscript_onnx_qnn: - inference_time: 475.0 - throughput: 2105.2631578947367 + inference_time: 480.0 + throughput: 2083.3333333333335 estimated_peak_memory_range: - min: 163840 - max: 28038704 + min: 0 + max: 24359200 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 37 - job_id: j1p8w7wop - job_status: Passed - torchscript_onnx_ort: - inference_time: 479.0 - throughput: 2087.6826722338205 - estimated_peak_memory_range: - min: 12288 - max: 20801936 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 42 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 42 - job_id: jw56q1vyg + job_id: jqpynek8g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +120,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:05:35Z' + timestamp: '2024-06-22T23:13:40Z' - torchscript_onnx_tflite: - inference_time: 420.0 - throughput: 2380.9523809523807 + inference_time: 424.0 + throughput: 2358.490566037736 estimated_peak_memory_range: min: 12288 - max: 1230392 + max: 14983304 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 39 - job_id: jopr1y17g + job_id: j0pxmv83g job_status: Passed torchscript_onnx_qnn: - inference_time: 632.0 - throughput: 
1582.2784810126582 + inference_time: 626.0 + throughput: 1597.444089456869 estimated_peak_memory_range: - min: 16384 - max: 8848856 + min: 12288 + max: 9090424 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 37 - job_id: jn5q929op + job_id: j1p88odkp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +158,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:05:33Z' + timestamp: '2024-06-22T23:13:42Z' - torchscript_onnx_tflite: - inference_time: 1426.0 - throughput: 701.2622720897616 + inference_time: 418.0 + throughput: 2392.3444976076553 estimated_peak_memory_range: - min: 12288 - max: 15923968 + min: 24576 + max: 127281976 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 39 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 39 + job_id: jo5m4r1d5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 638.0 + throughput: 1567.398119122257 + estimated_peak_memory_range: + min: 16384 + max: 147996392 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 37 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 37 + job_id: jogkdzwwp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:13:44Z' + - torchscript_onnx_tflite: + inference_time: 1444.0 + throughput: 692.5207756232687 + estimated_peak_memory_range: + min: 16384 + max: 16990032 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,7 +210,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 39 - job_id: jep23m3qg + job_id: jegnx2dk5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -207,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-08T23:05:28Z' + timestamp: '2024-06-22T23:13:36Z' - torchscript_onnx_tflite: - inference_time: 7066.0 - throughput: 141.52278516841213 + inference_time: 7065.0 + throughput: 141.54281670205236 estimated_peak_memory_range: - min: 40960 - max: 6406016 + min: 12288 + max: 2019328 primary_compute_unit: NPU precision: int8 layer_info: @@ -221,7 +233,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 39 - job_id: jqpyvdvlp + job_id: jopr9km0p job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -230,13 +242,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-08T23:05:29Z' + timestamp: '2024-06-22T23:13:37Z' - torchscript_onnx_qnn: - inference_time: 742.0 - throughput: 1347.7088948787061 + inference_time: 710.0 + throughput: 1408.4507042253522 estimated_peak_memory_range: - min: 1617920 - max: 1617920 + min: 593920 + max: 593920 primary_compute_unit: NPU precision: int8 layer_info: @@ -244,22 +256,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 37 - job_id: jogkryrn5 - job_status: Passed - torchscript_onnx_ort: - inference_time: 836.0 - throughput: 1196.1722488038276 - estimated_peak_memory_range: - min: 3690496 - max: 3690496 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 42 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 42 - job_id: j1p3qm8n5 + job_id: j2p0ky895 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -268,4 +265,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:05:36Z' + 
timestamp: '2024-06-22T23:13:41Z' diff --git a/qai_hub_models/models/resnet50/export.py b/qai_hub_models/models/resnet50/export.py index 688099b7..5daa5d27 100644 --- a/qai_hub_models/models/resnet50/export.py +++ b/qai_hub_models/models/resnet50/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/resnet50/perf.yaml b/qai_hub_models/models/resnet50/perf.yaml index be4d4c11..c8c0873a 100644 --- a/qai_hub_models/models/resnet50/perf.yaml +++ b/qai_hub_models/models/resnet50/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: ResNet50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 2269.0 - throughput: 440.72278536800354 + inference_time: 2286.0 + throughput: 437.4453193350831 estimated_peak_memory_range: - min: 16384 - max: 2153680 + min: 20480 + max: 2727384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: j1pvzw4rg + job_id: j7gj1x9vg job_status: Passed torchscript_onnx_qnn: - inference_time: 2387.0 - throughput: 418.93590280687056 + inference_time: 2400.0 + throughput: 416.6666666666667 estimated_peak_memory_range: min: 622592 - max: 175232184 + max: 186116792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jygzv7wxp + job_id: jmg98vrwp job_status: Passed - torchscript_onnx_ort: - inference_time: 2366.0 - throughput: 422.654268808115 + torchscript_onnx: + inference_time: 2345.0 + throughput: 426.43923240938165 estimated_peak_memory_range: - min: 12288 - max: 261165672 + min: 16384 + max: 214360704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jvgd7v0zg + job_id: j0pxmvq3g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung 
chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:06:03Z' + timestamp: '2024-06-22T23:14:20Z' - torchscript_onnx_tflite: - inference_time: 1615.0 - throughput: 619.1950464396285 + inference_time: 1635.0 + throughput: 611.6207951070336 estimated_peak_memory_range: min: 12288 - max: 72992224 + max: 77847184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: j7gjkl1e5 + job_id: jlpe29qop job_status: Passed torchscript_onnx_qnn: - inference_time: 1705.0 - throughput: 586.5102639296188 + inference_time: 1711.0 + throughput: 584.4535359438925 estimated_peak_memory_range: min: 0 - max: 51115584 + max: 48436560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jz5wm9xmg + job_id: jnp130985 job_status: Passed - torchscript_onnx_ort: - inference_time: 1750.0 - throughput: 571.4285714285714 + torchscript_onnx: + inference_time: 1728.0 + throughput: 578.7037037037037 estimated_peak_memory_range: - min: 618496 - max: 34613760 + min: 0 + max: 32669776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jz57vd695 + job_id: jo5m4r7d5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:06:04Z' + timestamp: '2024-06-22T23:14:21Z' - torchscript_onnx_tflite: - inference_time: 2272.0 - throughput: 440.14084507042253 + inference_time: 2275.0 + throughput: 439.56043956043953 estimated_peak_memory_range: - min: 24576 - max: 1714808 + min: 28672 + max: 2508480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jlpe4v2v5 + job_id: jygzwe6og job_status: Passed torchscript_onnx_qnn: - inference_time: 2385.0 - throughput: 419.2872117400419 + inference_time: 2381.0 + throughput: 419.99160016799664 estimated_peak_memory_range: - min: 622592 - max: 175433648 + min: 626688 + max: 185987504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jnp1q837g + job_id: jz576zmvg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:06:02Z' + timestamp: '2024-06-22T23:14:18Z' + - torchscript_onnx_tflite: + inference_time: 2278.0 + throughput: 438.98156277436345 + estimated_peak_memory_range: + min: 24576 + max: 2160032 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 79 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 79 + job_id: jz5wxok3p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 2393.0 + throughput: 417.88549937317174 + estimated_peak_memory_range: + min: 618496 + max: 186249720 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 126 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 126 + job_id: jqp48q78g + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:14:19Z' - torchscript_onnx_qnn: - inference_time: 2608.0 - throughput: 383.4355828220859 + inference_time: 2274.0 + throughput: 
439.7537379067722 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jmg99488g + job_id: jvgd0wkrp job_status: Passed - torchscript_onnx_ort: - inference_time: 2295.0 - throughput: 435.7298474945534 + torchscript_onnx: + inference_time: 2307.0 + throughput: 433.4633723450368 estimated_peak_memory_range: - min: 54059008 - max: 54059008 + min: 55226368 + max: 55226368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jqp4jw81p + job_id: jegnx24k5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:06:05Z' + timestamp: '2024-06-22T23:14:22Z' diff --git a/qai_hub_models/models/resnet50_quantized/README.md b/qai_hub_models/models/resnet50_quantized/README.md new file mode 100644 index 00000000..1e962511 --- /dev/null +++ b/qai_hub_models/models/resnet50_quantized/README.md @@ -0,0 +1,61 @@ +[![Qualcomm® AI Hub Models](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/quic-logo.jpg)](../../README.md) + + +# [ResNet50Quantized: Imagenet classifier and general purpose backbone](https://aihub.qualcomm.com/models/resnet50_quantized) + +ResNet50 is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. + +This is based on the implementation of ResNet50Quantized found +[here](https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py). This repository contains scripts for optimized on-device +export suitable to run on Qualcomm® devices. More details on model performance +across various devices can be found [here](https://aihub.qualcomm.com/models/resnet50_quantized). + +[Sign up](https://myaccount.qualcomm.com/signup) for early access to run these models on +a hosted Qualcomm® device. + + + + +## Example & Usage + +Install the package via pip: +```bash +pip install "qai_hub_models[resnet50_quantized]" +``` + + +Once installed, run the following simple CLI demo: + +```bash +python -m qai_hub_models.models.resnet50_quantized.demo +``` +More details on the CLI tool can be found with the `--help` option. See +[demo.py](demo.py) for sample usage of the model including pre/post processing +scripts. Please refer to our [general instructions on using +models](../../../#getting-started) for more usage instructions. + +## Export for on-device deployment + +This repository contains export scripts that produce a model optimized for +on-device deployment. This can be run as follows: + +```bash +python -m qai_hub_models.models.resnet50_quantized.export +``` +Additional options are documented with the `--help` option. Note that the above +script requires access to Deployment instructions for Qualcomm® AI Hub. + +## License +- The license for the original implementation of ResNet50Quantized can be found + [here](https://github.com/pytorch/vision/blob/main/LICENSE).
+- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) + +## References +* [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) +* [Source Model Implementation](https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py) + +## Community +* Join [our AI Hub Slack community](https://qualcomm-ai-hub.slack.com/join/shared_invite/zt-2d5zsmas3-Sj0Q9TzslueCjS31eXG2UA#/shared-invite/email) to collaborate, post questions and learn more about on-device AI. +* For questions or feedback please [reach out to us](mailto:ai-hub-support@qti.qualcomm.com). + + diff --git a/qai_hub_models/models/resnet50_quantized/__init__.py b/qai_hub_models/models/resnet50_quantized/__init__.py new file mode 100644 index 00000000..f416a9c4 --- /dev/null +++ b/qai_hub_models/models/resnet50_quantized/__init__.py @@ -0,0 +1,10 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.imagenet_classifier.app import ( # noqa: F401 + ImagenetClassifierApp as App, +) + +from .model import MODEL_ID # noqa: F401 +from .model import ResNet50Quantizable as Model # noqa: F401 diff --git a/qai_hub_models/models/resnet50_quantized/conftest.py b/qai_hub_models/models/resnet50_quantized/conftest.py new file mode 100644 index 00000000..717d124e --- /dev/null +++ b/qai_hub_models/models/resnet50_quantized/conftest.py @@ -0,0 +1,37 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +import inspect + +import pytest + +from qai_hub_models.models.resnet50_quantized import Model + + +# Instantiate the model only once for all tests. +# Mock from_pretrained to always return the initialized model. +# This speeds up tests and limits memory leaks. +@pytest.fixture(scope="module", autouse=True) +def cached_from_pretrained(): + with pytest.MonkeyPatch.context() as mp: + pretrained_cache = {} + from_pretrained = Model.from_pretrained + sig = inspect.signature(from_pretrained) + + def _cached_from_pretrained(*args, **kwargs): + cache_key = str(args) + str(kwargs) + model = pretrained_cache.get(cache_key, None) + if model: + return model + else: + model = from_pretrained(*args, **kwargs) + pretrained_cache[cache_key] = model + return model + + _cached_from_pretrained.__signature__ = sig + + mp.setattr(Model, "from_pretrained", _cached_from_pretrained) + yield mp diff --git a/qai_hub_models/models/resnet50_quantized/demo.py b/qai_hub_models/models/resnet50_quantized/demo.py new file mode 100644 index 00000000..02dc3292 --- /dev/null +++ b/qai_hub_models/models/resnet50_quantized/demo.py @@ -0,0 +1,14 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo +from qai_hub_models.models.resnet50_quantized.model import MODEL_ID, ResNet50Quantizable + + +def main(is_test: bool = False): + imagenet_demo(ResNet50Quantizable, MODEL_ID, is_test) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/resnet50_quantized/evaluate.py b/qai_hub_models/models/resnet50_quantized/evaluate.py new file mode 100644 index 00000000..8fdc840e --- /dev/null +++ b/qai_hub_models/models/resnet50_quantized/evaluate.py @@ -0,0 +1,62 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.resnet50_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args +from qai_hub_models.utils.quantization_aimet import AIMETQuantizableMixin + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + + # Use Fp16 model for torch inference + for cls in Model.__mro__: + if issubclass(cls, BaseModel) and not issubclass(cls, AIMETQuantizableMixin): + torch_cls = cls + break + torch_model = torch_cls.from_pretrained(**get_model_kwargs(torch_cls, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/resnet50_quantized/export.py b/qai_hub_models/models/resnet50_quantized/export.py new file mode 100644 index 00000000..167292ed --- /dev/null +++ b/qai_hub_models/models/resnet50_quantized/export.py @@ -0,0 +1,236 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import os +import warnings +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple, cast + +import qai_hub as hub + +from qai_hub_models.models.resnet50_quantized import Model +from qai_hub_models.utils.args import ( + export_parser, + get_input_spec_kwargs, + get_model_kwargs, +) +from qai_hub_models.utils.base_model import TargetRuntime +from qai_hub_models.utils.compare import torch_inference +from qai_hub_models.utils.printing import ( + print_inference_metrics, + print_on_target_demo_cmd, + print_profile_metrics_from_job, +) +from qai_hub_models.utils.qai_hub_helpers import ( + can_access_qualcomm_ai_hub, + export_without_hub_access, + transpose_channel_first_to_last, +) +from qai_hub_models.utils.qnn_helpers import get_qnn_inputs + + +def export_model( + device: str = "Samsung Galaxy S23 (Family)", + chipset: Optional[str] = None, + skip_profiling: bool = False, + skip_inferencing: bool = False, + skip_downloading: bool = False, + skip_summary: bool = False, + output_dir: Optional[str] = None, + target_runtime: TargetRuntime = TargetRuntime.TFLITE, + compile_options: str = "", + profile_options: str = "", + **additional_model_kwargs, +) -> Tuple[hub.CompileJob, Optional[hub.ProfileJob], Optional[hub.InferenceJob]] | List[ + str +]: + """ + This function accomplishes 6 main tasks: + + 1. Instantiates a PyTorch model and converts it to a traced TorchScript format. + 2. Compiles the model to an asset that can be run on device. + 3. Profiles the model performance on real devices. + 4. Inferences the model on sample inputs. + 5. Downloads the model asset to the local directory. + 6. Summarizes the results from profiling and inference. + + Each of the last four steps can be optionally skipped using the input options. + + Parameters: + device: Device for which to export the model. + Full list of available devices can be found by running `hub.get_devices()`. + Defaults to DEFAULT_DEVICE if not specified. + chipset: If set, will choose a random device with this chipset. + Overrides the `device` argument. + skip_profiling: If set, skips profiling of compiled model on real devices. + skip_inferencing: If set, skips computing on-device outputs from sample data. + skip_downloading: If set, skips downloading of compiled model. + skip_summary: If set, skips waiting for and summarizing results + from profiling and inference. + output_dir: Directory to store generated assets (e.g. compiled model). + Defaults to `/build/`. + target_runtime: Which on-device runtime to target. Default is TFLite. + compile_options: Additional options to pass when submitting the compile job. + profile_options: Additional options to pass when submitting the profile job. + **additional_model_kwargs: Additional optional kwargs used to customize + `model_cls.from_pretrained` and `model.get_input_spec` + + Returns: + A 3-tuple of: + * A CompileJob object containing metadata about the compile job submitted to hub. + * A ProfileJob containing metadata about the profile job (None if profiling skipped). + * An InferenceJob containing metadata about the inference job (None if inferencing skipped). 
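A minimal sketch of calling the generated entry point documented above from Python, rather than via `python -m qai_hub_models.models.resnet50_quantized.export`. It assumes Qualcomm® AI Hub access is configured; the keyword values shown are illustrative and simply mirror the documented defaults:

```python
from qai_hub_models.models.resnet50_quantized.export import export_model
from qai_hub_models.utils.base_model import TargetRuntime

# Compile and profile on the default device family, but skip the on-device
# inference step; inference_job is then returned as None.
compile_job, profile_job, inference_job = export_model(
    device="Samsung Galaxy S23 (Family)",
    target_runtime=TargetRuntime.TFLITE,
    skip_inferencing=True,
)
```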
+ """ + model_name = "resnet50_quantized" + output_path = Path(output_dir or Path.cwd() / "build" / model_name) + if chipset: + hub_device = hub.Device(attributes=f"chipset:{chipset}") + else: + hub_device = hub.Device(name=device) + if not can_access_qualcomm_ai_hub(): + return export_without_hub_access( + "resnet50_quantized", + "ResNet50Quantized", + device, + skip_profiling, + skip_inferencing, + skip_downloading, + skip_summary, + output_path, + target_runtime, + compile_options, + profile_options, + ) + + # 1. Initialize PyTorch model + model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) + input_spec = model.get_input_spec( + **get_input_spec_kwargs(model, additional_model_kwargs) + ) + + # Trace the model + source_model = model.convert_to_hub_source_model( + target_runtime, output_path, input_spec + ) + if target_runtime == TargetRuntime.TFLITE: + quant_calibration_data = None + else: + quant_calibration_data = model.get_calibration_data(target_runtime, input_spec) + + # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) + channel_last_flags = ( + " --force_channel_last_input image_tensor" + if target_runtime != TargetRuntime.ONNX + else "" + ) + + # 2. Compile the model to an on-device asset + model_compile_options = model.get_hub_compile_options( + target_runtime, compile_options + channel_last_flags, hub_device + ) + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( + model=source_model, + input_specs=input_spec, + device=hub_device, + name=model_name, + calibration_data=quant_calibration_data, + options=model_compile_options, + ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) + + # 3. Profile the model asset on real devices + profile_job: Optional[hub.client.ProfileJob] = None + if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print(f"Profiling model {model_name} on a hosted device.") + submitted_profile_job = hub.submit_profile_job( + model=compile_job.get_target_model(), + device=hub_device, + name=model_name, + options=profile_options_all, + ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) + + # 4. Run inference on-device with sample inputs + inference_job: Optional[hub.client.InferenceJob] = None + if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print( + f"Running inference for {model_name} on a hosted device with example inputs." + ) + sample_inputs = model.sample_inputs(input_spec) + hub_inputs = sample_inputs + if target_runtime == TargetRuntime.QNN: + hub_inputs = get_qnn_inputs(compile_job, sample_inputs) + # Convert inputs from channel first to channel last + hub_inputs = ( + sample_inputs + if target_runtime == TargetRuntime.ONNX + else transpose_channel_first_to_last( + "image_tensor", sample_inputs, target_runtime + ) + ) + submitted_inference_job = hub.submit_inference_job( + model=compile_job.get_target_model(), + inputs=hub_inputs, + device=hub_device, + name=model_name, + options=profile_options_all, + ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) + + # 5. 
Download the model asset to a local file + if not skip_downloading: + if target_runtime == TargetRuntime.QNN: + target_runtime_extension = "so" + elif target_runtime == TargetRuntime.TFLITE: + target_runtime_extension = "tflite" + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: + target_runtime_extension = "onnx" + + os.makedirs(output_path, exist_ok=True) + target_model: hub.Model = compile_job.get_target_model() # type: ignore + target_model.download( + str(output_path / f"{model_name}.{target_runtime_extension}") + ) + + # 6. Summarize the results from profiling and inference + if not skip_summary and not skip_profiling: + assert profile_job is not None and profile_job.wait().success + profile_data: Dict[str, Any] = profile_job.download_profile() # type: ignore + print_profile_metrics_from_job(profile_job, profile_data) + + if not skip_summary and not skip_inferencing: + torch_out = torch_inference(model, sample_inputs) + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore + print_inference_metrics( + inference_job, inference_result, torch_out, metrics="psnr,top1,top5" + ) + + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device) + + return (compile_job, profile_job, inference_job) + + +def main(): + warnings.filterwarnings("ignore") + parser = export_parser(model_cls=Model) + args = parser.parse_args() + export_model(**vars(args)) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/resnet50_quantized/info.yaml b/qai_hub_models/models/resnet50_quantized/info.yaml new file mode 100644 index 00000000..dd32d352 --- /dev/null +++ b/qai_hub_models/models/resnet50_quantized/info.yaml @@ -0,0 +1,44 @@ +name: ResNet50Quantized +# id must match with the model dir name in qai_hub_models +id: resnet50_quantized +status: public +headline: Imagenet classifier and general purpose backbone. +domain: Computer Vision +use_case: Image Classification +description: ResNet50 is a machine learning model that can classify images from the + Imagenet dataset. It can also be used as a backbone in building more complex models + for specific use cases. 
+tags: + - backbone + - quantized +research_paper: https://arxiv.org/abs/1512.03385 +research_paper_title: Deep Residual Learning for Image Recognition +license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py +technical_details: + Model checkpoint: Imagenet + Input resolution: 224x224 + Number of parameters: 25.5M + Model size: 25.1 MB +applicable_scenarios: + - Medical Imaging + - Anomaly Detection + - Inventory Management +related_models: + - mobilenet_v2 + - densenet121 + - googlenet +form_factors: + - Phone + - Tablet + - IoT + - XR +has_static_banner: yes +has_animated_banner: yes +license_type: bsd-3-clause +deploy_license_type: AI Model Hub License +dataset: + - imagenet-1k + - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/resnet50_quantized/model.py b/qai_hub_models/models/resnet50_quantized/model.py new file mode 100644 index 00000000..54f44eb1 --- /dev/null +++ b/qai_hub_models/models/resnet50_quantized/model.py @@ -0,0 +1,81 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from __future__ import annotations + +# isort: off +# This verifies aimet is installed, and this must be included first. +from qai_hub_models.utils.quantization_aimet import ( + AIMETQuantizableMixin, + constrain_quantized_inputs_to_image_range, +) + +# isort: on + +import torch +from aimet_torch.cross_layer_equalization import equalize_model +from aimet_torch.model_preparer import prepare_model +from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim + +from qai_hub_models.models.resnet50.model import ResNet50 +from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset + +MODEL_ID = __name__.split(".")[-2] +MODEL_ASSET_VERSION = 6 +DEFAULT_ENCODINGS = "resnet50_quantized_encodings.json" + + +class ResNet50Quantizable(AIMETQuantizableMixin, ResNet50): + """ResNet with post train quantization support. + + Supports only 8 bit weights and activations, and only loads pre-quantized checkpoints. + Support for quantizing using your own weights & data will come at a later date.""" + + def __init__( + self, + resnet50_model: QuantizationSimModel, + ) -> None: + # Input is already normalized by sim_model. Disable it in the wrapper model. + ResNet50.__init__(self, resnet50_model.model, normalize_input=False) + AIMETQuantizableMixin.__init__( + self, + resnet50_model, + ) + + @classmethod + def from_pretrained( + cls, + aimet_encodings: str | None = "DEFAULT", + ) -> "ResNet50Quantizable": + """ + Parameters: + aimet_encodings: + if "DEFAULT": Loads the model with aimet encodings calibrated on imagenette. + elif None: Doesn't load any encodings. Used when computing encodings. + else: Interprets as a filepath and loads the encodings stored there. 
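The three loading modes described in the `from_pretrained` docstring above can be exercised as in this minimal sketch (assumes the aimet-torch dependency from requirements.txt is installed, i.e. a Linux host; the local encodings path is hypothetical):

```python
from qai_hub_models.models.resnet50_quantized.model import ResNet50Quantizable

# "DEFAULT": fetch and load the pre-computed encodings from the asset store.
model = ResNet50Quantizable.from_pretrained()

# None: build the quantization sim without encodings (e.g. before computing your own).
uncalibrated = ResNet50Quantizable.from_pretrained(aimet_encodings=None)

# Any other string is treated as a local encodings file (hypothetical path).
custom = ResNet50Quantizable.from_pretrained(aimet_encodings="my_resnet50_encodings.json")
```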
+ """ + model = ResNet50.from_pretrained() + input_shape = cls.get_input_spec()["image_tensor"][0] + + model = prepare_model(model) + equalize_model(model, input_shape) + sim = QuantizationSimModel( + model, + quant_scheme="tf_enhanced", + default_param_bw=8, + default_output_bw=8, + config_file=get_default_aimet_config(), + dummy_input=torch.rand(input_shape), + ) + constrain_quantized_inputs_to_image_range(sim) + + if aimet_encodings: + if aimet_encodings == "DEFAULT": + aimet_encodings = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, DEFAULT_ENCODINGS + ).fetch() + load_encodings_to_sim(sim, aimet_encodings) + + return cls(sim) diff --git a/qai_hub_models/models/resnet50_quantized/perf.yaml b/qai_hub_models/models/resnet50_quantized/perf.yaml new file mode 100644 index 00000000..f8dcd34d --- /dev/null +++ b/qai_hub_models/models/resnet50_quantized/perf.yaml @@ -0,0 +1,265 @@ +aggregated: + supported_oses: + - Android + supported_devices: + - Google Pixel 3 + - Google Pixel 3a + - Google Pixel 3a XL + - Google Pixel 4 + - Google Pixel 4a + - Google Pixel 5a 5G + - QCS8250 (Proxy) + - QCS8550 (Proxy) + - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) + - Samsung Galaxy S21 + - Samsung Galaxy S21 Ultra + - Samsung Galaxy S21+ + - Samsung Galaxy S22 5G + - Samsung Galaxy S22 Ultra 5G + - Samsung Galaxy S22+ 5G + - Samsung Galaxy S23 + - Samsung Galaxy S23 Ultra + - Samsung Galaxy S23+ + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ + - Samsung Galaxy Tab S8 + - Snapdragon X Elite CRD + - Xiaomi 12 + - Xiaomi 12 Pro + supported_chipsets: + - Qcs8250 + - Qcs8550 + - Sa8540p + - Sa8775p + - Snapdragon® 8 Gen 1 + - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 3 + - Snapdragon® 888 + - Snapdragon® X Elite +models: +- name: ResNet50Quantized + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 814.0 + throughput: 1228.5012285012285 + estimated_peak_memory_range: + min: 28672 + max: 1573280 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 80 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 80 + job_id: jep2j81r5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 997.0 + throughput: 1003.0090270812437 + estimated_peak_memory_range: + min: 12288 + max: 7720656 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 78 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 78 + job_id: j1gl7ndj5 + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-06-22T23:15:30Z' + - torchscript_onnx_tflite: + inference_time: 616.0 + throughput: 1623.3766233766235 + estimated_peak_memory_range: + min: 16384 + max: 63028352 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 80 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 80 + job_id: jqpynel8g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 769.0 + throughput: 1300.3901170351105 + estimated_peak_memory_range: + min: 163840 + max: 38764864 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 78 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 78 + job_id: jw56v6x6p + job_status: Passed + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-06-22T23:15:31Z' + - torchscript_onnx_tflite: + 
inference_time: 788.0 + throughput: 1269.0355329949239 + estimated_peak_memory_range: + min: 40960 + max: 1460544 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 80 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 80 + job_id: j2p0kyw95 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 994.0 + throughput: 1006.0362173038229 + estimated_peak_memory_range: + min: 16384 + max: 262208304 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 78 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 78 + job_id: jwgomyxq5 + job_status: Passed + reference_device_info: + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: Qcs8550 + timestamp: '2024-06-22T23:15:34Z' + - torchscript_onnx_tflite: + inference_time: 816.0 + throughput: 1225.4901960784314 + estimated_peak_memory_range: + min: 12288 + max: 1740040 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 80 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 80 + job_id: j1p88onkp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1007.0 + throughput: 993.0486593843099 + estimated_peak_memory_range: + min: 16384 + max: 273027080 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 78 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 78 + job_id: j1pv43jkp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:15:35Z' + - torchscript_onnx_tflite: + inference_time: 2798.0 + throughput: 357.39814152966403 + estimated_peak_memory_range: + min: 12288 + max: 24437488 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 80 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 80 + job_id: jogkdz1wp + job_status: Passed + reference_device_info: + name: RB3 Gen 2 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: Qcs6490 + timestamp: '2024-06-22T23:15:27Z' + - torchscript_onnx_tflite: + inference_time: 11354.0 + throughput: 88.07468733485996 + estimated_peak_memory_range: + min: 12288 + max: 3005536 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 80 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 80 + job_id: jn5qw8nn5 + job_status: Passed + reference_device_info: + name: RB5 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: Qcs8250 + timestamp: '2024-06-22T23:15:29Z' + - torchscript_onnx_qnn: + inference_time: 986.0 + throughput: 1014.1987829614604 + estimated_peak_memory_range: + min: 557056 + max: 557056 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 78 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 78 + job_id: j1p38kd35 + job_status: Passed + reference_device_info: + name: Snapdragon X Elite CRD + os: '11' + form_factor: Compute + os_name: Windows + manufacturer: Qualcomm + chipset: Snapdragon® X Elite + timestamp: '2024-06-22T23:15:32Z' diff --git a/qai_hub_models/models/resnet50_quantized/requirements.txt b/qai_hub_models/models/resnet50_quantized/requirements.txt new file mode 100644 index 00000000..e3567f29 --- /dev/null +++ b/qai_hub_models/models/resnet50_quantized/requirements.txt @@ -0,0 +1 @@ +aimet-torch==1.31.2; sys_platform == "linux" diff --git a/qai_hub_models/models/resnet50_quantized/test.py 
b/qai_hub_models/models/resnet50_quantized/test.py new file mode 100644 index 00000000..55efb858 --- /dev/null +++ b/qai_hub_models/models/resnet50_quantized/test.py @@ -0,0 +1,30 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( + run_imagenet_classifier_test, +) +from qai_hub_models.models.resnet50_quantized.demo import main as demo_main +from qai_hub_models.models.resnet50_quantized.model import ( + MODEL_ASSET_VERSION, + MODEL_ID, + ResNet50Quantizable, +) + + +def test_task(): + run_imagenet_classifier_test( + ResNet50Quantizable.from_pretrained(), + MODEL_ID, + probability_threshold=0.45, + diff_tol=0.005, + rtol=0.02, + atol=0.2, + asset_version=MODEL_ASSET_VERSION, + ) + + +def test_demo(): + # Verify demo does not crash + demo_main(is_test=True) diff --git a/qai_hub_models/models/resnext101/export.py b/qai_hub_models/models/resnext101/export.py index e4b05a65..8bf63f21 100644 --- a/qai_hub_models/models/resnext101/export.py +++ b/qai_hub_models/models/resnext101/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/resnext101/perf.yaml b/qai_hub_models/models/resnext101/perf.yaml index 499cef3a..bcacfdf7 100644 --- a/qai_hub_models/models/resnext101/perf.yaml +++ b/qai_hub_models/models/resnext101/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: ResNeXt101 performance_metrics: - torchscript_onnx_tflite: - inference_time: 6774.0 - throughput: 147.62326542663124 + inference_time: 6589.0 + throughput: 151.76809834572774 estimated_peak_memory_range: - min: 24576 - max: 2449424 + min: 20480 + max: 2663064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 147 - job_id: j7gjkloe5 + job_id: jvgd0wjrp job_status: Passed torchscript_onnx_qnn: - inference_time: 6930.0 - throughput: 144.3001443001443 + inference_time: 6611.0 + throughput: 151.26304643775526 estimated_peak_memory_range: - min: 16384 - max: 36101088 + min: 12288 + max: 38091512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jz5wm98mg + job_id: jvgd0wjzp job_status: Passed - torchscript_onnx_ort: - inference_time: 6834.0 - throughput: 146.3271875914545 + torchscript_onnx: + inference_time: 7046.0 + throughput: 141.92449616803862 estimated_peak_memory_range: - min: 159744 - max: 453366256 + min: 16384 + max: 442476272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: jz5wm984g + job_id: jegnx2jq5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:07:53Z' + timestamp: '2024-06-22T23:16:18Z' - torchscript_onnx_tflite: - inference_time: 4859.0 - throughput: 205.80366330520684 + inference_time: 4688.0 + throughput: 213.31058020477815 estimated_peak_memory_range: min: 20480 - max: 364879056 + max: 365498784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jlpe4v8v5 + job_id: jz5wxojmp job_status: Passed torchscript_onnx_qnn: - inference_time: 4800.0 - throughput: 208.33333333333334 + inference_time: 4818.0 + throughput: 207.55500207555002 estimated_peak_memory_range: - min: 0 - max: 126702208 + min: 618496 + max: 120923264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jmg994k8g + job_id: jz576zq9g job_status: Passed - torchscript_onnx_ort: - inference_time: 5102.0 - throughput: 196.0015680125441 + torchscript_onnx: + inference_time: 4887.0 + throughput: 204.62451401677922 estimated_peak_memory_range: - min: 0 - max: 91577616 + min: 622592 + max: 86448960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: jmg994kmg + job_id: jopr9kz7p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:07:54Z' + timestamp: '2024-06-22T23:16:19Z' - torchscript_onnx_tflite: - inference_time: 6604.0 - throughput: 151.42337976983646 + inference_time: 6452.0 + throughput: 154.99070055796653 estimated_peak_memory_range: - min: 20480 - max: 3255112 + min: 32768 + max: 2593208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jygzv78xp + job_id: jmg98v68p job_status: Passed torchscript_onnx_qnn: - inference_time: 6571.0 - throughput: 152.18383807639628 + inference_time: 6644.0 + throughput: 150.51173991571343 estimated_peak_memory_range: - min: 0 - max: 35912680 + min: 12288 + max: 35889312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jvgd7v8zg + job_id: j0pxmvwlg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 
@@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:07:52Z' + timestamp: '2024-06-22T23:16:16Z' + - torchscript_onnx_tflite: + inference_time: 6481.0 + throughput: 154.29717636167257 + estimated_peak_memory_range: + min: 36864 + max: 204925088 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 147 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 147 + job_id: jnp130r75 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 6714.0 + throughput: 148.94250819183796 + estimated_peak_memory_range: + min: 12288 + max: 35760104 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 245 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 245 + job_id: jo5m4rj95 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:16:17Z' - torchscript_onnx_qnn: - inference_time: 9160.0 - throughput: 109.17030567685589 + inference_time: 6823.0 + throughput: 146.5630954125751 estimated_peak_memory_range: - min: 913408 - max: 913408 + min: 602112 + max: 602112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jnp1q877g + job_id: jqp48qz1g job_status: Passed - torchscript_onnx_ort: - inference_time: 6731.0 - throughput: 148.5663348685188 + torchscript_onnx: + inference_time: 6711.0 + throughput: 149.00908955446283 estimated_peak_memory_range: - min: 117399552 - max: 117399552 + min: 128811008 + max: 128811008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: jnp1q87ng + job_id: jep2j82q5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:07:55Z' + timestamp: '2024-06-22T23:16:20Z' diff --git a/qai_hub_models/models/resnext101_quantized/export.py b/qai_hub_models/models/resnext101_quantized/export.py index 007bc290..16037094 100644 --- a/qai_hub_models/models/resnext101_quantized/export.py +++ b/qai_hub_models/models/resnext101_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git 
a/qai_hub_models/models/resnext101_quantized/model.py b/qai_hub_models/models/resnext101_quantized/model.py index 521a1463..82c48e43 100644 --- a/qai_hub_models/models/resnext101_quantized/model.py +++ b/qai_hub_models/models/resnext101_quantized/model.py @@ -78,5 +78,4 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() return cls(sim) diff --git a/qai_hub_models/models/resnext101_quantized/perf.yaml b/qai_hub_models/models/resnext101_quantized/perf.yaml index 1537f3e3..becd66c5 100644 --- a/qai_hub_models/models/resnext101_quantized/perf.yaml +++ b/qai_hub_models/models/resnext101_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: ResNeXt101Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 2846.0 - throughput: 351.37034434293747 + inference_time: 2769.0 + throughput: 361.14120621162874 estimated_peak_memory_range: - min: 28672 - max: 2113784 + min: 20480 + max: 2649408 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jqp4jwm2p + job_id: j2p0kynn5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3084.0 - throughput: 324.25421530479895 + inference_time: 3056.0 + throughput: 327.22513089005236 estimated_peak_memory_range: - min: 16384 - max: 35906456 + min: 12288 + max: 34308616 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jep23m46g - job_status: Passed - torchscript_onnx_ort: - inference_time: 3364.0 - throughput: 297.2651605231867 - estimated_peak_memory_range: - min: 12288 - max: 140467400 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 151 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 151 - job_id: jogkry9v5 + job_id: j1p38kkn5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:09:57Z' + timestamp: '2024-06-22T23:18:19Z' - torchscript_onnx_tflite: - inference_time: 2072.0 - throughput: 482.6254826254826 + inference_time: 2080.0 + throughput: 480.7692307692308 estimated_peak_memory_range: min: 12288 - max: 258677904 + max: 263165600 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: j0pxe1385 + job_id: j1p88olop job_status: Passed torchscript_onnx_qnn: - inference_time: 2331.0 - throughput: 429.000429000429 + inference_time: 2282.0 + throughput: 438.21209465381247 estimated_peak_memory_range: - min: 12288 - max: 119524448 + min: 24576 + max: 115774864 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jqpyvdq0p - job_status: Passed - torchscript_onnx_ort: - inference_time: 2469.0 - throughput: 405.0222762251924 - estimated_peak_memory_range: - min: 12288 - max: 93879712 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 151 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 151 - job_id: 
jn5q92mep + job_id: jwgomyyk5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +120,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:09:58Z' + timestamp: '2024-06-22T23:18:20Z' - torchscript_onnx_tflite: - inference_time: 2846.0 - throughput: 351.37034434293747 + inference_time: 2808.0 + throughput: 356.1253561253561 estimated_peak_memory_range: - min: 16384 - max: 2438744 + min: 28672 + max: 2175304 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jo5mvzo75 + job_id: jogkdzjnp job_status: Passed torchscript_onnx_qnn: - inference_time: 3060.0 - throughput: 326.797385620915 + inference_time: 2990.0 + throughput: 334.44816053511704 estimated_peak_memory_range: - min: 16384 - max: 35555384 + min: 20480 + max: 33832856 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: j1p8w74qp + job_id: j7gj1xxeg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +158,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:09:56Z' + timestamp: '2024-06-22T23:18:23Z' - torchscript_onnx_tflite: - inference_time: 10195.0 - throughput: 98.0872976949485 + inference_time: 2754.0 + throughput: 363.10820624546113 + estimated_peak_memory_range: + min: 57344 + max: 2549704 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 148 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 148 + job_id: jn5qw8jo5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 3026.0 + throughput: 330.4692663582287 + estimated_peak_memory_range: + min: 16384 + max: 34367352 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 146 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 146 + job_id: jlpe299vp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:18:24Z' + - torchscript_onnx_tflite: + inference_time: 10200.0 + throughput: 98.03921568627452 estimated_peak_memory_range: min: 53248 - max: 195935712 + max: 200216400 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,7 +210,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jegnreoj5 + job_id: j1gl7nnm5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -207,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-08T23:09:51Z' + timestamp: '2024-06-22T23:18:17Z' - torchscript_onnx_tflite: - inference_time: 131262.0 - throughput: 7.618351084091359 + inference_time: 133033.0 + throughput: 7.516931889080153 estimated_peak_memory_range: min: 12288 - max: 356618752 + max: 353652680 primary_compute_unit: GPU precision: int8 layer_info: @@ -221,7 +233,7 @@ models: layers_on_gpu: 125 layers_on_cpu: 11 total_layers: 148 - job_id: jopr1yokg + job_id: jw56v66yp job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -230,13 +242,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-08T23:09:52Z' + timestamp: '2024-06-22T23:18:18Z' - torchscript_onnx_qnn: - inference_time: 3311.0 - throughput: 302.0235578375113 + inference_time: 3044.0 + throughput: 328.515111695138 estimated_peak_memory_range: - min: 262144 - max: 
262144 + min: 290816 + max: 290816 primary_compute_unit: NPU precision: int8 layer_info: @@ -244,22 +256,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: j2p0erv05 - job_status: Passed - torchscript_onnx_ort: - inference_time: 3294.0 - throughput: 303.58227079538557 - estimated_peak_memory_range: - min: 12066816 - max: 12066816 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 151 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 151 - job_id: j1glek12p + job_id: j1pv433rp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -268,4 +265,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:09:59Z' + timestamp: '2024-06-22T23:18:21Z' diff --git a/qai_hub_models/models/resnext50/export.py b/qai_hub_models/models/resnext50/export.py index ecbf5998..b7d7e673 100644 --- a/qai_hub_models/models/resnext50/export.py +++ b/qai_hub_models/models/resnext50/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/resnext50/perf.yaml b/qai_hub_models/models/resnext50/perf.yaml index 6ce25b12..44c68714 100644 --- a/qai_hub_models/models/resnext50/perf.yaml +++ b/qai_hub_models/models/resnext50/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: ResNeXt50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 2511.0 - throughput: 398.24771007566704 + inference_time: 2467.0 + throughput: 405.35062829347385 estimated_peak_memory_range: - min: 12288 - max: 2265792 + min: 16384 + max: 2377904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: j1p3qmwm5 + job_id: jqp48qq1g job_status: Passed torchscript_onnx_qnn: - inference_time: 2553.0 - throughput: 391.6960438699569 + inference_time: 2558.0 + throughput: 390.93041438623925 estimated_peak_memory_range: - min: 57344 - max: 21403728 + min: 
16384 + max: 98356192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j7gjklw15 + job_id: jopr9kk7p job_status: Passed - torchscript_onnx_ort: - inference_time: 2768.0 - throughput: 361.271676300578 + torchscript_onnx: + inference_time: 2763.0 + throughput: 361.92544335866813 estimated_peak_memory_range: - min: 16384 - max: 171552072 + min: 12288 + max: 174229864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jmg994xmg + job_id: jogkdzznp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:10:26Z' + timestamp: '2024-06-22T23:19:00Z' - torchscript_onnx_tflite: - inference_time: 1800.0 - throughput: 555.5555555555555 + inference_time: 1772.0 + throughput: 564.3340857787811 estimated_peak_memory_range: - min: 16384 - max: 163995360 + min: 12288 + max: 178219440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jwgoev41p + job_id: j0pxmvvlg job_status: Passed torchscript_onnx_qnn: - inference_time: 1878.0 - throughput: 532.4813631522896 + inference_time: 1847.0 + throughput: 541.4185165132648 estimated_peak_memory_range: - min: 0 - max: 60231440 + min: 618496 + max: 58117920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jlpe4vl85 + job_id: jep2j88q5 job_status: Passed - torchscript_onnx_ort: - inference_time: 1929.0 - throughput: 518.4033177812338 + torchscript_onnx: + inference_time: 1955.0 + throughput: 511.5089514066496 estimated_peak_memory_range: min: 618496 - max: 41928304 + max: 37332368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jnp1q8vng + job_id: jn5qw88o5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:10:27Z' + timestamp: '2024-06-22T23:19:01Z' - torchscript_onnx_tflite: - inference_time: 2498.0 - throughput: 400.320256204964 + inference_time: 2479.0 + throughput: 403.3884630899556 estimated_peak_memory_range: - min: 20480 - max: 2219560 + min: 16384 + max: 2012488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: j1pvzw9zg + job_id: jo5m4rr95 job_status: Passed torchscript_onnx_qnn: - inference_time: 2553.0 - throughput: 391.6960438699569 + inference_time: 2546.0 + throughput: 392.77297721916733 estimated_peak_memory_range: - min: 20480 - max: 88251120 + min: 446464 + max: 88449504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jz5wm914g + job_id: j2p0kyyn5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:10:25Z' + timestamp: '2024-06-22T23:18:58Z' + - torchscript_onnx_tflite: + inference_time: 2464.0 + throughput: 405.84415584415586 + estimated_peak_memory_range: + min: 20480 + max: 2390560 + 
primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 79 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 79 + job_id: jegnx22q5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 2557.0 + throughput: 391.08330074305826 + estimated_peak_memory_range: + min: 622592 + max: 88352592 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 126 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 126 + job_id: j1p88ooop + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:18:59Z' - torchscript_onnx_qnn: - inference_time: 2941.0 - throughput: 340.02040122407345 + inference_time: 2635.0 + throughput: 379.5066413662239 estimated_peak_memory_range: - min: 1044480 - max: 1044480 + min: 602112 + max: 602112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jygzv744p + job_id: jqpyneelg job_status: Passed - torchscript_onnx_ort: - inference_time: 2624.0 - throughput: 381.0975609756098 + torchscript_onnx: + inference_time: 2628.0 + throughput: 380.517503805175 estimated_peak_memory_range: - min: 46874624 - max: 46874624 + min: 39387136 + max: 39387136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jvgd7vz6g + job_id: j1gl7nzm5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:10:28Z' + timestamp: '2024-06-22T23:19:02Z' diff --git a/qai_hub_models/models/resnext50_quantized/export.py b/qai_hub_models/models/resnext50_quantized/export.py index 73b7aa57..484ba217 100644 --- a/qai_hub_models/models/resnext50_quantized/export.py +++ b/qai_hub_models/models/resnext50_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/resnext50_quantized/model.py b/qai_hub_models/models/resnext50_quantized/model.py index 0bbd1d35..101378c3 100644 --- a/qai_hub_models/models/resnext50_quantized/model.py +++ b/qai_hub_models/models/resnext50_quantized/model.py @@ -78,5 +78,4 @@ def from_pretrained( ).fetch() 
load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() return cls(sim) diff --git a/qai_hub_models/models/resnext50_quantized/perf.yaml b/qai_hub_models/models/resnext50_quantized/perf.yaml index b5dae635..76846bce 100644 --- a/qai_hub_models/models/resnext50_quantized/perf.yaml +++ b/qai_hub_models/models/resnext50_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: ResNeXt50Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 943.0 - throughput: 1060.4453870625662 + inference_time: 944.0 + throughput: 1059.322033898305 estimated_peak_memory_range: - min: 32768 - max: 1732496 + min: 24576 + max: 1806704 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: jqp4jw92p + job_id: j1p38k3n5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1179.0 - throughput: 848.1764206955047 + inference_time: 1167.0 + throughput: 856.898029134533 estimated_peak_memory_range: - min: 20480 - max: 66746984 + min: 16384 + max: 67744776 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: jep23mv6g - job_status: Passed - torchscript_onnx_ort: - inference_time: 1353.0 - throughput: 739.0983000739099 - estimated_peak_memory_range: - min: 28672 - max: 79646016 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 83 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 83 - job_id: jn5q92vep + job_id: jz5wxovmp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:11:25Z' + timestamp: '2024-06-22T23:19:56Z' - torchscript_onnx_tflite: - inference_time: 710.0 - throughput: 1408.4507042253522 + inference_time: 706.0 + throughput: 1416.4305949008499 estimated_peak_memory_range: - min: 1523712 - max: 101683104 + min: 12288 + max: 102621408 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: j0pxe1d85 + job_id: jwgomy0k5 job_status: Passed torchscript_onnx_qnn: - inference_time: 873.0 - throughput: 1145.475372279496 + inference_time: 864.0 + throughput: 1157.4074074074074 estimated_peak_memory_range: min: 163840 - max: 57724624 + max: 52528384 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: jqpyvd70p - job_status: Passed - torchscript_onnx_ort: - inference_time: 991.0 - throughput: 1009.0817356205853 - estimated_peak_memory_range: - min: 28672 - max: 41643216 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 83 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 83 - job_id: j1glekl2p + job_id: jmg98v18p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +120,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:11:26Z' + timestamp: '2024-06-22T23:19:57Z' - torchscript_onnx_tflite: - inference_time: 944.0 - throughput: 
1059.322033898305 + inference_time: 917.0 + throughput: 1090.5125408942204 estimated_peak_memory_range: min: 12288 - max: 2151184 + max: 32429552 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: jo5mvzd75 + job_id: j1pv43orp job_status: Passed torchscript_onnx_qnn: - inference_time: 1183.0 - throughput: 845.30853761623 + inference_time: 1166.0 + throughput: 857.6329331046312 estimated_peak_memory_range: - min: 16384 - max: 66707936 + min: 20480 + max: 11826992 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: jogkry8v5 + job_id: jvgd0w9zp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +158,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:11:24Z' + timestamp: '2024-06-22T23:20:01Z' - torchscript_onnx_tflite: - inference_time: 3287.0 - throughput: 304.228780042592 + inference_time: 936.0 + throughput: 1068.3760683760684 estimated_peak_memory_range: - min: 12288 - max: 55813072 + min: 16384 + max: 1454864 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,7 +172,45 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: jegnre7j5 + job_id: j7gj1xmeg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1174.0 + throughput: 851.7887563884157 + estimated_peak_memory_range: + min: 16384 + max: 65522632 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 78 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 78 + job_id: jz576zw9g + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:20:02Z' + - torchscript_onnx_tflite: + inference_time: 3071.0 + throughput: 325.626831650928 + estimated_peak_memory_range: + min: 24576 + max: 56368512 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 80 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 80 + job_id: jlpe291vp job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -207,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-08T23:11:19Z' + timestamp: '2024-06-22T23:19:54Z' - torchscript_onnx_tflite: - inference_time: 64039.0 - throughput: 15.615484314246006 + inference_time: 65697.0 + throughput: 15.221395193083398 estimated_peak_memory_range: - min: 868352 - max: 98172464 + min: 57344 + max: 97840808 primary_compute_unit: GPU precision: int8 layer_info: @@ -221,7 +233,7 @@ models: layers_on_gpu: 57 layers_on_cpu: 11 total_layers: 80 - job_id: jopr1ynkg + job_id: jygzwe9xg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -230,13 +242,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-08T23:11:20Z' + timestamp: '2024-06-22T23:19:55Z' - torchscript_onnx_qnn: - inference_time: 1350.0 - throughput: 740.7407407407408 + inference_time: 1203.0 + throughput: 831.255195344971 estimated_peak_memory_range: - min: 1429504 - max: 1429504 + min: 458752 + max: 458752 primary_compute_unit: NPU precision: int8 layer_info: @@ -244,22 +256,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: j2p0er605 - job_status: Passed - torchscript_onnx_ort: - inference_time: 1264.0 - throughput: 791.1392405063291 - estimated_peak_memory_range: - min: 
24887296 - max: 24887296 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 83 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 83 - job_id: jw56q1wng + job_id: jnp130l75 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -268,4 +265,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:11:27Z' + timestamp: '2024-06-22T23:19:59Z' diff --git a/qai_hub_models/models/riffusion_quantized/export.py b/qai_hub_models/models/riffusion_quantized/export.py index 428d3b0f..9ad9d12c 100644 --- a/qai_hub_models/models/riffusion_quantized/export.py +++ b/qai_hub_models/models/riffusion_quantized/export.py @@ -27,7 +27,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, components: Optional[List[str]] = None, skip_profiling: bool = False, diff --git a/qai_hub_models/models/sam/export.py b/qai_hub_models/models/sam/export.py index 962a8058..3602e58c 100644 --- a/qai_hub_models/models/sam/export.py +++ b/qai_hub_models/models/sam/export.py @@ -34,7 +34,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, components: Optional[List[str]] = None, skip_profiling: bool = False, @@ -128,7 +128,6 @@ def export_model( for component_name, component in components_dict.items(): # Trace the model input_spec = component.get_input_spec() - component.eval() source_model = torch.jit.trace( component.to("cpu"), make_torch_inputs(input_spec) ) @@ -204,7 +203,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -250,8 +249,8 @@ def main(): model_cls=Model, components=ALL_COMPONENTS, supports_qnn=False, - supports_ort=False, - supports_precompiled_ort=False, + supports_onnx=False, + supports_precompiled_qnn_onnx=False, ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/sam/perf.yaml b/qai_hub_models/models/sam/perf.yaml index 370ec409..d2f22527 100644 --- a/qai_hub_models/models/sam/perf.yaml +++ b/qai_hub_models/models/sam/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,19 +38,19 @@ models: - name: SAMDecoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 48230.0 - throughput: 20.733982998133943 + inference_time: 48549.0 + throughput: 20.59774660652125 estimated_peak_memory_range: - min: 4026368 - max: 7727688 + min: 4288512 + max: 13324816 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 340 + layers_on_npu: 341 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 340 - job_id: j7gjklq15 + total_layers: 341 + job_id: jqpynewlg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -57,21 +59,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:12:47Z' + timestamp: '2024-06-22T23:21:23Z' - 
torchscript_onnx_tflite: - inference_time: 34548.0 - throughput: 28.9452356142179 + inference_time: 35232.0 + throughput: 28.38328792007266 estimated_peak_memory_range: - min: 12288 - max: 245149360 + min: 2129920 + max: 257126480 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 340 + layers_on_npu: 341 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 340 - job_id: jygzv7n4p + total_layers: 341 + job_id: j1p88o9op job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -80,21 +82,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:12:49Z' + timestamp: '2024-06-22T23:21:26Z' - torchscript_onnx_tflite: - inference_time: 48060.0 - throughput: 20.807324178110694 + inference_time: 47742.0 + throughput: 20.945917640651835 estimated_peak_memory_range: - min: 4009984 - max: 12530416 + min: 4038656 + max: 13371224 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 340 + layers_on_npu: 341 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 340 - job_id: jmg994dmg + total_layers: 341 + job_id: jn5qw8ko5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -103,23 +105,46 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:12:50Z' + timestamp: '2024-06-22T23:21:28Z' + - torchscript_onnx_tflite: + inference_time: 47985.0 + throughput: 20.83984578514119 + estimated_peak_memory_range: + min: 4038656 + max: 7051640 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 341 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 341 + job_id: jw56v6lyp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:21:30Z' - name: SAMEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 12009970.0 - throughput: 0.08326415469813829 + inference_time: 11689749.0 + throughput: 0.0855450360824685 estimated_peak_memory_range: - min: 2723000320 - max: 2727292856 + min: 2661216256 + max: 2670815904 primary_compute_unit: CPU precision: fp32 layer_info: layers_on_npu: 0 layers_on_gpu: 37 - layers_on_cpu: 771 - total_layers: 808 - job_id: jlpe4vy85 + layers_on_cpu: 783 + total_layers: 820 + job_id: j2p0kyqn5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -128,21 +153,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:12:47Z' + timestamp: '2024-06-22T23:21:24Z' - torchscript_onnx_tflite: - inference_time: 9639117.0 - throughput: 0.10374394252087614 + inference_time: 10416533.0 + throughput: 0.09600123188780758 estimated_peak_memory_range: - min: 2582843392 - max: 2946188672 + min: 2565206016 + max: 2927444288 primary_compute_unit: CPU precision: fp32 layer_info: layers_on_npu: 0 layers_on_gpu: 37 - layers_on_cpu: 771 - total_layers: 808 - job_id: jz5wm944g + layers_on_cpu: 783 + total_layers: 820 + job_id: jogkdznnp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -151,21 +176,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:12:49Z' + timestamp: '2024-06-22T23:21:26Z' - torchscript_onnx_tflite: - inference_time: 11285658.0 - throughput: 0.08860803685527242 + inference_time: 11668449.0 + throughput: 0.08570119302059769 estimated_peak_memory_range: - min: 2642145280 - max: 2645812336 + min: 2719719424 + max: 2724482808 
primary_compute_unit: CPU precision: fp32 layer_info: layers_on_npu: 0 layers_on_gpu: 37 - layers_on_cpu: 771 - total_layers: 808 - job_id: jnp1q86ng + layers_on_cpu: 783 + total_layers: 820 + job_id: j1gl7nrm5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -174,4 +199,4 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:12:51Z' + timestamp: '2024-06-22T23:21:28Z' diff --git a/qai_hub_models/models/sesr_m5/export.py b/qai_hub_models/models/sesr_m5/export.py index de36e9d6..873520ad 100644 --- a/qai_hub_models/models/sesr_m5/export.py +++ b/qai_hub_models/models/sesr_m5/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -186,7 +185,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -208,7 +207,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/sesr_m5/info.yaml b/qai_hub_models/models/sesr_m5/info.yaml index f6715c77..cfe28c66 100644 --- a/qai_hub_models/models/sesr_m5/info.yaml +++ b/qai_hub_models/models/sesr_m5/info.yaml @@ -15,7 +15,7 @@ deploy_license: source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/sesr technical_details: Model checkpoint: sesr_m5_3x_checkpoint - Input resolution: 640x360 + Input resolution: 128x128 Number of parameters: 338K Model size: 1.30 MB applicable_scenarios: diff --git a/qai_hub_models/models/sesr_m5/model.py b/qai_hub_models/models/sesr_m5/model.py index b8aa863e..43627434 100644 --- a/qai_hub_models/models/sesr_m5/model.py +++ b/qai_hub_models/models/sesr_m5/model.py @@ -41,6 +41,5 @@ def from_pretrained(cls, scale_factor: int = DEFAULT_SCALE_FACTOR) -> SESR_M5: ) checkpoint = load_torch(checkpoint_asset) model.load_state_dict(checkpoint["state_dict"]) - model.eval() return cls(model, scale_factor) diff --git a/qai_hub_models/models/sesr_m5/perf.yaml b/qai_hub_models/models/sesr_m5/perf.yaml index 04e56cbe..196c4b2d 100644 --- a/qai_hub_models/models/sesr_m5/perf.yaml +++ b/qai_hub_models/models/sesr_m5/perf.yaml @@ -9,6 +9,7 @@ aggregated: 
- Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: SESR-M5 performance_metrics: - torchscript_onnx_tflite: - inference_time: 2201.0 - throughput: 454.3389368468878 + inference_time: 2231.0 + throughput: 448.22949350067233 estimated_peak_memory_range: - min: 16384 - max: 2206696 + min: 24576 + max: 1518464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: j1gle1ymp + job_id: jvgd0wlzp job_status: Passed torchscript_onnx_qnn: - inference_time: 2133.0 - throughput: 468.8232536333802 + inference_time: 2138.0 + throughput: 467.7268475210477 estimated_peak_memory_range: - min: 2113536 - max: 6868544 + min: 12288 + max: 5006592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jwgoe4lkp + job_id: jo5m4ry95 job_status: Passed - torchscript_onnx_ort: - inference_time: 2875.0 - throughput: 347.82608695652175 + torchscript_onnx: + inference_time: 2890.0 + throughput: 346.02076124567475 estimated_peak_memory_range: - min: 12288 - max: 6151368 + min: 16384 + max: 6069072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: jygzv4lxp + job_id: j2p0ky7n5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-11T11:58:43Z' + timestamp: '2024-06-22T23:22:11Z' - torchscript_onnx_tflite: - inference_time: 1621.0 - throughput: 616.9031462060457 + inference_time: 1678.0 + throughput: 595.9475566150179 estimated_peak_memory_range: min: 16384 - max: 25573456 + max: 26285840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: jw56qd8yg + job_id: jz576z39g job_status: Passed torchscript_onnx_qnn: - inference_time: 1460.0 - throughput: 684.931506849315 + inference_time: 1461.0 + throughput: 684.4626967830253 estimated_peak_memory_range: min: 204800 - max: 26892880 + max: 21246608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: j1pvz9lrg + job_id: jegnx28q5 job_status: Passed - torchscript_onnx_ort: - inference_time: 1954.0 - throughput: 511.77072671443193 + torchscript_onnx: + inference_time: 1939.0 + throughput: 515.7297576070139 estimated_peak_memory_range: min: 212992 - max: 20764320 + max: 21682432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: jz5wm1lmg + job_id: j1p88ovop job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-11T11:58:43Z' + timestamp: '2024-06-22T23:22:12Z' - torchscript_onnx_tflite: - inference_time: 2290.0 - throughput: 436.68122270742356 + inference_time: 2241.0 + throughput: 446.2293618920125 estimated_peak_memory_range: min: 28672 - max: 8571536 + max: 1296088 primary_compute_unit: NPU precision: fp16 
layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: j1p3qwzn5 + job_id: jqp48q01g job_status: Passed torchscript_onnx_qnn: - inference_time: 2135.0 - throughput: 468.384074941452 + inference_time: 2148.0 + throughput: 465.54934823091247 estimated_peak_memory_range: - min: 16384 - max: 9688296 + min: 57344 + max: 4305864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jlpe4l7v5 + job_id: jep2j8nq5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-11T11:58:42Z' + timestamp: '2024-06-22T23:22:08Z' + - torchscript_onnx_tflite: + inference_time: 2247.0 + throughput: 445.0378282153983 + estimated_peak_memory_range: + min: 24576 + max: 1504000 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 22 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 25 + job_id: j0pxmv2lg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 2129.0 + throughput: 469.7040864255519 + estimated_peak_memory_range: + min: 16384 + max: 79333368 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 31 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 31 + job_id: jqpyne0lg + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:22:10Z' - torchscript_onnx_qnn: - inference_time: 2971.0 - throughput: 336.58700774150117 + inference_time: 2424.0 + throughput: 412.54125412541254 estimated_peak_memory_range: - min: 221184 - max: 221184 + min: 204800 + max: 204800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: j7gjkwre5 + job_id: jopr9kj7p job_status: Passed - torchscript_onnx_ort: - inference_time: 2911.0 - throughput: 343.52456200618343 + torchscript_onnx: + inference_time: 2941.0 + throughput: 340.02040122407345 estimated_peak_memory_range: - min: 12976128 - max: 12976128 + min: 12984320 + max: 12984320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: jmg99xz8g + job_id: jogkdzmnp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-11T11:58:44Z' + timestamp: '2024-06-22T23:22:13Z' diff --git a/qai_hub_models/models/sesr_m5_quantized/export.py b/qai_hub_models/models/sesr_m5_quantized/export.py index 770f96a9..bcdedef7 100644 --- a/qai_hub_models/models/sesr_m5_quantized/export.py +++ b/qai_hub_models/models/sesr_m5_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel 
first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -216,7 +216,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/sesr_m5_quantized/info.yaml b/qai_hub_models/models/sesr_m5_quantized/info.yaml index 1852e48b..608c2f05 100644 --- a/qai_hub_models/models/sesr_m5_quantized/info.yaml +++ b/qai_hub_models/models/sesr_m5_quantized/info.yaml @@ -15,7 +15,7 @@ deploy_license: source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/sesr technical_details: Model checkpoint: sesr_m5_4x_checkpoint - Input resolution: 640x360 + Input resolution: 128x128 Number of parameters: 338K Model size: 389 KB applicable_scenarios: diff --git a/qai_hub_models/models/sesr_m5_quantized/model.py b/qai_hub_models/models/sesr_m5_quantized/model.py index 2d6a806d..c6638c3d 100644 --- a/qai_hub_models/models/sesr_m5_quantized/model.py +++ b/qai_hub_models/models/sesr_m5_quantized/model.py @@ -73,6 +73,4 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() - return cls(sim, scale_factor) diff --git a/qai_hub_models/models/sesr_m5_quantized/perf.yaml b/qai_hub_models/models/sesr_m5_quantized/perf.yaml index 617858a7..c02b6215 100644 --- a/qai_hub_models/models/sesr_m5_quantized/perf.yaml +++ b/qai_hub_models/models/sesr_m5_quantized/perf.yaml @@ -8,11 +8,11 @@ aggregated: - Google Pixel 4 - Google Pixel 4a - Google Pixel 5a 5G - - QCS6490 (Proxy) - QCS8250 (Proxy) - QCS8550 (Proxy) - - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -30,9 +30,10 @@ aggregated: - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +43,11 @@ models: - name: SESR-M5-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1220.0 - throughput: 819.672131147541 + inference_time: 1222.0 + throughput: 818.3306055646481 estimated_peak_memory_range: - min: 24576 - max: 1557800 + min: 36864 + max: 1336288 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: jvgd7zdzg + job_id: j1gl7zmm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1050.0 - throughput: 952.3809523809524 + inference_time: 1070.0 + throughput: 934.5794392523364 estimated_peak_memory_range: - min: 65536 - max: 4040712 + min: 12288 + max: 9384952 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +70,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 26 - job_id: jegnr7zq5 - job_status: Passed - 
torchscript_onnx_ort: - inference_time: 1055.0 - throughput: 947.8672985781991 - estimated_peak_memory_range: - min: 12288 - max: 4410832 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 29 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 29 - job_id: j2p0evmn5 + job_id: jlpe21mvp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +79,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-11T11:59:10Z' + timestamp: '2024-06-22T23:22:39Z' - torchscript_onnx_tflite: - inference_time: 1043.0 - throughput: 958.7727708533077 + inference_time: 1015.0 + throughput: 985.2216748768473 estimated_peak_memory_range: - min: 16384 - max: 23270336 + min: 12288 + max: 23781808 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +93,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: jz57v7e95 + job_id: jw56vj4yp job_status: Passed torchscript_onnx_qnn: - inference_time: 754.0 - throughput: 1326.2599469496022 + inference_time: 755.0 + throughput: 1324.5033112582782 estimated_peak_memory_range: - min: 0 - max: 21775952 + min: 61440 + max: 22369056 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +108,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 26 - job_id: jopr1nl7g - job_status: Passed - torchscript_onnx_ort: - inference_time: 808.0 - throughput: 1237.6237623762377 - estimated_peak_memory_range: - min: 24576 - max: 16135216 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 29 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 29 - job_id: j1p8w4eop + job_id: jygzw9dxg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +117,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-11T11:59:11Z' + timestamp: '2024-06-22T23:22:40Z' - torchscript_onnx_tflite: - inference_time: 1214.0 - throughput: 823.7232289950576 + inference_time: 1241.0 + throughput: 805.8017727639001 estimated_peak_memory_range: min: 12288 - max: 1428272 + max: 2099752 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +131,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: jqp4j9y1p + job_id: j1p3830n5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1049.0 - throughput: 953.2888465204957 + inference_time: 1041.0 + throughput: 960.6147934678194 estimated_peak_memory_range: min: 12288 - max: 80506384 + max: 15321680 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +146,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 26 - job_id: jqpyv7olp + job_id: jmg981n8p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +155,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-11T11:59:09Z' + timestamp: '2024-06-22T23:22:43Z' - torchscript_onnx_tflite: - inference_time: 3090.0 - throughput: 323.62459546925567 + inference_time: 1227.0 + throughput: 814.9959250203749 estimated_peak_memory_range: min: 12288 - max: 16873840 + max: 1397720 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 22 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 25 + job_id: jwgom06k5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1041.0 + throughput: 960.6147934678194 + estimated_peak_memory_range: + min: 16384 + max: 55896296 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 26 + 
layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 26 + job_id: jnp13lz75 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:22:44Z' + - torchscript_onnx_tflite: + inference_time: 3105.0 + throughput: 322.061191626409 + estimated_peak_memory_range: + min: 49152 + max: 17239376 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: j0pxedll5 + job_id: j1pv4okrp job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -207,13 +216,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-11T11:59:04Z' + timestamp: '2024-06-22T23:22:37Z' - torchscript_onnx_tflite: - inference_time: 16778.0 - throughput: 59.60185957801883 + inference_time: 15911.0 + throughput: 62.84960090503425 estimated_peak_memory_range: - min: 249856 - max: 7422256 + min: 3657728 + max: 6562240 primary_compute_unit: NPU precision: int8 layer_info: @@ -221,7 +230,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: jo5mvd095 + job_id: j7gj1mneg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -230,10 +239,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-11T11:59:05Z' + timestamp: '2024-06-22T23:22:38Z' - torchscript_onnx_qnn: - inference_time: 1198.0 - throughput: 834.7245409015025 + inference_time: 1053.0 + throughput: 949.667616334283 estimated_peak_memory_range: min: 57344 max: 57344 @@ -244,22 +253,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 26 - job_id: jep23vrqg - job_status: Passed - torchscript_onnx_ort: - inference_time: 1092.0 - throughput: 915.7509157509157 - estimated_peak_memory_range: - min: 5398528 - max: 5398528 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 29 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 29 - job_id: jogkr92n5 + job_id: jz5wxv6mp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -268,4 +262,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-11T11:59:12Z' + timestamp: '2024-06-22T23:22:42Z' diff --git a/qai_hub_models/models/shufflenet_v2/export.py b/qai_hub_models/models/shufflenet_v2/export.py index a3bfc586..7a11fba9 100644 --- a/qai_hub_models/models/shufflenet_v2/export.py +++ b/qai_hub_models/models/shufflenet_v2/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif 
target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/shufflenet_v2/perf.yaml b/qai_hub_models/models/shufflenet_v2/perf.yaml index 0d002af4..b3ddccf5 100644 --- a/qai_hub_models/models/shufflenet_v2/perf.yaml +++ b/qai_hub_models/models/shufflenet_v2/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: Shufflenet-v2 performance_metrics: - torchscript_onnx_tflite: - inference_time: 1229.0 - throughput: 813.6696501220505 + inference_time: 1230.0 + throughput: 813.0081300813008 estimated_peak_memory_range: - min: 49152 - max: 1892400 + min: 24576 + max: 1946184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: jqpyvdx0p + job_id: jqp48or2g job_status: Passed torchscript_onnx_qnn: - inference_time: 767.0 - throughput: 1303.7809647979138 + inference_time: 779.0 + throughput: 1283.6970474967907 estimated_peak_memory_range: - min: 12288 - max: 127973560 + min: 622592 + max: 4960848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jogkry2v5 + job_id: jopr9q3kp job_status: Passed - torchscript_onnx_ort: - inference_time: 1093.0 - throughput: 914.9130832570905 + torchscript_onnx: + inference_time: 1070.0 + throughput: 934.5794392523364 estimated_peak_memory_range: - min: 0 - max: 4739736 + min: 12288 + max: 11408736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 223 - job_id: j1p3qmzm5 + job_id: jogkdnevp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:14:43Z' + timestamp: '2024-06-22T23:23:19Z' - torchscript_onnx_tflite: - inference_time: 816.0 - throughput: 1225.4901960784314 + inference_time: 815.0 + throughput: 1226.993865030675 estimated_peak_memory_range: - min: 12288 - max: 34358736 + min: 16384 + max: 37553248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: j2p0erm05 + job_id: j0pxmjo8g job_status: Passed torchscript_onnx_qnn: inference_time: 519.0 throughput: 1926.7822736030828 estimated_peak_memory_range: - min: 12288 - max: 59916624 + min: 618496 + max: 53660880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jn5q92lep + job_id: jep2j6y65 job_status: Passed - torchscript_onnx_ort: - inference_time: 762.0 - throughput: 1312.3359580052493 + torchscript_onnx: + inference_time: 741.0 + throughput: 1349.527665317139 estimated_peak_memory_range: - min: 12288 - max: 24863536 + min: 618496 + max: 19576080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 223 - job_id: jwgoevl1p + job_id: jn5qwk6e5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:14:44Z' + timestamp: '2024-06-22T23:23:20Z' - torchscript_onnx_tflite: - inference_time: 1223.0 - throughput: 817.6614881439084 + inference_time: 1221.0 + throughput: 819.000819000819 estimated_peak_memory_range: - min: 28672 - max: 1440712 + min: 12288 + max: 1486952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: j1p8w7eqp + job_id: jo5m42x75 job_status: Passed torchscript_onnx_qnn: - inference_time: 769.0 - throughput: 1300.3901170351105 + inference_time: 780.0 + throughput: 1282.051282051282 estimated_peak_memory_range: - min: 622592 - max: 138856072 + min: 77824 + max: 4781416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jw56q18ng + job_id: j2p0kqz05 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:14:42Z' + timestamp: '2024-06-22T23:23:17Z' + - torchscript_onnx_tflite: + inference_time: 1231.0 + throughput: 812.3476848090983 + estimated_peak_memory_range: + min: 12288 + max: 1741560 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 204 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 204 + job_id: jegnxyvj5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 785.0 + throughput: 1273.8853503184714 + estimated_peak_memory_range: + min: 36864 + max: 17255528 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 158 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 158 + job_id: j1p889qqp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:23:18Z' - torchscript_onnx_qnn: - inference_time: 1095.0 - throughput: 913.2420091324201 + inference_time: 890.0 + throughput: 1123.5955056179776 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: j1gleky2p + job_id: jqpynw30g job_status: Passed - torchscript_onnx_ort: - inference_time: 1103.0 - throughput: 906.6183136899365 + torchscript_onnx: + inference_time: 1113.0 + throughput: 898.4725965858041 estimated_peak_memory_range: - min: 5971968 - max: 5971968 + min: 7417856 + max: 7417856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 223 - job_id: j1pvzwlzg + job_id: j1gl7zv25 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:14:45Z' + timestamp: '2024-06-22T23:23:21Z' diff --git a/qai_hub_models/models/shufflenet_v2_quantized/export.py b/qai_hub_models/models/shufflenet_v2_quantized/export.py index 437ca9e5..a66bc769 100644 --- a/qai_hub_models/models/shufflenet_v2_quantized/export.py +++ b/qai_hub_models/models/shufflenet_v2_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: 
str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -227,7 +227,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_ort=False) + parser = export_parser(model_cls=Model, supports_onnx=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/shufflenet_v2_quantized/model.py b/qai_hub_models/models/shufflenet_v2_quantized/model.py index a3d7540d..e77c1af3 100644 --- a/qai_hub_models/models/shufflenet_v2_quantized/model.py +++ b/qai_hub_models/models/shufflenet_v2_quantized/model.py @@ -92,5 +92,4 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() return cls(sim) diff --git a/qai_hub_models/models/shufflenet_v2_quantized/perf.yaml b/qai_hub_models/models/shufflenet_v2_quantized/perf.yaml index 342366ce..ec0a0951 100644 --- a/qai_hub_models/models/shufflenet_v2_quantized/perf.yaml +++ b/qai_hub_models/models/shufflenet_v2_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: Shufflenet-v2Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 629.0 - throughput: 1589.825119236884 + inference_time: 642.0 + throughput: 1557.632398753894 estimated_peak_memory_range: - min: 16384 - max: 1932240 + min: 12288 + max: 2212664 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 205 - job_id: jlpe4v785 + job_id: j1p383jm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 582.0 - throughput: 1718.213058419244 + inference_time: 583.0 + throughput: 1715.2658662092624 estimated_peak_memory_range: min: 16384 - max: 102592048 + max: 64010128 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,7 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: jvgd7vd6g + job_id: jz5wxve4p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -78,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:15:22Z' + timestamp: '2024-06-22T23:23:59Z' - torchscript_onnx_tflite: - inference_time: 
459.0 - throughput: 2178.649237472767 + inference_time: 460.0 + throughput: 2173.913043478261 estimated_peak_memory_range: - min: 12288 - max: 23307232 + min: 16384 + max: 26620064 primary_compute_unit: NPU precision: int8 layer_info: @@ -92,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 205 - job_id: jygzv7l4p + job_id: jwgom0215 job_status: Passed torchscript_onnx_qnn: - inference_time: 418.0 - throughput: 2392.3444976076553 + inference_time: 420.0 + throughput: 2380.9523809523807 estimated_peak_memory_range: min: 163840 - max: 50012432 + max: 45420992 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,7 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: jz5wm9lzg + job_id: jmg981lmp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -116,13 +120,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:15:23Z' + timestamp: '2024-06-22T23:24:01Z' - torchscript_onnx_tflite: - inference_time: 649.0 - throughput: 1540.8320493066255 + inference_time: 622.0 + throughput: 1607.717041800643 estimated_peak_memory_range: min: 12288 - max: 1631760 + max: 1609232 primary_compute_unit: NPU precision: int8 layer_info: @@ -130,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 205 - job_id: jz5wm9l4g + job_id: j1pv4o6zp job_status: Passed torchscript_onnx_qnn: - inference_time: 585.0 - throughput: 1709.4017094017095 + inference_time: 587.0 + throughput: 1703.5775127768313 estimated_peak_memory_range: - min: 163840 - max: 77147648 + min: 16384 + max: 88702944 primary_compute_unit: NPU precision: int8 layer_info: @@ -145,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: jnp1q8nkg + job_id: jvgd09x6p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -154,13 +158,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:15:26Z' + timestamp: '2024-06-22T23:24:03Z' + - torchscript_onnx_tflite: + inference_time: 625.0 + throughput: 1600.0 + estimated_peak_memory_range: + min: 16384 + max: 1789400 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 205 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 205 + job_id: j7gj1mv1g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 598.0 + throughput: 1672.2408026755852 + estimated_peak_memory_range: + min: 172032 + max: 98562128 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 122 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 122 + job_id: jz576wyng + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:24:05Z' - torchscript_onnx_tflite: - inference_time: 944.0 - throughput: 1059.322033898305 + inference_time: 976.0 + throughput: 1024.5901639344263 estimated_peak_memory_range: min: 12288 - max: 17202032 + max: 19164000 primary_compute_unit: NPU precision: int8 layer_info: @@ -168,7 +210,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 205 - job_id: jmg994zmg + job_id: jlpe21d8p job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -177,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-08T23:15:20Z' + timestamp: '2024-06-22T23:23:57Z' - torchscript_onnx_tflite: - inference_time: 8510.0 - throughput: 117.50881316098707 + inference_time: 9453.0 
+ throughput: 105.78652279699567 estimated_peak_memory_range: - min: 65536 - max: 5500048 + min: 176128 + max: 9298744 primary_compute_unit: CPU precision: fp32 layer_info: @@ -191,7 +233,7 @@ models: layers_on_gpu: 9 layers_on_cpu: 153 total_layers: 205 - job_id: jnp1q8nng + job_id: jygzw934g job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -200,13 +242,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-08T23:15:21Z' + timestamp: '2024-06-22T23:23:58Z' - torchscript_onnx_qnn: - inference_time: 694.0 - throughput: 1440.922190201729 + inference_time: 658.0 + throughput: 1519.756838905775 estimated_peak_memory_range: - min: 618496 - max: 618496 + min: 540672 + max: 540672 primary_compute_unit: NPU precision: int8 layer_info: @@ -214,7 +256,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: jmg994zqg + job_id: jnp13l4n5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -223,4 +265,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:15:24Z' + timestamp: '2024-06-22T23:24:02Z' diff --git a/qai_hub_models/models/sinet/app.py b/qai_hub_models/models/sinet/app.py index 00a29a2f..3d3ba287 100644 --- a/qai_hub_models/models/sinet/app.py +++ b/qai_hub_models/models/sinet/app.py @@ -78,8 +78,7 @@ def predict( """ input_tensor = preprocess_image(image) - with torch.no_grad(): - output = self.model(input_tensor) + output = self.model(input_tensor) face_map = (output[0].data.cpu() > 0).numpy()[0] bg_map = output[0].max(0)[1].byte().data.cpu().numpy() diff --git a/qai_hub_models/models/sinet/export.py b/qai_hub_models/models/sinet/export.py index 495dde9f..2dfd9f5c 100644 --- a/qai_hub_models/models/sinet/export.py +++ b/qai_hub_models/models/sinet/export.py @@ -38,7 +38,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,13 +117,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -169,7 +168,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -187,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -209,7 +208,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git 
a/qai_hub_models/models/sinet/model.py b/qai_hub_models/models/sinet/model.py index 76625bd3..1a593d16 100644 --- a/qai_hub_models/models/sinet/model.py +++ b/qai_hub_models/models/sinet/model.py @@ -40,7 +40,7 @@ def __init__( def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> SINet: sinet_model = _load_sinet_source_model_from_weights(weights) - return cls(sinet_model.eval()) + return cls(sinet_model) def forward(self, image: torch.Tensor) -> torch.Tensor: """ diff --git a/qai_hub_models/models/sinet/perf.yaml b/qai_hub_models/models/sinet/perf.yaml index 79d10a22..504e9f64 100644 --- a/qai_hub_models/models/sinet/perf.yaml +++ b/qai_hub_models/models/sinet/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: SINet performance_metrics: - torchscript_onnx_tflite: - inference_time: 1808.0 - throughput: 553.0973451327434 + inference_time: 1758.0 + throughput: 568.8282138794084 estimated_peak_memory_range: - min: 16384 - max: 1874832 + min: 20480 + max: 2455968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 240 - job_id: jo5mvz0y5 + job_id: jqpynw10g job_status: Passed torchscript_onnx_qnn: - inference_time: 1170.0 - throughput: 854.7008547008547 + inference_time: 1168.0 + throughput: 856.1643835616438 estimated_peak_memory_range: - min: 16384 - max: 4437520 + min: 622592 + max: 28052072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jep23moxg + job_id: jn5qwk0e5 job_status: Passed - torchscript_onnx_ort: - inference_time: 2242.0 - throughput: 446.03033006244425 + torchscript_onnx: + inference_time: 2243.0 + throughput: 445.83147570218455 estimated_peak_memory_range: - min: 233472 - max: 61135024 + min: 16384 + max: 5425456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 229 - job_id: jogkry6y5 + job_id: j1pv4oqzp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:15:55Z' + timestamp: '2024-06-22T23:24:40Z' - torchscript_onnx_tflite: - inference_time: 1188.0 - throughput: 841.7508417508418 + inference_time: 1160.0 + throughput: 862.0689655172414 estimated_peak_memory_range: - min: 12288 - max: 27213536 + min: 16384 + max: 29922544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 240 - job_id: jegnre1v5 + job_id: j2p0kq405 job_status: Passed torchscript_onnx_qnn: inference_time: 784.0 throughput: 1275.5102040816328 estimated_peak_memory_range: - min: 0 - max: 67399104 + min: 12288 + max: 61060032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jqpyvd8rp + job_id: j1gl7z425 job_status: Passed - torchscript_onnx_ort: - inference_time: 1564.0 - throughput: 639.386189258312 + torchscript_onnx: + inference_time: 1545.0 + throughput: 647.2491909385113 estimated_peak_memory_range: - min: 12288 - max: 
25637744 + min: 0 + max: 19680448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 229 - job_id: jn5q9247p + job_id: j7gj1md1g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:15:55Z' + timestamp: '2024-06-22T23:24:41Z' - torchscript_onnx_tflite: - inference_time: 1809.0 - throughput: 552.791597567717 + inference_time: 1748.0 + throughput: 572.0823798627002 estimated_peak_memory_range: min: 12288 - max: 1931632 + max: 2065992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 240 - job_id: jopr1yxvg + job_id: j1p8892qp job_status: Passed torchscript_onnx_qnn: - inference_time: 1183.0 - throughput: 845.30853761623 + inference_time: 1185.0 + throughput: 843.8818565400844 estimated_peak_memory_range: - min: 622592 - max: 10230296 + min: 618496 + max: 60975648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: j1p8w7jzp + job_id: j1p383nm5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:15:54Z' + timestamp: '2024-06-22T23:24:37Z' + - torchscript_onnx_tflite: + inference_time: 1749.0 + throughput: 571.7552887364208 + estimated_peak_memory_range: + min: 20480 + max: 2530872 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 240 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 240 + job_id: jogkdnvvp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1171.0 + throughput: 853.9709649871904 + estimated_peak_memory_range: + min: 16384 + max: 13834928 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 186 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 186 + job_id: jwgom0z15 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:24:38Z' - torchscript_onnx_qnn: - inference_time: 1353.0 - throughput: 739.0983000739099 + inference_time: 1319.0 + throughput: 758.1501137225171 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: j2p0ero25 + job_id: jw56vj2np job_status: Passed - torchscript_onnx_ort: - inference_time: 2343.0 - throughput: 426.8032437046522 + torchscript_onnx: + inference_time: 2306.0 + throughput: 433.6513443191674 estimated_peak_memory_range: - min: 6090752 - max: 6090752 + min: 5414912 + max: 5414912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 229 - job_id: j1glekwep + job_id: jlpe21o8p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:15:56Z' + timestamp: '2024-06-22T23:24:42Z' diff --git a/qai_hub_models/models/squeezenet1_1/export.py b/qai_hub_models/models/squeezenet1_1/export.py index 361dd2cc..247c50b2 100644 --- a/qai_hub_models/models/squeezenet1_1/export.py +++ b/qai_hub_models/models/squeezenet1_1/export.py @@ -37,7 +37,7 @@ def 
export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/squeezenet1_1/perf.yaml b/qai_hub_models/models/squeezenet1_1/perf.yaml index e1645dcb..8a5f5078 100644 --- a/qai_hub_models/models/squeezenet1_1/perf.yaml +++ b/qai_hub_models/models/squeezenet1_1/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: SqueezeNet-1_1 performance_metrics: - torchscript_onnx_tflite: - inference_time: 646.0 - throughput: 1547.9876160990711 + inference_time: 661.0 + throughput: 1512.8593040847202 estimated_peak_memory_range: - min: 20480 - max: 1582896 + min: 24576 + max: 1440368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: j1p3qmox5 + job_id: jz5wxvw4p job_status: Passed torchscript_onnx_qnn: - inference_time: 702.0 - throughput: 1424.5014245014245 + inference_time: 719.0 + throughput: 1390.8205841446454 estimated_peak_memory_range: - min: 16384 - max: 7170920 + min: 622592 + max: 4290184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: j7gjkl375 + job_id: jz576w2ng job_status: Passed - torchscript_onnx_ort: - inference_time: 671.0 - throughput: 1490.312965722802 + torchscript_onnx: + inference_time: 680.0 + throughput: 1470.5882352941176 estimated_peak_memory_range: - min: 12288 - max: 11919448 + min: 16384 + max: 7145704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jmg994oqg + job_id: jopr9q6kp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:16:21Z' + timestamp: '2024-06-22T23:25:10Z' - torchscript_onnx_tflite: - inference_time: 452.0 - throughput: 2212.3893805309735 + inference_time: 476.0 + throughput: 2100.840336134454 
estimated_peak_memory_range: - min: 18399232 - max: 41710416 + min: 12288 + max: 23926224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jwgoevd4p + job_id: jmg9810mp job_status: Passed torchscript_onnx_qnn: - inference_time: 492.0 - throughput: 2032.520325203252 + inference_time: 491.0 + throughput: 2036.6598778004072 estimated_peak_memory_range: min: 0 - max: 32687824 + max: 27339600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jlpe4v675 + job_id: jqp48on2g job_status: Passed - torchscript_onnx_ort: - inference_time: 477.0 - throughput: 2096.4360587002097 + torchscript_onnx: + inference_time: 510.0 + throughput: 1960.7843137254902 estimated_peak_memory_range: - min: 12288 - max: 19637184 + min: 24576 + max: 16423504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jnp1q8okg + job_id: jep2j6x65 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:16:22Z' + timestamp: '2024-06-22T23:25:11Z' - torchscript_onnx_tflite: - inference_time: 664.0 - throughput: 1506.0240963855422 + inference_time: 653.0 + throughput: 1531.3935681470139 estimated_peak_memory_range: - min: 20480 - max: 1398696 + min: 12288 + max: 1448408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: j1pvzw27g + job_id: jnp13l2n5 job_status: Passed torchscript_onnx_qnn: - inference_time: 698.0 - throughput: 1432.6647564469913 + inference_time: 717.0 + throughput: 1394.700139470014 estimated_peak_memory_range: - min: 634880 - max: 7259784 + min: 622592 + max: 90389384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jz5wm9yzg + job_id: jo5m42e75 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:16:20Z' + timestamp: '2024-06-22T23:25:07Z' + - torchscript_onnx_tflite: + inference_time: 670.0 + throughput: 1492.5373134328358 + estimated_peak_memory_range: + min: 12288 + max: 3072888 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 41 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 41 + job_id: jvgd09n6p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 707.0 + throughput: 1414.4271570014143 + estimated_peak_memory_range: + min: 20480 + max: 5774040 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 70 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 70 + job_id: jegnxy0j5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:25:09Z' - torchscript_onnx_qnn: - inference_time: 801.0 - throughput: 1248.4394506866417 + inference_time: 817.0 + throughput: 1223.9902080783354 estimated_peak_memory_range: - min: 606208 - max: 606208 + min: 602112 + max: 602112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - 
job_id: jygzv7zzp + job_id: j0pxmj98g job_status: Passed - torchscript_onnx_ort: - inference_time: 681.0 - throughput: 1468.4287812041116 + torchscript_onnx: + inference_time: 662.0 + throughput: 1510.5740181268882 estimated_peak_memory_range: - min: 3670016 - max: 3670016 + min: 3444736 + max: 3444736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jvgd7v6kg + job_id: jqpynwz0g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:16:23Z' + timestamp: '2024-06-22T23:25:12Z' diff --git a/qai_hub_models/models/squeezenet1_1_quantized/export.py b/qai_hub_models/models/squeezenet1_1_quantized/export.py index f68f076e..ab2bba60 100644 --- a/qai_hub_models/models/squeezenet1_1_quantized/export.py +++ b/qai_hub_models/models/squeezenet1_1_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/squeezenet1_1_quantized/model.py b/qai_hub_models/models/squeezenet1_1_quantized/model.py index 63e2a276..097b1cd9 100644 --- a/qai_hub_models/models/squeezenet1_1_quantized/model.py +++ b/qai_hub_models/models/squeezenet1_1_quantized/model.py @@ -78,5 +78,4 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() return cls(sim) diff --git a/qai_hub_models/models/squeezenet1_1_quantized/perf.yaml b/qai_hub_models/models/squeezenet1_1_quantized/perf.yaml index 1d3b45b2..8ef50f58 100644 --- a/qai_hub_models/models/squeezenet1_1_quantized/perf.yaml +++ b/qai_hub_models/models/squeezenet1_1_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: SqueezeNet-1_1Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 220.0 - throughput: 4545.454545454545 + inference_time: 229.0 + throughput: 4366.812227074236 estimated_peak_memory_range: - min: 16384 - max: 1715824 + min: 24576 + max: 1612824 primary_compute_unit: NPU 
precision: int8 layer_info: @@ -54,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jqp4jweqp + job_id: j1p8890qp job_status: Passed torchscript_onnx_qnn: - inference_time: 467.0 - throughput: 2141.3276231263385 + inference_time: 468.0 + throughput: 2136.7521367521367 estimated_peak_memory_range: min: 167936 - max: 10118072 + max: 3876080 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 45 - job_id: jep23mdxg - job_status: Passed - torchscript_onnx_ort: - inference_time: 450.0 - throughput: 2222.222222222222 - estimated_peak_memory_range: - min: 12288 - max: 5507096 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 47 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 47 - job_id: jogkry0y5 + job_id: jwgom0k15 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:16:52Z' + timestamp: '2024-06-22T23:25:39Z' - torchscript_onnx_tflite: - inference_time: 179.0 - throughput: 5586.592178770949 + inference_time: 184.0 + throughput: 5434.782608695652 estimated_peak_memory_range: min: 12288 - max: 22450960 + max: 23959632 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: j0pxe10j5 + job_id: jogkdn7vp job_status: Passed torchscript_onnx_qnn: inference_time: 342.0 throughput: 2923.9766081871344 estimated_peak_memory_range: - min: 12288 - max: 27530432 + min: 163840 + max: 28116064 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 45 - job_id: jqpyvd2rp - job_status: Passed - torchscript_onnx_ort: - inference_time: 372.0 - throughput: 2688.1720430107525 - estimated_peak_memory_range: - min: 12288 - max: 15334176 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 47 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 47 - job_id: jn5q9217p + job_id: j1pv4o0zp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +120,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:16:53Z' + timestamp: '2024-06-22T23:25:40Z' - torchscript_onnx_tflite: - inference_time: 223.0 - throughput: 4484.304932735426 + inference_time: 218.0 + throughput: 4587.155963302752 estimated_peak_memory_range: min: 20480 - max: 1471296 + max: 1350800 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jo5mvz9y5 + job_id: jn5qwkee5 job_status: Passed torchscript_onnx_qnn: - inference_time: 464.0 - throughput: 2155.1724137931033 + inference_time: 467.0 + throughput: 2141.3276231263385 estimated_peak_memory_range: - min: 28672 - max: 17992504 + min: 172032 + max: 10450952 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 45 - job_id: j1p8w7rzp + job_id: jlpe21e8p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +158,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:16:51Z' + timestamp: '2024-06-22T23:25:43Z' - torchscript_onnx_tflite: - inference_time: 526.0 - throughput: 1901.1406844106464 + inference_time: 219.0 + throughput: 
4566.2100456621 estimated_peak_memory_range: min: 12288 - max: 14752288 + max: 1516424 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,7 +172,45 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jegnreqv5 + job_id: j1gl7z625 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 465.0 + throughput: 2150.537634408602 + estimated_peak_memory_range: + min: 12288 + max: 17011160 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 45 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 45 + job_id: jygzw9o4g + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:25:44Z' + - torchscript_onnx_tflite: + inference_time: 533.0 + throughput: 1876.172607879925 + estimated_peak_memory_range: + min: 12288 + max: 16053664 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 41 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 41 + job_id: jw56vjenp job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -207,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-08T23:16:46Z' + timestamp: '2024-06-22T23:25:37Z' - torchscript_onnx_tflite: - inference_time: 4092.0 - throughput: 244.37927663734115 + inference_time: 4108.0 + throughput: 243.42745861733204 estimated_peak_memory_range: - min: 20480 - max: 7234128 + min: 163840 + max: 1845464 primary_compute_unit: NPU precision: int8 layer_info: @@ -221,7 +233,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jopr1ydvg + job_id: j1p383vm5 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -230,13 +242,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-08T23:16:47Z' + timestamp: '2024-06-22T23:25:38Z' - torchscript_onnx_qnn: - inference_time: 536.0 - throughput: 1865.6716417910447 + inference_time: 541.0 + throughput: 1848.4288354898335 estimated_peak_memory_range: - min: 1843200 - max: 1843200 + min: 679936 + max: 679936 primary_compute_unit: NPU precision: int8 layer_info: @@ -244,22 +256,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 45 - job_id: j2p0er925 - job_status: Passed - torchscript_onnx_ort: - inference_time: 472.0 - throughput: 2118.64406779661 - estimated_peak_memory_range: - min: 2641920 - max: 2641920 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 47 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 47 - job_id: j1glek8ep + job_id: j7gj1mz1g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -268,4 +265,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:16:54Z' + timestamp: '2024-06-22T23:25:42Z' diff --git a/qai_hub_models/models/stable_diffusion_v1_5_quantized/export.py b/qai_hub_models/models/stable_diffusion_v1_5_quantized/export.py index c6394ccb..6b5d0103 100644 --- a/qai_hub_models/models/stable_diffusion_v1_5_quantized/export.py +++ b/qai_hub_models/models/stable_diffusion_v1_5_quantized/export.py @@ -27,7 +27,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, components: Optional[List[str]] = None, skip_profiling: bool = False, diff --git a/qai_hub_models/models/stable_diffusion_v2_1_quantized/export.py 
b/qai_hub_models/models/stable_diffusion_v2_1_quantized/export.py index d2b0dffd..9fdaa7eb 100644 --- a/qai_hub_models/models/stable_diffusion_v2_1_quantized/export.py +++ b/qai_hub_models/models/stable_diffusion_v2_1_quantized/export.py @@ -27,7 +27,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, components: Optional[List[str]] = None, skip_profiling: bool = False, diff --git a/qai_hub_models/models/stylegan2/app.py b/qai_hub_models/models/stylegan2/app.py index f0c4c304..2db6ace2 100644 --- a/qai_hub_models/models/stylegan2/app.py +++ b/qai_hub_models/models/stylegan2/app.py @@ -76,34 +76,31 @@ def generate_images( Returns: See raw_output parameter description. """ - with torch.no_grad(): - if image_noise is None: - image_noise = self.generate_random_vec( - batch_size=class_idx.shape[0] if class_idx is not None else 1 - ) - - if self.num_classes != 0: - if isinstance(class_idx, int): - class_idx = torch.Tensor([class_idx] * image_noise.shape[0]) - - if isinstance(class_idx, torch.Tensor) and len(class_idx.shape) == 1: - # Convert from [N] class index to one-hot [N, # of classes] - assert class_idx.dtype == torch.int - model_classes = torch.nn.functional.one_hot( - class_idx, self.num_classes - ) - else: - model_classes = class_idx - - image_tensor = self.model(image_noise, model_classes) + if image_noise is None: + image_noise = self.generate_random_vec( + batch_size=class_idx.shape[0] if class_idx is not None else 1 + ) + + if self.num_classes != 0: + if isinstance(class_idx, int): + class_idx = torch.Tensor([class_idx] * image_noise.shape[0]) + + if isinstance(class_idx, torch.Tensor) and len(class_idx.shape) == 1: + # Convert from [N] class index to one-hot [N, # of classes] + assert class_idx.dtype == torch.int + model_classes = torch.nn.functional.one_hot(class_idx, self.num_classes) else: - image_tensor = self.model(image_noise) + model_classes = class_idx - image_tensor = ( - (image_tensor.permute(0, 2, 3, 1) * 127.5 + 128) - .clamp(0, 255) - .to(torch.uint8) - ) + image_tensor = self.model(image_noise, model_classes) + else: + image_tensor = self.model(image_noise) + + image_tensor = ( + (image_tensor.permute(0, 2, 3, 1) * 127.5 + 128) + .clamp(0, 255) + .to(torch.uint8) + ) if raw_output: return image_tensor diff --git a/qai_hub_models/models/stylegan2/export.py b/qai_hub_models/models/stylegan2/export.py index 5a712592..354e2082 100644 --- a/qai_hub_models/models/stylegan2/export.py +++ b/qai_hub_models/models/stylegan2/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -115,13 +115,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -179,7 +178,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" 
os.makedirs(output_path, exist_ok=True) @@ -201,7 +200,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) @@ -216,8 +215,8 @@ def main(): parser = export_parser( model_cls=Model, supports_qnn=False, - supports_ort=False, - supports_precompiled_ort=False, + supports_onnx=False, + supports_precompiled_qnn_onnx=False, ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/stylegan2/perf.yaml b/qai_hub_models/models/stylegan2/perf.yaml index 938965f9..f9924141 100644 --- a/qai_hub_models/models/stylegan2/perf.yaml +++ b/qai_hub_models/models/stylegan2/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: StyleGAN2 performance_metrics: - torchscript_onnx_tflite: - inference_time: 1649413.0 - throughput: 0.6062762934450013 + inference_time: 1555617.0 + throughput: 0.6428317510029783 estimated_peak_memory_range: - min: 1397805056 - max: 2230233016 + min: 1394122752 + max: 2215496248 primary_compute_unit: CPU precision: fp32 layer_info: @@ -48,7 +50,7 @@ models: layers_on_gpu: 78 layers_on_cpu: 402 total_layers: 480 - job_id: j1p3qm7x5 + job_id: j0pxmjn8g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -57,13 +59,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:17:52Z' + timestamp: '2024-06-22T23:26:47Z' - torchscript_onnx_tflite: - inference_time: 1311471.0 - throughput: 0.7625025639148711 + inference_time: 1307557.0 + throughput: 0.764785015108328 estimated_peak_memory_range: - min: 1184645120 - max: 1218773040 + min: 1076854784 + max: 1106994896 primary_compute_unit: CPU precision: fp32 layer_info: @@ -71,7 +73,7 @@ models: layers_on_gpu: 78 layers_on_cpu: 402 total_layers: 480 - job_id: jwgoevw4p + job_id: jo5m42q75 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -80,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:17:53Z' + timestamp: '2024-06-22T23:26:48Z' - torchscript_onnx_tflite: - inference_time: 1578379.0 - throughput: 0.6335613943165742 + inference_time: 1673478.0 + throughput: 0.5975579003727566 estimated_peak_memory_range: - min: 1049174016 - max: 1057203192 + min: 842801152 + max: 3302164848 primary_compute_unit: CPU precision: fp32 layer_info: @@ -94,7 +96,7 @@ models: layers_on_gpu: 78 layers_on_cpu: 402 total_layers: 480 - job_id: j1pvzwm7g + job_id: jegnxylj5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -103,4 +105,27 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:17:54Z' + timestamp: '2024-06-22T23:26:50Z' + - torchscript_onnx_tflite: + inference_time: 1573212.0 + throughput: 0.63564224020666 + estimated_peak_memory_range: + min: 1351241728 + max: 1354927960 + primary_compute_unit: CPU + precision: fp32 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 78 + layers_on_cpu: 402 + total_layers: 480 + job_id: jopr9q8kp + job_status: Passed + reference_device_info: + 
name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:26:51Z' diff --git a/qai_hub_models/models/swin_base/export.py b/qai_hub_models/models/swin_base/export.py index 79d16b96..5c7ba3ae 100644 --- a/qai_hub_models/models/swin_base/export.py +++ b/qai_hub_models/models/swin_base/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/swin_base/model.py b/qai_hub_models/models/swin_base/model.py index b4ba8831..34e855dd 100644 --- a/qai_hub_models/models/swin_base/model.py +++ b/qai_hub_models/models/swin_base/model.py @@ -29,4 +29,4 @@ def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> ImagenetClassifier: replace_module_recursively( net, torch.nn.Linear, AutoSplitLinear, parent_module=PatchMerging ) - return cls(net.eval()) + return cls(net) diff --git a/qai_hub_models/models/swin_base/perf.yaml b/qai_hub_models/models/swin_base/perf.yaml index 37ad14fa..0a8975b8 100644 --- a/qai_hub_models/models/swin_base/perf.yaml +++ b/qai_hub_models/models/swin_base/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: Swin-Base performance_metrics: - torchscript_onnx_tflite: - inference_time: 38045.0 - throughput: 26.284662899198317 + inference_time: 37584.0 + throughput: 26.607066836951894 estimated_peak_memory_range: - min: 307200 - max: 3648376 + min: 126976 + max: 3572000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1568 - job_id: jz57vdnq5 + job_id: j1p3831m5 job_status: Passed torchscript_onnx_qnn: - inference_time: 31404.0 - throughput: 31.84307731499172 + inference_time: 31554.0 + throughput: 31.691703112125246 estimated_peak_memory_range: - min: 57344 - max: 46336408 + min: 16384 + max: 51870480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1255 - 
job_id: jo5mvzky5 + job_id: jlpe21w8p job_status: Passed - torchscript_onnx_ort: - inference_time: 63106.0 - throughput: 15.846353754001205 + torchscript_onnx: + inference_time: 63881.0 + throughput: 15.654106854933392 estimated_peak_memory_range: - min: 278528 - max: 457269496 + min: 86016 + max: 471415568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1163 - job_id: jqpyvdyrp + job_id: jvgd09q6p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:18:38Z' + timestamp: '2024-06-22T23:27:48Z' - torchscript_onnx_tflite: - inference_time: 26266.0 - throughput: 38.07203228508338 + inference_time: 26582.0 + throughput: 37.61944172748476 estimated_peak_memory_range: - min: 49152 - max: 501753168 + min: 45056 + max: 524204544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1568 - job_id: jqp4jw4qp + job_id: jwgom0n15 job_status: Passed torchscript_onnx_qnn: - inference_time: 22072.0 - throughput: 45.30627038782168 + inference_time: 21873.0 + throughput: 45.7184656882915 estimated_peak_memory_range: min: 0 - max: 409890496 + max: 368391424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1255 - job_id: jegnrewv5 + job_id: jygzw9j4g job_status: Passed - torchscript_onnx_ort: - inference_time: 44119.0 - throughput: 22.66597157687164 + torchscript_onnx: + inference_time: 44504.0 + throughput: 22.469890346935106 estimated_peak_memory_range: - min: 643072 - max: 204011072 + min: 667648 + max: 185198512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1163 - job_id: j2p0erx25 + job_id: jz5wxv3zp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:18:39Z' + timestamp: '2024-06-22T23:27:49Z' - torchscript_onnx_tflite: - inference_time: 38074.0 - throughput: 26.264642538215053 + inference_time: 37732.0 + throughput: 26.502703275734124 estimated_peak_memory_range: - min: 61440 - max: 4041520 + min: 86016 + max: 4380992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1568 - job_id: j0pxe1rj5 + job_id: j1pv4orzp job_status: Passed torchscript_onnx_qnn: - inference_time: 31252.0 - throughput: 31.997952131063613 + inference_time: 31187.0 + throughput: 32.064642318914935 estimated_peak_memory_range: - min: 61440 - max: 51901248 + min: 12288 + max: 49081336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1255 - job_id: jep23mzxg + job_id: jmg981ymp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:18:37Z' + timestamp: '2024-06-22T23:27:46Z' + - torchscript_onnx_tflite: + inference_time: 37791.0 + throughput: 26.461326770924295 + estimated_peak_memory_range: + min: 90112 + max: 3029064 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 1568 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 1568 + 
job_id: j7gj1m21g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 31145.0 + throughput: 32.10788248515011 + estimated_peak_memory_range: + min: 16384 + max: 45743360 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 1255 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 1255 + job_id: jnp13lwn5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:27:47Z' - torchscript_onnx_qnn: - inference_time: 38623.0 - throughput: 25.89130828780778 + inference_time: 29765.0 + throughput: 33.59650596337981 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1255 - job_id: jopr1y7vg + job_id: jz5wxv34p job_status: Passed - torchscript_onnx_ort: - inference_time: 65447.0 - throughput: 15.27953916909866 + torchscript_onnx: + inference_time: 65561.0 + throughput: 15.252970516007993 estimated_peak_memory_range: - min: 552267776 - max: 552267776 + min: 561917952 + max: 561917952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1163 - job_id: j1p8w7kzp + job_id: jmg981yqp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:18:40Z' + timestamp: '2024-06-22T23:27:51Z' diff --git a/qai_hub_models/models/swin_small/export.py b/qai_hub_models/models/swin_small/export.py index 67677bbe..ef02503b 100644 --- a/qai_hub_models/models/swin_small/export.py +++ b/qai_hub_models/models/swin_small/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/swin_small/model.py b/qai_hub_models/models/swin_small/model.py index 081d731f..dd1ee1e2 100644 --- a/qai_hub_models/models/swin_small/model.py +++ b/qai_hub_models/models/swin_small/model.py @@ -29,4 +29,4 @@ def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> ImagenetClassifier: replace_module_recursively( net, torch.nn.Linear, AutoSplitLinear, parent_module=PatchMerging ) - return cls(net.eval()) + return cls(net) diff 
--git a/qai_hub_models/models/swin_small/perf.yaml b/qai_hub_models/models/swin_small/perf.yaml index 4bd928fb..16f4e5f6 100644 --- a/qai_hub_models/models/swin_small/perf.yaml +++ b/qai_hub_models/models/swin_small/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: Swin-Small performance_metrics: - torchscript_onnx_tflite: - inference_time: 29054.0 - throughput: 34.41866868589523 + inference_time: 28880.0 + throughput: 34.62603878116344 estimated_peak_memory_range: - min: 24576 - max: 7976680 + min: 53248 + max: 3319424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1563 - job_id: jn5q92d7p + job_id: jvgd09qkp job_status: Passed torchscript_onnx_qnn: - inference_time: 23697.0 - throughput: 42.19943452757733 + inference_time: 23486.0 + throughput: 42.578557438473986 estimated_peak_memory_range: - min: 0 - max: 40982576 + min: 40960 + max: 42627616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1246 - job_id: j1p3qmrx5 + job_id: jo5m426y5 job_status: Passed - torchscript_onnx_ort: - inference_time: 56535.0 - throughput: 17.688157778367383 + torchscript_onnx: + inference_time: 54900.0 + throughput: 18.214936247723134 estimated_peak_memory_range: - min: 57344 - max: 250098192 + min: 16384 + max: 249192608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1158 - job_id: jlpe4vn75 + job_id: j2p0kql25 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:19:17Z' + timestamp: '2024-06-22T23:28:28Z' - torchscript_onnx_tflite: inference_time: 19652.0 throughput: 50.8854060655404 estimated_peak_memory_range: - min: 45056 - max: 468730016 + min: 49152 + max: 491763984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1563 - job_id: j1glekqep + job_id: jz576wlqg job_status: Passed torchscript_onnx_qnn: - inference_time: 16097.0 - throughput: 62.123377026775174 + inference_time: 16133.0 + throughput: 61.98475175106924 estimated_peak_memory_range: min: 0 - max: 371590576 + max: 338877936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1246 - job_id: jwgoev94p + job_id: jegnxy3v5 job_status: Passed - torchscript_onnx_ort: - inference_time: 39326.0 - throughput: 25.42846971469257 + torchscript_onnx: + inference_time: 38428.0 + throughput: 26.02269178723847 estimated_peak_memory_range: - min: 651264 - max: 174791408 + min: 618496 + max: 147135632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1158 - job_id: jygzv70zp + job_id: j1p889zzp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:19:18Z' + timestamp: '2024-06-22T23:28:30Z' - 
torchscript_onnx_tflite: - inference_time: 29025.0 - throughput: 34.45305770887166 + inference_time: 28669.0 + throughput: 34.88088178869162 estimated_peak_memory_range: - min: 69632 - max: 3142616 + min: 20480 + max: 2800768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1563 - job_id: jw56q10vg + job_id: jqp48odqg job_status: Passed torchscript_onnx_qnn: - inference_time: 23503.0 - throughput: 42.54775986044335 + inference_time: 23305.0 + throughput: 42.90924694271615 estimated_peak_memory_range: min: 36864 - max: 38372320 + max: 41438080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1246 - job_id: j7gjkl875 + job_id: jep2j6lx5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:19:16Z' + timestamp: '2024-06-22T23:28:26Z' + - torchscript_onnx_tflite: + inference_time: 28806.0 + throughput: 34.71498993265292 + estimated_peak_memory_range: + min: 73728 + max: 6136808 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 1563 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 1563 + job_id: j0pxmj6jg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 23462.0 + throughput: 42.62211235188816 + estimated_peak_memory_range: + min: 167936 + max: 42534400 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 1246 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 1246 + job_id: jqpynw6rg + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:28:27Z' - torchscript_onnx_qnn: - inference_time: 23778.0 - throughput: 42.055681722600724 + inference_time: 22363.0 + throughput: 44.71671958145151 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1246 - job_id: j1pvzwn7g + job_id: jopr9qevp job_status: Passed - torchscript_onnx_ort: - inference_time: 58093.0 - throughput: 17.213777907837432 + torchscript_onnx: + inference_time: 56601.0 + throughput: 17.66753237575308 estimated_peak_memory_range: - min: 385679360 - max: 385679360 + min: 387993600 + max: 387993600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1158 - job_id: jz5wm9rzg + job_id: jogkdn3yp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:19:19Z' + timestamp: '2024-06-22T23:28:31Z' diff --git a/qai_hub_models/models/swin_tiny/export.py b/qai_hub_models/models/swin_tiny/export.py index fbe5734d..ace0abc5 100644 --- a/qai_hub_models/models/swin_tiny/export.py +++ b/qai_hub_models/models/swin_tiny/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and 
TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/swin_tiny/model.py b/qai_hub_models/models/swin_tiny/model.py index 2c32e595..0b293ec7 100644 --- a/qai_hub_models/models/swin_tiny/model.py +++ b/qai_hub_models/models/swin_tiny/model.py @@ -29,4 +29,4 @@ def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> ImagenetClassifier: replace_module_recursively( net, torch.nn.Linear, AutoSplitLinear, parent_module=PatchMerging ) - return cls(net.eval()) + return cls(net) diff --git a/qai_hub_models/models/swin_tiny/perf.yaml b/qai_hub_models/models/swin_tiny/perf.yaml index 7281ba04..bf08ca33 100644 --- a/qai_hub_models/models/swin_tiny/perf.yaml +++ b/qai_hub_models/models/swin_tiny/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: Swin-Tiny performance_metrics: - torchscript_onnx_tflite: - inference_time: 17582.0 - throughput: 56.87635081333182 + inference_time: 17334.0 + throughput: 57.69008884273682 estimated_peak_memory_range: - min: 49152 - max: 3052248 + min: 45056 + max: 3426144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 837 - job_id: jnp1q8mkg + job_id: j1gl7z3e5 job_status: Passed torchscript_onnx_qnn: - inference_time: 14870.0 - throughput: 67.24949562878278 + inference_time: 14963.0 + throughput: 66.83151774376796 estimated_peak_memory_range: - min: 40960 - max: 28468704 + min: 16384 + max: 25111704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 700 - job_id: jqp4jw2qp + job_id: j1pv4ov7p job_status: Passed - torchscript_onnx_ort: - inference_time: 33752.0 - throughput: 29.627873903768666 + torchscript_onnx: + inference_time: 32650.0 + throughput: 30.627871362940276 estimated_peak_memory_range: min: 0 - max: 143848064 + max: 154932224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 624 - job_id: jopr1ymvg + job_id: jmg981wqp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:19:49Z' + timestamp: '2024-06-22T23:29:04Z' - torchscript_onnx_tflite: - inference_time: 11836.0 - throughput: 84.48800270361609 + inference_time: 11831.0 + throughput: 84.52370890034655 
estimated_peak_memory_range: - min: 40960 - max: 291213504 + min: 16384 + max: 301706880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 837 - job_id: jvgd7vmkg + job_id: jw56vjnvp job_status: Passed torchscript_onnx_qnn: - inference_time: 9960.0 - throughput: 100.40160642570281 + inference_time: 9973.0 + throughput: 100.2707309736288 estimated_peak_memory_range: - min: 618496 - max: 226851856 + min: 81215488 + max: 290065488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 700 - job_id: j0pxe1zj5 + job_id: j7gj1me7g job_status: Passed - torchscript_onnx_ort: - inference_time: 23820.0 - throughput: 41.98152812762385 + torchscript_onnx: + inference_time: 22771.0 + throughput: 43.91550656536823 estimated_peak_memory_range: - min: 53248 - max: 113324624 + min: 36864 + max: 101885200 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 624 - job_id: jep23mqxg + job_id: jnp13lek5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:19:50Z' + timestamp: '2024-06-22T23:29:05Z' - torchscript_onnx_tflite: - inference_time: 17413.0 - throughput: 57.42835812324125 + inference_time: 17424.0 + throughput: 57.3921028466483 estimated_peak_memory_range: - min: 24576 - max: 3013416 + min: 49152 + max: 3700560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 837 - job_id: jz57vd8q5 + job_id: j1p383ex5 job_status: Passed torchscript_onnx_qnn: - inference_time: 14630.0 - throughput: 68.3526999316473 + inference_time: 14895.0 + throughput: 67.1366230278617 estimated_peak_memory_range: - min: 12288 - max: 29408864 + min: 229376 + max: 26294976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 700 - job_id: jegnredv5 + job_id: jygzw9rzg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:19:48Z' + timestamp: '2024-06-22T23:29:01Z' + - torchscript_onnx_tflite: + inference_time: 17337.0 + throughput: 57.680106131395284 + estimated_peak_memory_range: + min: 28672 + max: 2941824 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 837 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 837 + job_id: jwgom0345 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 14963.0 + throughput: 66.83151774376796 + estimated_peak_memory_range: + min: 40960 + max: 28386808 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 700 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 700 + job_id: jz5wxvqzp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:29:03Z' - torchscript_onnx_qnn: - inference_time: 14162.0 - throughput: 70.61149555147578 + inference_time: 13869.0 + throughput: 72.10325185665873 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 700 - job_id: jo5mvzly5 + job_id: jlpe21k7p 
job_status: Passed - torchscript_onnx_ort: - inference_time: 34948.0 - throughput: 28.613940711914847 + torchscript_onnx: + inference_time: 33671.0 + throughput: 29.69914763446289 estimated_peak_memory_range: - min: 211316736 - max: 211316736 + min: 87080960 + max: 87080960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 624 - job_id: jqpyvdkrp + job_id: jvgd09okp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:19:50Z' + timestamp: '2024-06-22T23:29:06Z' diff --git a/qai_hub_models/models/trocr/export.py b/qai_hub_models/models/trocr/export.py index 7c62002b..03477c9f 100644 --- a/qai_hub_models/models/trocr/export.py +++ b/qai_hub_models/models/trocr/export.py @@ -33,7 +33,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, components: Optional[List[str]] = None, skip_profiling: bool = False, @@ -127,7 +127,6 @@ def export_model( for component_name, component in components_dict.items(): # Trace the model input_spec = component.get_input_spec() - component.eval() source_model = torch.jit.trace( component.to("cpu"), make_torch_inputs(input_spec) ) @@ -194,7 +193,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -240,7 +239,7 @@ def main(): model_cls=Model, components=ALL_COMPONENTS, supports_qnn=False, - supports_precompiled_ort=False, + supports_precompiled_qnn_onnx=False, ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/trocr/perf.yaml b/qai_hub_models/models/trocr/perf.yaml index 25bfdd7d..0649fe59 100644 --- a/qai_hub_models/models/trocr/perf.yaml +++ b/qai_hub_models/models/trocr/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: TrOCREncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 148428.0 - throughput: 6.737273290753766 + inference_time: 148085.0 + throughput: 6.752878414424148 estimated_peak_memory_range: - min: 6459392 - max: 9952352 + min: 8511488 + max: 11264624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 592 - job_id: j1p8w7dzp + job_id: jqp48ovqg job_status: Passed - torchscript_onnx_ort: - inference_time: 109810.0 - throughput: 9.106638739641198 + torchscript_onnx: + inference_time: 108599.0 + throughput: 9.208187920699086 estimated_peak_memory_range: - min: 14303232 - max: 127415872 + min: 14270464 + max: 130124584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +65,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 396 - job_id: jmg9947qg + job_id: jygzw97zg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +74,13 @@ models: 
os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:20:35Z' + timestamp: '2024-06-22T23:29:59Z' - torchscript_onnx_tflite: - inference_time: 111077.0 - throughput: 9.00276384850149 + inference_time: 111580.0 + throughput: 8.962179602079226 estimated_peak_memory_range: - min: 6410240 - max: 350751520 + min: 6737920 + max: 361915184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -86,14 +88,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 592 - job_id: jn5q92x7p + job_id: jo5m423y5 job_status: Passed - torchscript_onnx_ort: - inference_time: 83685.0 - throughput: 11.9495728027723 + torchscript_onnx: + inference_time: 82794.0 + throughput: 12.078169915694374 estimated_peak_memory_range: - min: 12636160 - max: 89203248 + min: 10301440 + max: 74776016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +103,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 396 - job_id: jvgd7vykg + job_id: jmg9814qp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +112,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:20:37Z' + timestamp: '2024-06-22T23:30:02Z' - torchscript_onnx_tflite: - inference_time: 148360.0 - throughput: 6.740361283364789 + inference_time: 147811.0 + throughput: 6.765396350745209 estimated_peak_memory_range: - min: 7380992 - max: 9974128 + min: 7270400 + max: 10717800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -124,7 +126,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 592 - job_id: jw56q19vg + job_id: jopr9qyvp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +135,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:20:26Z' - - torchscript_onnx_ort: - inference_time: 109878.0 - throughput: 9.101002930522943 + timestamp: '2024-06-22T23:29:44Z' + - torchscript_onnx_tflite: + inference_time: 147905.0 + throughput: 6.76109664987661 estimated_peak_memory_range: - min: 28672 - max: 28672 + min: 7254016 + max: 10544600 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 592 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 592 + job_id: jqpynwdrg + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:29:46Z' + - torchscript_onnx: + inference_time: 109376.0 + throughput: 9.14277355178467 + estimated_peak_memory_range: + min: 13611008 + max: 13611008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +172,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 396 - job_id: jmg9947vg + job_id: jvgd09vkp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,30 +181,30 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:20:38Z' + timestamp: '2024-06-22T23:30:04Z' - name: TrOCRDecoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 2732.0 - throughput: 366.03221083455344 + inference_time: 2715.0 + throughput: 368.3241252302026 estimated_peak_memory_range: min: 12288 - max: 2455200 + max: 2747864 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 370 + layers_on_npu: 382 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 370 - job_id: jogkrywy5 + total_layers: 382 + job_id: j0pxmjyjg job_status: Passed - torchscript_onnx_ort: - 
inference_time: 2915.0 - throughput: 343.0531732418525 + torchscript_onnx: + inference_time: 2864.0 + throughput: 349.16201117318434 estimated_peak_memory_range: - min: 28672 - max: 588384064 + min: 12288 + max: 576534240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -187,7 +212,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 353 - job_id: jnp1q8kkg + job_id: jz5wxv9zp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -196,28 +221,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:20:35Z' + timestamp: '2024-06-22T23:30:00Z' - torchscript_onnx_tflite: - inference_time: 1997.0 - throughput: 500.75112669003505 + inference_time: 1974.0 + throughput: 506.5856129685917 estimated_peak_memory_range: min: 12288 - max: 195170736 + max: 199729808 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 370 + layers_on_npu: 382 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 370 - job_id: j1glek9ep + total_layers: 382 + job_id: jegnxyev5 job_status: Passed - torchscript_onnx_ort: - inference_time: 2106.0 - throughput: 474.8338081671415 + torchscript_onnx: + inference_time: 2283.0 + throughput: 438.02014892685065 estimated_peak_memory_range: min: 0 - max: 49553392 + max: 35734832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -225,7 +250,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 353 - job_id: jz5wm90jg + job_id: jnp13l8k5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -234,21 +259,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:20:37Z' + timestamp: '2024-06-22T23:30:03Z' - torchscript_onnx_tflite: - inference_time: 2737.0 - throughput: 365.36353671903544 + inference_time: 2722.0 + throughput: 367.37692872887584 estimated_peak_memory_range: - min: 16384 - max: 3465512 + min: 12288 + max: 2054840 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 370 + layers_on_npu: 382 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 370 - job_id: j1p3qmlx5 + total_layers: 382 + job_id: jep2j6mx5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -257,13 +282,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:20:27Z' - - torchscript_onnx_ort: - inference_time: 2812.0 - throughput: 355.6187766714082 + timestamp: '2024-06-22T23:29:44Z' + - torchscript_onnx_tflite: + inference_time: 2729.0 + throughput: 366.43459142543054 + estimated_peak_memory_range: + min: 12288 + max: 2338424 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 382 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 382 + job_id: j2p0kqr25 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:29:47Z' + - torchscript_onnx: + inference_time: 2636.0 + throughput: 379.3626707132018 estimated_peak_memory_range: - min: 352550912 - max: 352550912 + min: 347582464 + max: 347582464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -271,7 +319,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 353 - job_id: jnp1q8klg + job_id: jz576wdqg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -280,4 +328,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:20:39Z' + timestamp: '2024-06-22T23:30:05Z' 
diff --git a/qai_hub_models/models/unet_segmentation/app.py b/qai_hub_models/models/unet_segmentation/app.py index 41683171..6bbf207d 100644 --- a/qai_hub_models/models/unet_segmentation/app.py +++ b/qai_hub_models/models/unet_segmentation/app.py @@ -36,7 +36,6 @@ def predict(self, image: Image) -> torch.Tensor: """ img = preprocess_PIL_image(image) - with torch.no_grad(): - out = self.model(img) + out = self.model(img) mask = out.argmax(dim=1) return mask[0].bool().numpy() diff --git a/qai_hub_models/models/unet_segmentation/export.py b/qai_hub_models/models/unet_segmentation/export.py index 6274534f..9081590a 100644 --- a/qai_hub_models/models/unet_segmentation/export.py +++ b/qai_hub_models/models/unet_segmentation/export.py @@ -38,7 +38,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,13 +117,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -169,7 +168,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -187,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -209,7 +208,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/unet_segmentation/model.py b/qai_hub_models/models/unet_segmentation/model.py index 32290667..8a03fd6e 100644 --- a/qai_hub_models/models/unet_segmentation/model.py +++ b/qai_hub_models/models/unet_segmentation/model.py @@ -37,7 +37,7 @@ def from_pretrained(cls, ckpt_url: Optional[str] = DEFAULT_WEIGHTS): if ckpt_url is not None: state_dict = load_torch(ckpt_url) net.load_state_dict(state_dict) - return cls(net.eval()) + return cls(net) def forward(self, image: torch.Tensor): """ diff --git a/qai_hub_models/models/unet_segmentation/perf.yaml b/qai_hub_models/models/unet_segmentation/perf.yaml index b4a6ea65..bade6f1c 100644 --- a/qai_hub_models/models/unet_segmentation/perf.yaml +++ b/qai_hub_models/models/unet_segmentation/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: 
- name: Unet-Segmentation performance_metrics: - torchscript_onnx_tflite: - inference_time: 159228.0 - throughput: 6.280302459366443 + inference_time: 160376.0 + throughput: 6.235346934703447 estimated_peak_memory_range: - min: 6418432 - max: 111435960 + min: 6463488 + max: 463915992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jo5mvz7q5 + job_id: jo5m42zy5 job_status: Passed torchscript_onnx_qnn: - inference_time: 156519.0 - throughput: 6.389000696401076 + inference_time: 155942.0 + throughput: 6.412640597145092 estimated_peak_memory_range: - min: 9871360 - max: 31082800 + min: 9875456 + max: 28375952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 51 - job_id: jep23m1mg + job_id: jqpynw4rg job_status: Passed - torchscript_onnx_ort: - inference_time: 165647.0 - throughput: 6.03693396197939 + torchscript_onnx: + inference_time: 165270.0 + throughput: 6.05070490712168 estimated_peak_memory_range: - min: 13611008 - max: 154509064 + min: 13549568 + max: 156940568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jn5q92nmp + job_id: j1gl7z0e5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:21:28Z' + timestamp: '2024-06-22T23:30:58Z' - torchscript_onnx_tflite: - inference_time: 121153.0 - throughput: 8.254025901133279 + inference_time: 118210.0 + throughput: 8.459521191100583 estimated_peak_memory_range: - min: 6619136 - max: 339596672 + min: 5234688 + max: 340954016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jegnre4m5 + job_id: jegnxy9v5 job_status: Passed torchscript_onnx_qnn: - inference_time: 110026.0 - throughput: 9.0887608383473 + inference_time: 110282.0 + throughput: 9.06766290056401 estimated_peak_memory_range: - min: 9850880 - max: 91369248 + min: 9863168 + max: 90274032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 51 - job_id: j2p0erwe5 + job_id: j2p0kq125 job_status: Passed - torchscript_onnx_ort: - inference_time: 119057.0 - throughput: 8.399338132155187 + torchscript_onnx: + inference_time: 121360.0 + throughput: 8.23994726433751 estimated_peak_memory_range: - min: 22478848 - max: 104785056 + min: 15745024 + max: 95078624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: j1glekdlp + job_id: jw56vj3vp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:21:30Z' + timestamp: '2024-06-22T23:30:59Z' - torchscript_onnx_tflite: - inference_time: 157133.0 - throughput: 6.364035562230722 + inference_time: 158593.0 + throughput: 6.305448538081756 estimated_peak_memory_range: - min: 6680576 - max: 111633312 + min: 6676480 + max: 111417824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jopr1yreg + job_id: jopr9q4vp job_status: Passed torchscript_onnx_qnn: - inference_time: 
148329.0 - throughput: 6.741769984291676 + inference_time: 149290.0 + throughput: 6.698372295532185 estimated_peak_memory_range: - min: 9969664 - max: 32982776 + min: 9900032 + max: 30874704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 51 - job_id: jogkry1o5 + job_id: jogkdnlyp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:21:27Z' + timestamp: '2024-06-22T23:30:56Z' + - torchscript_onnx_tflite: + inference_time: 161552.0 + throughput: 6.189957413092998 + estimated_peak_memory_range: + min: 6701056 + max: 463829976 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 31 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 31 + job_id: jep2j67x5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 146922.0 + throughput: 6.806332611862077 + estimated_peak_memory_range: + min: 8839168 + max: 29513312 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 51 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 51 + job_id: jn5qwk775 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:30:57Z' - torchscript_onnx_qnn: - inference_time: 190476.0 - throughput: 5.25000525000525 + inference_time: 133369.0 + throughput: 7.497994286528353 estimated_peak_memory_range: - min: 9854976 - max: 9854976 + min: 9850880 + max: 9850880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 51 - job_id: j1p8w7n8p + job_id: j1p8893zp job_status: Passed - torchscript_onnx_ort: - inference_time: 146401.0 - throughput: 6.830554436103578 + torchscript_onnx: + inference_time: 146596.0 + throughput: 6.821468525744222 estimated_peak_memory_range: - min: 17457152 - max: 17457152 + min: 9854976 + max: 9854976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jw56q1x7g + job_id: j1p3834x5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:21:31Z' + timestamp: '2024-06-22T23:31:01Z' diff --git a/qai_hub_models/models/vit/export.py b/qai_hub_models/models/vit/export.py index a4f94916..80db8b67 100644 --- a/qai_hub_models/models/vit/export.py +++ b/qai_hub_models/models/vit/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,7 +116,6 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace( model.to("cpu"), make_torch_inputs(input_spec), check_trace=False ) @@ -124,7 +123,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -170,7 +169,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == 
TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -190,7 +189,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -220,7 +219,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/vit/perf.yaml b/qai_hub_models/models/vit/perf.yaml index 459d6f2d..53eb8912 100644 --- a/qai_hub_models/models/vit/perf.yaml +++ b/qai_hub_models/models/vit/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: VIT performance_metrics: - torchscript_onnx_tflite: - inference_time: 78496.0 - throughput: 12.73950264981655 + inference_time: 78563.0 + throughput: 12.728638163002941 estimated_peak_memory_range: min: 102400 - max: 3437176 + max: 3460416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 535 - job_id: jwgoevxdp + job_id: j1pv4o17p job_status: Passed - torchscript_onnx_ort: - inference_time: 103100.0 - throughput: 9.699321047526674 + torchscript_onnx: + inference_time: 92821.0 + throughput: 10.77342411738723 estimated_peak_memory_range: - min: 110592 - max: 441770400 + min: 49152 + max: 443858496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +65,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 376 - job_id: jnp1q89lg + job_id: jqp48oxqg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +74,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:22:03Z' + timestamp: '2024-06-22T23:31:35Z' - torchscript_onnx_tflite: - inference_time: 56654.0 - throughput: 17.65100434214707 + inference_time: 56889.0 + throughput: 17.578090667791663 estimated_peak_memory_range: - min: 77824 - max: 375276272 + min: 114688 + max: 382195568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -86,14 +88,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 535 - job_id: j1pvzw8mg + job_id: j7gj1m07g job_status: Passed - torchscript_onnx_ort: - inference_time: 76545.0 - throughput: 13.064210595074792 + torchscript_onnx: + inference_time: 68957.0 + throughput: 14.501790971184942 estimated_peak_memory_range: - min: 684032 - max: 513094432 + min: 404545536 + max: 941258256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +103,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 376 - job_id: jvgd7vklg + job_id: j0pxmj7jg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +112,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: 
'2024-06-08T23:22:03Z' + timestamp: '2024-06-22T23:31:37Z' - torchscript_onnx_tflite: - inference_time: 78627.0 - throughput: 12.718277436504 + inference_time: 78470.0 + throughput: 12.743723716069836 estimated_peak_memory_range: - min: 110592 - max: 6215968 + min: 106496 + max: 5373496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -124,7 +126,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 535 - job_id: j7gjkl985 + job_id: jlpe21r7p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +135,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:21:57Z' - - torchscript_onnx_ort: - inference_time: 102862.0 - throughput: 9.721763138962883 + timestamp: '2024-06-22T23:31:27Z' + - torchscript_onnx_tflite: + inference_time: 78565.0 + throughput: 12.728314134792846 + estimated_peak_memory_range: + min: 139264 + max: 3777240 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 535 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 535 + job_id: jygzw9xzg + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:31:28Z' + - torchscript_onnx: + inference_time: 94524.0 + throughput: 10.579323769624645 estimated_peak_memory_range: - min: 158560256 - max: 158560256 + min: 251416576 + max: 251416576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +172,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 376 - job_id: jz57vdqr5 + job_id: jo5m42wy5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +181,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:22:04Z' + timestamp: '2024-06-22T23:31:38Z' diff --git a/qai_hub_models/models/whisper_base_en/demo.py b/qai_hub_models/models/whisper_base_en/demo.py index 9bbf714a..7dd801d6 100644 --- a/qai_hub_models/models/whisper_base_en/demo.py +++ b/qai_hub_models/models/whisper_base_en/demo.py @@ -6,8 +6,8 @@ from qai_hub_models.models.whisper_base_en.model import WhisperBaseEn -def main(): - whisper_demo(WhisperBaseEn) +def main(is_test: bool = False): + whisper_demo(WhisperBaseEn, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/whisper_base_en/export.py b/qai_hub_models/models/whisper_base_en/export.py index 4bb6b358..d7e186e1 100644 --- a/qai_hub_models/models/whisper_base_en/export.py +++ b/qai_hub_models/models/whisper_base_en/export.py @@ -33,7 +33,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, components: Optional[List[str]] = None, skip_profiling: bool = False, @@ -127,7 +127,6 @@ def export_model( for component_name, component in components_dict.items(): # Trace the model input_spec = component.get_input_spec() - component.eval() source_model = torch.jit.trace( component.to("cpu"), make_torch_inputs(input_spec) ) @@ -194,7 +193,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/whisper_base_en/perf.yaml 
b/qai_hub_models/models/whisper_base_en/perf.yaml index 881707cf..95dfe932 100644 --- a/qai_hub_models/models/whisper_base_en/perf.yaml +++ b/qai_hub_models/models/whisper_base_en/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: WhisperEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 158811.0 - throughput: 6.296793043303046 + inference_time: 159078.0 + throughput: 6.2862243679201395 estimated_peak_memory_range: - min: 31092736 - max: 131633968 + min: 16384 + max: 101374008 primary_compute_unit: GPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 419 layers_on_cpu: 0 total_layers: 419 - job_id: j0pxe1w95 + job_id: jopr9q1vp job_status: Passed torchscript_onnx_qnn: - inference_time: 624615.0 - throughput: 1.6009862075038224 + inference_time: 467575.0 + throughput: 2.1386943271132974 estimated_peak_memory_range: - min: 131072 - max: 82142360 + min: 69632 + max: 88169880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 580 - job_id: j2p0erne5 + job_id: jw56vjqvp job_status: Passed - torchscript_onnx_ort: - inference_time: 394348.0 - throughput: 2.5358312962155254 + torchscript_onnx: + inference_time: 426509.0 + throughput: 2.3446164090324 estimated_peak_memory_range: - min: 4792320 - max: 165488160 + min: 73859072 + max: 241652560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 380 - job_id: j1pvzwjmg + job_id: jvgd097kp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:22:51Z' + timestamp: '2024-06-22T23:32:31Z' - torchscript_onnx_tflite: - inference_time: 122023.0 - throughput: 8.195176319218508 + inference_time: 123208.0 + throughput: 8.116356080773976 estimated_peak_memory_range: - min: 37249024 - max: 82154976 + min: 35913728 + max: 83180512 primary_compute_unit: GPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 419 layers_on_cpu: 0 total_layers: 419 - job_id: jegnrejm5 + job_id: jqpynwvrg job_status: Passed torchscript_onnx_qnn: - inference_time: 452457.0 - throughput: 2.210154777139043 + inference_time: 331018.0 + throughput: 3.0209837531493755 estimated_peak_memory_range: min: 0 - max: 198495008 + max: 191295920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 580 - job_id: jogkryjo5 + job_id: jwgom0e45 job_status: Passed - torchscript_onnx_ort: - inference_time: 300384.0 - throughput: 3.3290721210184295 + torchscript_onnx: + inference_time: 302687.0 + throughput: 3.3037428102297093 estimated_peak_memory_range: - min: 62181376 - max: 262749552 + min: 66433024 + max: 270943840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 380 - job_id: jlpe4vj05 + job_id: jmg9819vp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - 
timestamp: '2024-06-08T23:22:52Z' + timestamp: '2024-06-22T23:32:33Z' - torchscript_onnx_tflite: - inference_time: 158001.0 - throughput: 6.329073866621098 + inference_time: 157415.0 + throughput: 6.352634755264746 estimated_peak_memory_range: - min: 12288 - max: 104601560 + min: 22224896 + max: 123803800 primary_compute_unit: GPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 419 layers_on_cpu: 0 total_layers: 419 - job_id: jep23m2mg + job_id: j1p889wzp job_status: Passed torchscript_onnx_qnn: - inference_time: 623834.0 - throughput: 1.602990539149838 + inference_time: 467371.0 + throughput: 2.1396278331347047 estimated_peak_memory_range: - min: 139264 - max: 76510216 + min: 53248 + max: 90413376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 580 - job_id: j1p3qmyz5 + job_id: jygzw9vzg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:22:49Z' + timestamp: '2024-06-22T23:32:26Z' + - torchscript_onnx_tflite: + inference_time: 160146.0 + throughput: 6.244302074357149 + estimated_peak_memory_range: + min: 0 + max: 61744728 + primary_compute_unit: GPU + precision: fp16 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 419 + layers_on_cpu: 0 + total_layers: 419 + job_id: jn5qwk975 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 460716.0 + throughput: 2.1705345592512524 + estimated_peak_memory_range: + min: 0 + max: 66333424 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: jmg9819qp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:32:29Z' - torchscript_onnx_qnn: - inference_time: 454926.0 - throughput: 2.198159700698575 + inference_time: 432301.0 + throughput: 2.313203069157832 estimated_peak_memory_range: min: 962560 max: 962560 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 579 - job_id: j1glekjlp + job_id: j7gj1mk7g job_status: Passed - torchscript_onnx_ort: - inference_time: 383597.0 - throughput: 2.606902556589337 + torchscript_onnx: + inference_time: 383599.0 + throughput: 2.606888964778323 estimated_peak_memory_range: - min: 139669504 - max: 139669504 + min: 138711040 + max: 138711040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 380 - job_id: jz5wm9jjg + job_id: jvgd097lp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,15 +256,15 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:22:54Z' + timestamp: '2024-06-22T23:32:35Z' - name: WhisperDecoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 24389.0 - throughput: 41.00209110664644 + inference_time: 23491.0 + throughput: 42.56949470009791 estimated_peak_memory_range: - min: 5771264 - max: 8649416 + min: 5779456 + max: 8829744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -232,14 +272,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 983 - job_id: jo5mvzjq5 + job_id: jep2j63x5 job_status: Passed torchscript_onnx_qnn: - inference_time: 22769.0 - throughput: 43.91936404760859 + inference_time: 23659.0 + throughput: 
42.267213322625636 estimated_peak_memory_range: - min: 42414080 - max: 60923784 + min: 42450944 + max: 57304400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -247,14 +287,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 821 - job_id: j1p8w7l8p + job_id: j1p383qx5 job_status: Passed - torchscript_onnx_ort: - inference_time: 24751.0 - throughput: 40.402407983515815 + torchscript_onnx: + inference_time: 17587.0 + throughput: 56.860180815374996 estimated_peak_memory_range: - min: 12656640 - max: 328987984 + min: 11915264 + max: 331768528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -262,7 +302,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 844 - job_id: j7gjklj85 + job_id: jz5wxvmjp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -271,13 +311,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:22:51Z' + timestamp: '2024-06-22T23:32:31Z' - torchscript_onnx_tflite: - inference_time: 18854.0 - throughput: 53.039142887450936 + inference_time: 19050.0 + throughput: 52.493438320209975 estimated_peak_memory_range: - min: 4575232 - max: 93812240 + min: 3858432 + max: 104109312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -285,14 +325,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 983 - job_id: jopr1yzeg + job_id: j2p0kqe25 job_status: Passed torchscript_onnx_qnn: - inference_time: 18709.0 - throughput: 53.450211128333954 + inference_time: 18571.0 + throughput: 53.84739647838027 estimated_peak_memory_range: - min: 42438656 - max: 323848592 + min: 42414080 + max: 290343760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -300,14 +340,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 821 - job_id: jn5q92jmp + job_id: j1pv4oz7p job_status: Passed - torchscript_onnx_ort: - inference_time: 20257.0 - throughput: 49.36565137976996 + torchscript_onnx: + inference_time: 14198.0 + throughput: 70.4324552753909 estimated_peak_memory_range: - min: 52916224 - max: 140494080 + min: 72601600 + max: 138332928 primary_compute_unit: NPU precision: fp16 layer_info: @@ -315,7 +355,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 844 - job_id: jygzv716p + job_id: jnp13lql5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -324,13 +364,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:22:53Z' + timestamp: '2024-06-22T23:32:33Z' - torchscript_onnx_tflite: - inference_time: 23324.0 - throughput: 42.87429257417253 + inference_time: 23883.0 + throughput: 41.87078675208307 estimated_peak_memory_range: - min: 5750784 - max: 9075392 + min: 5771264 + max: 10399912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -338,14 +378,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 983 - job_id: jqpyvd94p + job_id: jogkdnryp job_status: Passed torchscript_onnx_qnn: - inference_time: 24053.0 - throughput: 41.57485552737704 + inference_time: 22785.0 + throughput: 43.88852315119596 estimated_peak_memory_range: - min: 42450944 - max: 59016968 + min: 42438656 + max: 59162872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -353,7 +393,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 821 - job_id: jwgoevjdp + job_id: jz5wxvmzp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -362,10 +402,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:22:49Z' + timestamp: 
'2024-06-22T23:32:27Z' + - torchscript_onnx_tflite: + inference_time: 23568.0 + throughput: 42.43041412084182 + estimated_peak_memory_range: + min: 5783552 + max: 8704464 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 983 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 983 + job_id: j1gl7zee5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 23732.0 + throughput: 42.13719871902916 + estimated_peak_memory_range: + min: 42446848 + max: 58833576 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 821 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 821 + job_id: jnp13lqk5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:32:29Z' - torchscript_onnx_qnn: - inference_time: 13816.0 - throughput: 72.37984944991314 + inference_time: 10875.0 + throughput: 91.95402298850574 estimated_peak_memory_range: min: 42455040 max: 42455040 @@ -376,14 +454,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 821 - job_id: jw56q1k7g + job_id: jlpe2147p job_status: Passed - torchscript_onnx_ort: - inference_time: 20016.0 - throughput: 49.96003197442047 + torchscript_onnx: + inference_time: 14677.0 + throughput: 68.13381481229133 estimated_peak_memory_range: - min: 45969408 - max: 45969408 + min: 52908032 + max: 52908032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -391,7 +469,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 844 - job_id: jmg9946vg + job_id: jz576w6rg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -400,4 +478,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:22:55Z' + timestamp: '2024-06-22T23:32:35Z' diff --git a/qai_hub_models/models/whisper_base_en/requirements.txt b/qai_hub_models/models/whisper_base_en/requirements.txt index fa34d4f8..1b6cbc24 100644 --- a/qai_hub_models/models/whisper_base_en/requirements.txt +++ b/qai_hub_models/models/whisper_base_en/requirements.txt @@ -1,2 +1,4 @@ openai-whisper==20230314 scipy==1.8.1 +audio2numpy==0.1.2 +samplerate==0.2.1 diff --git a/qai_hub_models/models/whisper_base_en/test.py b/qai_hub_models/models/whisper_base_en/test.py index aeb74e53..d94d4bf5 100644 --- a/qai_hub_models/models/whisper_base_en/test.py +++ b/qai_hub_models/models/whisper_base_en/test.py @@ -19,4 +19,4 @@ def test_transcribe(): def test_demo(): - demo_main() + demo_main(is_test=True) diff --git a/qai_hub_models/models/whisper_small_en/demo.py b/qai_hub_models/models/whisper_small_en/demo.py index c3100f59..a290f016 100644 --- a/qai_hub_models/models/whisper_small_en/demo.py +++ b/qai_hub_models/models/whisper_small_en/demo.py @@ -6,8 +6,8 @@ from qai_hub_models.models.whisper_small_en.model import WhisperSmallEn -def main(): - whisper_demo(WhisperSmallEn) +def main(is_test: bool = False): + whisper_demo(WhisperSmallEn, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/whisper_small_en/export.py b/qai_hub_models/models/whisper_small_en/export.py index e6937074..5a71e037 100644 --- a/qai_hub_models/models/whisper_small_en/export.py +++ b/qai_hub_models/models/whisper_small_en/export.py @@ -33,7 +33,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, components: Optional[List[str]] = None, skip_profiling: bool = False, @@ -127,7 +127,6 @@ def 
export_model( for component_name, component in components_dict.items(): # Trace the model input_spec = component.get_input_spec() - component.eval() source_model = torch.jit.trace( component.to("cpu"), make_torch_inputs(input_spec) ) @@ -194,7 +193,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/whisper_small_en/perf.yaml b/qai_hub_models/models/whisper_small_en/perf.yaml index aae6bde6..7b5b5efb 100644 --- a/qai_hub_models/models/whisper_small_en/perf.yaml +++ b/qai_hub_models/models/whisper_small_en/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: WhisperEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 610635.0 - throughput: 1.6376395064154528 + inference_time: 615850.0 + throughput: 1.6237720224080539 estimated_peak_memory_range: - min: 8286208 - max: 437557824 + min: 55234560 + max: 505232120 primary_compute_unit: GPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 911 layers_on_cpu: 0 total_layers: 911 - job_id: jz57vdzr5 + job_id: jo5m424q5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1969063.0 - throughput: 0.5078557669307686 + inference_time: 1802215.0 + throughput: 0.5548727538057335 estimated_peak_memory_range: - min: 1097728 - max: 226008440 + min: 61440 + max: 235566064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +65,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1474 - job_id: jep23m8mg + job_id: jn5qwkwm5 + job_status: Passed + torchscript_onnx: + inference_time: 1696162.0 + throughput: 0.5895663268013315 + estimated_peak_memory_range: + min: 16384 + max: 452284040 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 884 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 884 + job_id: jmg9818vp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:24:00Z' + timestamp: '2024-06-22T23:33:56Z' - torchscript_onnx_tflite: - inference_time: 467725.0 - throughput: 2.1380084451333583 + inference_time: 463964.0 + throughput: 2.155339638420222 estimated_peak_memory_range: - min: 111644672 - max: 209573760 + min: 1200128 + max: 100685248 primary_compute_unit: GPU precision: fp16 layer_info: @@ -86,14 +103,14 @@ models: layers_on_gpu: 911 layers_on_cpu: 0 total_layers: 911 - job_id: j0pxe1v95 + job_id: jopr9q9ep job_status: Passed torchscript_onnx_qnn: - inference_time: 1435234.0 - throughput: 0.6967504950412268 + inference_time: 1352979.0 + throughput: 0.7391097718442046 estimated_peak_memory_range: min: 0 - max: 570396624 + max: 565732080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1474 - job_id: j2p0erye5 + job_id: jw56vjv7p job_status: Passed - torchscript_onnx_ort: - 
inference_time: 1240429.0 - throughput: 0.8061727031535058 + torchscript_onnx: + inference_time: 1259184.0 + throughput: 0.7941651101030509 estimated_peak_memory_range: - min: 350531584 - max: 914876112 + min: 995328 + max: 564432352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 884 - job_id: j1pvzw3mg + job_id: jvgd090lp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -125,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:24:10Z' + timestamp: '2024-06-22T23:33:59Z' - torchscript_onnx_tflite: - inference_time: 611130.0 - throughput: 1.6363130594145272 + inference_time: 611863.0 + throughput: 1.6343527881241389 estimated_peak_memory_range: - min: 68825088 - max: 504071032 + min: 12288 + max: 454968200 primary_compute_unit: GPU precision: fp16 layer_info: @@ -139,7 +156,7 @@ models: layers_on_gpu: 911 layers_on_cpu: 0 total_layers: 911 - job_id: jegnre2m5 + job_id: jqpynwn4g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -148,10 +165,33 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:23:58Z' + timestamp: '2024-06-22T23:33:39Z' + - torchscript_onnx_tflite: + inference_time: 613954.0 + throughput: 1.6287865214657775 + estimated_peak_memory_range: + min: 12288 + max: 456527760 + primary_compute_unit: GPU + precision: fp16 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 911 + layers_on_cpu: 0 + total_layers: 911 + job_id: j1p88988p + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:33:41Z' - torchscript_onnx_qnn: - inference_time: 1682160.0 - throughput: 0.5944737718171874 + inference_time: 1093546.0 + throughput: 0.9144562734443727 estimated_peak_memory_range: min: 962560 max: 962560 @@ -162,14 +202,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1473 - job_id: jogkryzo5 + job_id: jwgom0md5 job_status: Passed - torchscript_onnx_ort: - inference_time: 1497981.0 - throughput: 0.667565209438571 + torchscript_onnx: + inference_time: 1504227.0 + throughput: 0.6647932792058645 estimated_peak_memory_range: - min: 555839488 - max: 555839488 + min: 555786240 + max: 555786240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -177,7 +217,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 884 - job_id: jlpe4v905 + job_id: jqp48omlg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -186,15 +226,15 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:24:12Z' + timestamp: '2024-06-22T23:34:01Z' - name: WhisperDecoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 26644.0 - throughput: 37.53190211679928 + inference_time: 26012.0 + throughput: 38.44379517145933 estimated_peak_memory_range: - min: 16855040 - max: 20865456 + min: 16715776 + max: 20608968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -202,14 +242,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2573 - job_id: jqp4jwqlp + job_id: jegnxyxm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 24731.0 - throughput: 40.43508147668918 + inference_time: 24923.0 + throughput: 40.123580628335276 estimated_peak_memory_range: - min: 124076032 - max: 200059296 + min: 120889344 + max: 195142832 primary_compute_unit: NPU 
precision: fp16 layer_info: @@ -217,7 +257,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2255 - job_id: jqpyvde4p + job_id: j1gl7z7l5 + job_status: Passed + torchscript_onnx: + inference_time: 63263.0 + throughput: 15.807027804561908 + estimated_peak_memory_range: + min: 44670976 + max: 556541120 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 2302 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 2302 + job_id: jnp13l3l5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -226,13 +281,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:24:00Z' + timestamp: '2024-06-22T23:33:57Z' - torchscript_onnx_tflite: - inference_time: 19793.0 - throughput: 50.52291214065579 + inference_time: 19740.0 + throughput: 50.65856129685917 estimated_peak_memory_range: - min: 16777216 - max: 1154461280 + min: 15392768 + max: 1189848448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -240,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2573 - job_id: jo5mvzrq5 + job_id: jep2j6jm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 19453.0 - throughput: 51.40595280933532 + inference_time: 19402.0 + throughput: 51.541078239356764 estimated_peak_memory_range: - min: 72151040 - max: 864487680 + min: 86843392 + max: 784204144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -255,14 +310,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2255 - job_id: j1p8w7o8p + job_id: j1p3838z5 job_status: Passed - torchscript_onnx_ort: - inference_time: 53273.0 - throughput: 18.77123495954799 + torchscript_onnx: + inference_time: 53333.0 + throughput: 18.750117188232426 estimated_peak_memory_range: - min: 50139136 - max: 319234896 + min: 86953984 + max: 307892128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -270,7 +325,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2302 - job_id: j7gjklx85 + job_id: jz576wkrg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -279,13 +334,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:24:11Z' + timestamp: '2024-06-22T23:33:59Z' - torchscript_onnx_tflite: - inference_time: 27029.0 - throughput: 36.997299197158604 + inference_time: 27116.0 + throughput: 36.87859566307715 estimated_peak_memory_range: - min: 16769024 - max: 20284792 + min: 14921728 + max: 18533312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -293,14 +348,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2573 - job_id: jopr1ykeg + job_id: j2p0kqke5 job_status: Passed torchscript_onnx_qnn: - inference_time: 25818.0 - throughput: 38.73266713145867 + inference_time: 24249.0 + throughput: 41.23881397171017 estimated_peak_memory_range: - min: 127201280 - max: 197556544 + min: 127119360 + max: 201133888 primary_compute_unit: NPU precision: fp16 layer_info: @@ -308,7 +363,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2255 - job_id: jw56q167g + job_id: jlpe2120p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -317,13 +372,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:24:07Z' + timestamp: '2024-06-22T23:33:52Z' + - torchscript_onnx_tflite: + inference_time: 26499.0 + throughput: 37.73727310464546 + estimated_peak_memory_range: + min: 12496896 + max: 15155192 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 2573 + 
layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 2573 + job_id: jogkdndop + job_status: Passed + torchscript_onnx_qnn: + inference_time: 25155.0 + throughput: 39.75352812562115 + estimated_peak_memory_range: + min: 127160320 + max: 197620136 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 2255 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 2255 + job_id: jz5wxvxjp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:33:55Z' - torchscript_onnx_qnn: - inference_time: 20402.0 - throughput: 49.01480247034605 + inference_time: 17206.0 + throughput: 58.119260723003606 estimated_peak_memory_range: - min: 127381504 - max: 127381504 + min: 127369216 + max: 127369216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -331,14 +424,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2255 - job_id: jn5q928mp + job_id: j1pv4o4mp job_status: Passed - torchscript_onnx_ort: - inference_time: 53485.0 - throughput: 18.696830887164626 + torchscript_onnx: + inference_time: 51986.0 + throughput: 19.235948139883813 estimated_peak_memory_range: - min: 342065152 - max: 342065152 + min: 211558400 + max: 211558400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -346,7 +439,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2302 - job_id: jygzv7e6p + job_id: j0pxmj39g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -355,4 +448,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:24:13Z' + timestamp: '2024-06-22T23:34:02Z' diff --git a/qai_hub_models/models/whisper_small_en/requirements.txt b/qai_hub_models/models/whisper_small_en/requirements.txt index fa34d4f8..1b6cbc24 100644 --- a/qai_hub_models/models/whisper_small_en/requirements.txt +++ b/qai_hub_models/models/whisper_small_en/requirements.txt @@ -1,2 +1,4 @@ openai-whisper==20230314 scipy==1.8.1 +audio2numpy==0.1.2 +samplerate==0.2.1 diff --git a/qai_hub_models/models/whisper_small_en/test.py b/qai_hub_models/models/whisper_small_en/test.py index aeb74e53..d94d4bf5 100644 --- a/qai_hub_models/models/whisper_small_en/test.py +++ b/qai_hub_models/models/whisper_small_en/test.py @@ -19,4 +19,4 @@ def test_transcribe(): def test_demo(): - demo_main() + demo_main(is_test=True) diff --git a/qai_hub_models/models/whisper_tiny_en/demo.py b/qai_hub_models/models/whisper_tiny_en/demo.py index 073ab120..c1f04b61 100644 --- a/qai_hub_models/models/whisper_tiny_en/demo.py +++ b/qai_hub_models/models/whisper_tiny_en/demo.py @@ -6,8 +6,8 @@ from qai_hub_models.models.whisper_tiny_en.model import WhisperTinyEn -def main(): - whisper_demo(WhisperTinyEn) +def main(is_test: bool = False): + whisper_demo(WhisperTinyEn, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/whisper_tiny_en/export.py b/qai_hub_models/models/whisper_tiny_en/export.py index 050e09fa..913205c8 100644 --- a/qai_hub_models/models/whisper_tiny_en/export.py +++ b/qai_hub_models/models/whisper_tiny_en/export.py @@ -33,7 +33,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, components: Optional[List[str]] = None, skip_profiling: bool = False, @@ -127,7 +127,6 @@ def export_model( for component_name, component in components_dict.items(): # Trace the model input_spec = component.get_input_spec() - component.eval() 
source_model = torch.jit.trace( component.to("cpu"), make_torch_inputs(input_spec) ) @@ -194,7 +193,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/whisper_tiny_en/perf.yaml b/qai_hub_models/models/whisper_tiny_en/perf.yaml index cf5d7cdb..84c8085d 100644 --- a/qai_hub_models/models/whisper_tiny_en/perf.yaml +++ b/qai_hub_models/models/whisper_tiny_en/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: WhisperEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 68470.0 - throughput: 14.604936468526361 + inference_time: 69532.0 + throughput: 14.38186734165564 estimated_peak_memory_range: - min: 16613376 - max: 64496288 + min: 20480 + max: 63826120 primary_compute_unit: GPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 271 layers_on_cpu: 0 total_layers: 271 - job_id: jnp1q80lg + job_id: jopr9qoep job_status: Passed torchscript_onnx_qnn: - inference_time: 286944.0 - throughput: 3.485000557600089 + inference_time: 285533.0 + throughput: 3.502222159960495 estimated_peak_memory_range: min: 1019904 - max: 52873616 + max: 55271304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +65,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 338 - job_id: jegnreym5 + job_id: jw56vjr7p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +74,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:24:44Z' + timestamp: '2024-06-22T23:34:37Z' - torchscript_onnx_tflite: - inference_time: 54112.0 - throughput: 18.48018923713779 + inference_time: 54335.0 + throughput: 18.40434342504831 estimated_peak_memory_range: - min: 0 - max: 36724816 + min: 4096 + max: 34290736 primary_compute_unit: GPU precision: fp16 layer_info: @@ -86,14 +88,14 @@ models: layers_on_gpu: 271 layers_on_cpu: 0 total_layers: 271 - job_id: jz57vdwr5 + job_id: jqpynwq4g job_status: Passed torchscript_onnx_qnn: - inference_time: 218003.0 - throughput: 4.587092838171952 + inference_time: 221234.0 + throughput: 4.520100888651835 estimated_peak_memory_range: - min: 406650880 - max: 543573456 + min: 995328 + max: 136088064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +103,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 338 - job_id: jep23m6mg + job_id: jwgom0od5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +112,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:24:46Z' + timestamp: '2024-06-22T23:34:40Z' - torchscript_onnx_tflite: - inference_time: 68514.0 - throughput: 14.595557112414982 + inference_time: 68688.0 + throughput: 14.558583740973678 estimated_peak_memory_range: - min: 18030592 - max: 66868584 + min: 13635584 + max: 52716856 primary_compute_unit: GPU precision: fp16 layer_info: @@ -124,14 +126,14 @@ 
models: layers_on_gpu: 271 layers_on_cpu: 0 total_layers: 271 - job_id: j0pxe1j95 + job_id: j1p88968p job_status: Passed torchscript_onnx_qnn: - inference_time: 288936.0 - throughput: 3.4609740565384723 + inference_time: 285382.0 + throughput: 3.5040752395035426 estimated_peak_memory_range: - min: 159744 - max: 53294424 + min: 16384 + max: 53570416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -139,7 +141,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 338 - job_id: jogkryno5 + job_id: jygzw986g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -148,10 +150,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:24:50Z' + timestamp: '2024-06-22T23:34:44Z' + - torchscript_onnx_tflite: + inference_time: 68319.0 + throughput: 14.637216586893837 + estimated_peak_memory_range: + min: 16384 + max: 39529544 + primary_compute_unit: GPU + precision: fp16 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 271 + layers_on_cpu: 0 + total_layers: 271 + job_id: jn5qwkzm5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 287850.0 + throughput: 3.4740316136876848 + estimated_peak_memory_range: + min: 135168 + max: 53407928 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 338 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 338 + job_id: jmg981kvp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:34:47Z' - torchscript_onnx_qnn: - inference_time: 237871.0 - throughput: 4.203959288858247 + inference_time: 240171.0 + throughput: 4.1637000303950105 estimated_peak_memory_range: min: 962560 max: 962560 @@ -162,7 +202,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 337 - job_id: j2p0erqe5 + job_id: j7gj1mo8g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -171,15 +211,15 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:24:48Z' + timestamp: '2024-06-22T23:34:42Z' - name: WhisperDecoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 3853.0 - throughput: 259.53802232026993 + inference_time: 3849.0 + throughput: 259.80774227071964 estimated_peak_memory_range: min: 2973696 - max: 6011536 + max: 7559016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -187,14 +227,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 557 - job_id: jvgd7vwlg + job_id: jep2j64m5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3672.0 - throughput: 272.33115468409585 + inference_time: 3616.0 + throughput: 276.5486725663717 estimated_peak_memory_range: - min: 21250048 - max: 48536944 + min: 9072640 + max: 46683616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -202,14 +242,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 447 - job_id: jopr1yqeg + job_id: j1p383xz5 job_status: Passed - torchscript_onnx_ort: - inference_time: 5299.0 - throughput: 188.71485185884129 + torchscript_onnx: + inference_time: 5400.0 + throughput: 185.1851851851852 estimated_peak_memory_range: - min: 6336512 - max: 214237680 + min: 6352896 + max: 217301960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -217,7 +257,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 462 - job_id: jw56q1j7g + job_id: jz576w7rg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -226,13 +266,13 @@ models: os_name: Android 
manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:24:52Z' + timestamp: '2024-06-22T23:34:49Z' - torchscript_onnx_tflite: - inference_time: 2973.0 - throughput: 336.3605785401951 + inference_time: 3151.0 + throughput: 317.35956839098696 estimated_peak_memory_range: - min: 942080 - max: 226696352 + min: 1753088 + max: 235076768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -240,14 +280,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 557 - job_id: jqp4jwolp + job_id: j2p0kqde5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2764.0 - throughput: 361.794500723589 + inference_time: 2841.0 + throughput: 351.98873636043646 estimated_peak_memory_range: - min: 0 - max: 138707216 + min: 21213184 + max: 142867088 primary_compute_unit: NPU precision: fp16 layer_info: @@ -255,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 447 - job_id: jqpyvdw4p + job_id: j1pv4oemp job_status: Passed - torchscript_onnx_ort: - inference_time: 4502.0 - throughput: 222.1235006663705 + torchscript_onnx: + inference_time: 4262.0 + throughput: 234.6316283435007 estimated_peak_memory_range: - min: 27127808 - max: 85392304 + min: 27529216 + max: 76179792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -270,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 462 - job_id: jwgoev0dp + job_id: j0pxmjd9g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -279,13 +319,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:24:54Z' + timestamp: '2024-06-22T23:34:51Z' - torchscript_onnx_tflite: - inference_time: 3909.0 - throughput: 255.81990278843693 + inference_time: 3881.0 + throughput: 257.6655501159495 estimated_peak_memory_range: - min: 2981888 - max: 5533208 + min: 2973696 + max: 5500200 primary_compute_unit: NPU precision: fp16 layer_info: @@ -293,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 557 - job_id: jo5mvz2q5 + job_id: jogkdnoop job_status: Passed torchscript_onnx_qnn: - inference_time: 3717.0 - throughput: 269.03416733925206 + inference_time: 3653.0 + throughput: 273.7476047084588 estimated_peak_memory_range: - min: 21213184 - max: 37347800 + min: 21233664 + max: 48012888 primary_compute_unit: NPU precision: fp16 layer_info: @@ -308,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 447 - job_id: jn5q92kmp + job_id: jz5wxv8jp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -317,10 +357,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:24:50Z' + timestamp: '2024-06-22T23:34:45Z' + - torchscript_onnx_tflite: + inference_time: 3865.0 + throughput: 258.73221216041395 + estimated_peak_memory_range: + min: 2994176 + max: 5685640 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 557 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 557 + job_id: j1gl7zol5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 3721.0 + throughput: 268.74496103198067 + estimated_peak_memory_range: + min: 21221376 + max: 35267480 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 447 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 447 + job_id: jnp13l7l5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:34:47Z' - torchscript_onnx_qnn: - 
inference_time: 3772.0 - throughput: 265.11134676564154 + inference_time: 3173.0 + throughput: 315.1591553734636 estimated_peak_memory_range: min: 21229568 max: 21229568 @@ -331,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 447 - job_id: j1p8w798p + job_id: jlpe2180p job_status: Passed - torchscript_onnx_ort: - inference_time: 4450.0 - throughput: 224.7191011235955 + torchscript_onnx: + inference_time: 4549.0 + throughput: 219.82853374367994 estimated_peak_memory_range: - min: 19857408 - max: 19857408 + min: 21237760 + max: 21237760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -346,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 462 - job_id: j7gjklm85 + job_id: jegnxy7m5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -355,4 +433,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:24:56Z' + timestamp: '2024-06-22T23:34:53Z' diff --git a/qai_hub_models/models/whisper_tiny_en/requirements.txt b/qai_hub_models/models/whisper_tiny_en/requirements.txt index fa34d4f8..1b6cbc24 100644 --- a/qai_hub_models/models/whisper_tiny_en/requirements.txt +++ b/qai_hub_models/models/whisper_tiny_en/requirements.txt @@ -1,2 +1,4 @@ openai-whisper==20230314 scipy==1.8.1 +audio2numpy==0.1.2 +samplerate==0.2.1 diff --git a/qai_hub_models/models/whisper_tiny_en/test.py b/qai_hub_models/models/whisper_tiny_en/test.py index aeb74e53..d94d4bf5 100644 --- a/qai_hub_models/models/whisper_tiny_en/test.py +++ b/qai_hub_models/models/whisper_tiny_en/test.py @@ -19,4 +19,4 @@ def test_transcribe(): def test_demo(): - demo_main() + demo_main(is_test=True) diff --git a/qai_hub_models/models/wideresnet50/export.py b/qai_hub_models/models/wideresnet50/export.py index a5bd28dc..b6260264 100644 --- a/qai_hub_models/models/wideresnet50/export.py +++ b/qai_hub_models/models/wideresnet50/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/wideresnet50/perf.yaml b/qai_hub_models/models/wideresnet50/perf.yaml index 8a782d43..60b76fbc 100644 --- a/qai_hub_models/models/wideresnet50/perf.yaml +++ b/qai_hub_models/models/wideresnet50/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 
(Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: WideResNet50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 4868.0 - throughput: 205.42317173377157 + inference_time: 4893.0 + throughput: 204.37359493153485 estimated_peak_memory_range: - min: 24576 - max: 2240024 + min: 16384 + max: 2332784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jz5wm9vjg + job_id: jqpynw74g job_status: Passed torchscript_onnx_qnn: - inference_time: 5652.0 - throughput: 176.92852087756546 + inference_time: 5681.0 + throughput: 176.0253476500616 estimated_peak_memory_range: - min: 622592 - max: 250014320 + min: 618496 + max: 355223896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jvgd7v9lg + job_id: jn5qwkmm5 job_status: Passed - torchscript_onnx_ort: - inference_time: 5471.0 - throughput: 182.78194114421495 + torchscript_onnx: + inference_time: 5421.0 + throughput: 184.46781036709095 estimated_peak_memory_range: - min: 20480 - max: 445804176 + min: 12288 + max: 475100016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jvgd7vleg + job_id: j1pv4o9mp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:25:28Z' + timestamp: '2024-06-22T23:35:29Z' - torchscript_onnx_tflite: - inference_time: 3644.0 - throughput: 274.423710208562 + inference_time: 3626.0 + throughput: 275.78599007170436 estimated_peak_memory_range: - min: 16384 - max: 100476704 + min: 12288 + max: 101920256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jmg9941vg + job_id: j2p0kqve5 job_status: Passed torchscript_onnx_qnn: - inference_time: 4212.0 - throughput: 237.41690408357076 + inference_time: 4210.0 + throughput: 237.52969121140143 estimated_peak_memory_range: min: 618496 - max: 53808800 + max: 50542208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jz5wm9n6g + job_id: j1gl7z1l5 job_status: Passed - torchscript_onnx_ort: - inference_time: 4064.0 - throughput: 246.06299212598427 + torchscript_onnx: + inference_time: 4131.0 + throughput: 242.0721374969741 estimated_peak_memory_range: min: 618496 - max: 31598192 + max: 32538336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jz57vd3l5 + job_id: j7gj1mw8g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:25:29Z' + timestamp: '2024-06-22T23:35:30Z' - torchscript_onnx_tflite: - inference_time: 4872.0 - throughput: 205.2545155993432 + inference_time: 4880.0 + throughput: 204.91803278688525 estimated_peak_memory_range: min: 20480 - max: 2441976 + max: 2445640 primary_compute_unit: NPU precision: fp16 
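Note on the regenerated numbers: the inference_time / throughput pairs updated throughout these perf.yaml hunks encode the same measurement twice — inference_time is in microseconds and throughput is simply its reciprocal in inferences per second. A quick sanity check (plain Python, not part of the patch), using the updated WideResNet50 TFLite value from the hunk above:

```python
# inference_time is recorded in microseconds; throughput is inferences/second.
inference_time_us = 4893.0                      # WideResNet50, TFLite, Galaxy S23 (value from the hunk above)
throughput_per_s = 1_000_000 / inference_time_us
print(round(throughput_per_s, 2))               # 204.37 -- matches the recorded 204.37359493153485
```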
layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jnp1q8llg + job_id: j1p88948p job_status: Passed torchscript_onnx_qnn: - inference_time: 5687.0 - throughput: 175.83963425356075 + inference_time: 5688.0 + throughput: 175.8087201125176 estimated_peak_memory_range: - min: 618496 - max: 354920904 + min: 647168 + max: 334059464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jnp1q8x2g + job_id: j1p383wz5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:25:27Z' + timestamp: '2024-06-22T23:35:26Z' + - torchscript_onnx_tflite: + inference_time: 4855.0 + throughput: 205.97322348094747 + estimated_peak_memory_range: + min: 139264 + max: 2157448 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 79 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 79 + job_id: jogkdn9op + job_status: Passed + torchscript_onnx_qnn: + inference_time: 5695.0 + throughput: 175.5926251097454 + estimated_peak_memory_range: + min: 618496 + max: 355056952 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 126 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 126 + job_id: jwgom04d5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:35:28Z' - torchscript_onnx_qnn: - inference_time: 5842.0 - throughput: 171.17425539198905 + inference_time: 4659.0 + throughput: 214.63833440652502 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jmg994elg + job_id: jw56vjd7p job_status: Passed - torchscript_onnx_ort: - inference_time: 5121.0 - throughput: 195.27436047646944 + torchscript_onnx: + inference_time: 5080.0 + throughput: 196.8503937007874 estimated_peak_memory_range: - min: 71557120 - max: 71557120 + min: 25653248 + max: 25653248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jqp4jw0vp + job_id: jlpe21l0p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:25:30Z' + timestamp: '2024-06-22T23:35:31Z' diff --git a/qai_hub_models/models/wideresnet50_quantized/export.py b/qai_hub_models/models/wideresnet50_quantized/export.py index a10d2988..4308a232 100644 --- a/qai_hub_models/models/wideresnet50_quantized/export.py +++ b/qai_hub_models/models/wideresnet50_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image_tensor" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == 
TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last( "image_tensor", sample_inputs, target_runtime ) @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/wideresnet50_quantized/model.py b/qai_hub_models/models/wideresnet50_quantized/model.py index 2894b748..a0fa95da 100644 --- a/qai_hub_models/models/wideresnet50_quantized/model.py +++ b/qai_hub_models/models/wideresnet50_quantized/model.py @@ -83,5 +83,4 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() return cls(sim) diff --git a/qai_hub_models/models/wideresnet50_quantized/perf.yaml b/qai_hub_models/models/wideresnet50_quantized/perf.yaml index cd023541..a53c5d22 100644 --- a/qai_hub_models/models/wideresnet50_quantized/perf.yaml +++ b/qai_hub_models/models/wideresnet50_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: WideResNet50-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1803.0 - throughput: 554.6311702717693 + inference_time: 1816.0 + throughput: 550.6607929515418 estimated_peak_memory_range: - min: 12288 - max: 2605960 + min: 16384 + max: 2720040 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: jo5mvzyw5 + job_id: jz5wxv1jp job_status: Passed torchscript_onnx_qnn: - inference_time: 2049.0 - throughput: 488.0429477794046 + inference_time: 2041.0 + throughput: 489.9559039686428 estimated_peak_memory_range: - min: 16384 - max: 124262304 + min: 0 + max: 145574320 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: j2p0er765 - job_status: Passed - torchscript_onnx_ort: - inference_time: 2037.0 - throughput: 490.9180166912126 - estimated_peak_memory_range: - min: 12288 - max: 210986456 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 83 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 83 - job_id: j1glekr8p + job_id: j0pxmjx9g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:27:50Z' + timestamp: '2024-06-22T23:37:47Z' - torchscript_onnx_tflite: - inference_time: 1386.0 - throughput: 721.5007215007215 + inference_time: 1390.0 + throughput: 719.4244604316547 estimated_peak_memory_range: min: 12288 - max: 56539024 + max: 57045488 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: jegnre8r5 + job_id: jmg981xvp job_status: Passed torchscript_onnx_qnn: - inference_time: 1532.0 - throughput: 652.7415143603133 + inference_time: 1538.0 + throughput: 650.1950585175553 
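Across every export.py in this patch the runtime enum members are renamed: TargetRuntime.ORT becomes TargetRuntime.ONNX and TargetRuntime.PRECOMPILED_ORT becomes TargetRuntime.PRECOMPILED_QNN_ONNX. A minimal sketch of the extension-selection logic the hunks converge on — the TargetRuntime import path and the QNN branch guarding the ".so" case are assumed from context, not shown in the diff:

```python
# Sketch only -- mirrors the post-patch branches in the export scripts above.
# Assumptions: TargetRuntime is importable from qai_hub_models.utils.base_model
# and has a QNN member for the ".so" branch; neither is shown in the diff.
from qai_hub_models.utils.base_model import TargetRuntime

def target_runtime_extension(target_runtime: TargetRuntime) -> str:
    if target_runtime == TargetRuntime.QNN:
        return "so"
    elif target_runtime == TargetRuntime.TFLITE:
        return "tflite"
    elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}:
        return "onnx"
    raise ValueError(f"Unhandled target runtime: {target_runtime}")
```

The same rename drives the channel-layout handling in these hunks: only the ONNX path keeps channel-first I/O, while the TFLite/QNN paths get the `--force_channel_last_input` compile flags and transposed sample inputs.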
estimated_peak_memory_range: - min: 172032 - max: 45717904 + min: 0 + max: 42894192 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: j1p8w7vxp - job_status: Passed - torchscript_onnx_ort: - inference_time: 1574.0 - throughput: 635.3240152477764 - estimated_peak_memory_range: - min: 12288 - max: 29772112 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 83 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 83 - job_id: jw56q1l0g + job_id: jo5m428q5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +120,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:27:51Z' + timestamp: '2024-06-22T23:37:48Z' - torchscript_onnx_tflite: - inference_time: 1824.0 - throughput: 548.2456140350877 + inference_time: 1810.0 + throughput: 552.4861878453039 estimated_peak_memory_range: - min: 24576 - max: 86925416 + min: 12288 + max: 1592592 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: jopr1yj9g + job_id: jnp13lvl5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2034.0 - throughput: 491.6420845624385 + inference_time: 2022.0 + throughput: 494.55984174085063 estimated_peak_memory_range: - min: 12288 - max: 7539488 + min: 16384 + max: 115424408 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: jn5q92o4p + job_id: jopr9qwep job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +158,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:27:49Z' + timestamp: '2024-06-22T23:37:51Z' - torchscript_onnx_tflite: - inference_time: 7862.0 - throughput: 127.1940981938438 + inference_time: 1818.0 + throughput: 550.05500550055 estimated_peak_memory_range: - min: 12288 - max: 27235632 + min: 28672 + max: 1588664 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 80 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 80 + job_id: jvgd09zlp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 2027.0 + throughput: 493.33991119881597 + estimated_peak_memory_range: + min: 176128 + max: 6826976 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 78 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 78 + job_id: jep2j6em5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:37:52Z' + - torchscript_onnx_tflite: + inference_time: 8221.0 + throughput: 121.6397031991242 + estimated_peak_memory_range: + min: 20480 + max: 30028192 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,7 +210,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: jep23mn4g + job_id: jz576w9rg job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -207,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-08T23:27:44Z' + timestamp: '2024-06-22T23:37:44Z' - torchscript_onnx_tflite: - inference_time: 23597.0 - throughput: 42.3782684239522 + inference_time: 23723.0 + throughput: 42.153184673102054 estimated_peak_memory_range: - min: 53248 - max: 3084328 + min: 49152 + max: 2236888 primary_compute_unit: NPU 
precision: int8 layer_info: @@ -221,7 +233,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: jqpyvd07p + job_id: jqp48o3lg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -230,13 +242,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-08T23:27:45Z' + timestamp: '2024-06-22T23:37:46Z' - torchscript_onnx_qnn: - inference_time: 1964.0 - throughput: 509.1649694501018 + inference_time: 1851.0 + throughput: 540.2485143165857 estimated_peak_memory_range: - min: 368640 - max: 368640 + min: 286720 + max: 286720 primary_compute_unit: NPU precision: int8 layer_info: @@ -244,22 +256,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: jogkrym25 - job_status: Passed - torchscript_onnx_ort: - inference_time: 1848.0 - throughput: 541.1255411255411 - estimated_peak_memory_range: - min: 23400448 - max: 23400448 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 83 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 83 - job_id: j1p3qm2l5 + job_id: jegnxykm5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -268,4 +265,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:27:52Z' + timestamp: '2024-06-22T23:37:49Z' diff --git a/qai_hub_models/models/xlsr/export.py b/qai_hub_models/models/xlsr/export.py index dfc3b401..e9c6c913 100644 --- a/qai_hub_models/models/xlsr/export.py +++ b/qai_hub_models/models/xlsr/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -186,7 +185,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -208,7 +207,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/xlsr/info.yaml b/qai_hub_models/models/xlsr/info.yaml index b7ff7c32..c363b60a 100644 --- a/qai_hub_models/models/xlsr/info.yaml +++ b/qai_hub_models/models/xlsr/info.yaml @@ -16,7 +16,7 @@ deploy_license: source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/xlsr 
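Each export.py in this patch also changes the default device from "Samsung Galaxy S23" to "Samsung Galaxy S23 (Family)". A hedged sketch of calling one of the updated entry points directly with that default — the parameter names are taken from the export_model signatures in the hunks, and an AI Hub account/API token is needed for the call to actually submit jobs:

```python
# Illustrative only: keyword names come from the export_model signatures shown
# in the hunks above; a configured Qualcomm AI Hub token is required to run it.
from qai_hub_models.models.xlsr.export import export_model

export_model(
    device="Samsung Galaxy S23 (Family)",  # new default device-family string
    skip_profiling=True,                   # skip the on-device profiling job
    skip_inferencing=True,                 # skip the on-device inference job
)
```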
technical_details: Model checkpoint: xlsr_3x_checkpoint - Input resolution: 640x360 + Input resolution: 128x128 Number of parameters: 22.0K Model size: 92.7 KB applicable_scenarios: diff --git a/qai_hub_models/models/xlsr/model.py b/qai_hub_models/models/xlsr/model.py index 4c3e804c..6d6a4df0 100644 --- a/qai_hub_models/models/xlsr/model.py +++ b/qai_hub_models/models/xlsr/model.py @@ -39,6 +39,5 @@ def from_pretrained(cls, scale_factor: int = DEFAULT_SCALE_FACTOR) -> XLSR: ) checkpoint = load_torch(checkpoint_asset) model.load_state_dict(checkpoint["state_dict"]) - model.eval() return cls(model, scale_factor) diff --git a/qai_hub_models/models/xlsr/perf.yaml b/qai_hub_models/models/xlsr/perf.yaml index 9274c714..27429f93 100644 --- a/qai_hub_models/models/xlsr/perf.yaml +++ b/qai_hub_models/models/xlsr/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: XLSR performance_metrics: - torchscript_onnx_tflite: - inference_time: 2486.0 - throughput: 402.2526146419952 + inference_time: 2535.0 + throughput: 394.47731755424064 estimated_peak_memory_range: - min: 32768 - max: 7588944 + min: 16384 + max: 1922488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 16 - job_id: j1gle1wmp + job_id: jw56vjw7p job_status: Passed torchscript_onnx_qnn: - inference_time: 1374.0 - throughput: 727.802037845706 + inference_time: 1355.0 + throughput: 738.0073800738007 estimated_peak_memory_range: - min: 24576 - max: 15889328 + min: 217088 + max: 70830008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jwgoe4dkp + job_id: j7gj1mq8g job_status: Passed - torchscript_onnx_ort: - inference_time: 1554.0 - throughput: 643.5006435006435 + torchscript_onnx: + inference_time: 1527.0 + throughput: 654.8788474132285 estimated_peak_memory_range: min: 221184 - max: 17637032 + max: 3603224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 23 - job_id: jygzv4zxp + job_id: jnp13l6l5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-11T11:59:35Z' + timestamp: '2024-06-22T23:38:25Z' - torchscript_onnx_tflite: - inference_time: 1792.0 - throughput: 558.0357142857143 + inference_time: 1872.0 + throughput: 534.1880341880342 estimated_peak_memory_range: min: 16384 - max: 20986272 + max: 20912880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 16 - job_id: jw56qdoyg + job_id: j1p3836z5 job_status: Passed torchscript_onnx_qnn: - inference_time: 840.0 - throughput: 1190.4761904761904 + inference_time: 832.0 + throughput: 1201.923076923077 estimated_peak_memory_range: - min: 212992 - max: 20099296 + min: 0 + max: 16360336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: j1pvz92rg + job_id: jlpe21y0p job_status: Passed - torchscript_onnx_ort: - inference_time: 1035.0 
- throughput: 966.1835748792271 + torchscript_onnx: + inference_time: 1002.0 + throughput: 998.003992015968 estimated_peak_memory_range: - min: 212992 - max: 14654368 + min: 12288 + max: 13808768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 23 - job_id: jz5wm1ymg + job_id: jvgd092lp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-11T11:59:35Z' + timestamp: '2024-06-22T23:38:26Z' - torchscript_onnx_tflite: - inference_time: 2862.0 - throughput: 349.4060097833683 + inference_time: 2663.0 + throughput: 375.51633496057076 estimated_peak_memory_range: - min: 28672 - max: 1426392 + min: 622592 + max: 2212736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 16 - job_id: j1p3qwon5 + job_id: jwgom08d5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1370.0 - throughput: 729.92700729927 + inference_time: 1351.0 + throughput: 740.1924500370096 estimated_peak_memory_range: - min: 217088 - max: 9171344 + min: 20480 + max: 8885192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jlpe4l6v5 + job_id: jz5wxv4jp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-11T11:59:34Z' + timestamp: '2024-06-22T23:38:23Z' + - torchscript_onnx_tflite: + inference_time: 2573.0 + throughput: 388.65137971239795 + estimated_peak_memory_range: + min: 3796992 + max: 11747264 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 13 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 16 + job_id: j1pv4o7mp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1357.0 + throughput: 736.9196757553427 + estimated_peak_memory_range: + min: 229376 + max: 5324736 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 21 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 21 + job_id: jmg981dvp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:38:24Z' - torchscript_onnx_qnn: - inference_time: 3631.0 - throughput: 275.40622418066647 + inference_time: 1500.0 + throughput: 666.6666666666666 estimated_peak_memory_range: - min: 221184 - max: 221184 + min: 204800 + max: 204800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: j7gjkw3e5 + job_id: jygzw9n6g job_status: Passed - torchscript_onnx_ort: - inference_time: 1489.0 - throughput: 671.591672263264 + torchscript_onnx: + inference_time: 1524.0 + throughput: 656.1679790026246 estimated_peak_memory_range: - min: 8957952 - max: 8957952 + min: 8962048 + max: 8962048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 23 - job_id: jnp1qvo7g + job_id: jz5wxv76p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-11T11:59:36Z' + timestamp: '2024-06-22T23:38:27Z' diff 
--git a/qai_hub_models/models/xlsr_quantized/export.py b/qai_hub_models/models/xlsr_quantized/export.py index a0ddab0f..98bfcd16 100644 --- a/qai_hub_models/models/xlsr_quantized/export.py +++ b/qai_hub_models/models/xlsr_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_0" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -216,7 +216,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_0", inference_result, target_runtime ) diff --git a/qai_hub_models/models/xlsr_quantized/info.yaml b/qai_hub_models/models/xlsr_quantized/info.yaml index cadc40fc..e6c227f0 100644 --- a/qai_hub_models/models/xlsr_quantized/info.yaml +++ b/qai_hub_models/models/xlsr_quantized/info.yaml @@ -17,7 +17,7 @@ deploy_license: source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/xlsr technical_details: Model checkpoint: xlsr_3x_checkpoint - Input resolution: 640x360 + Input resolution: 128x128 Number of parameters: 22.0K Model size: 39.0 KB applicable_scenarios: diff --git a/qai_hub_models/models/xlsr_quantized/perf.yaml b/qai_hub_models/models/xlsr_quantized/perf.yaml index c06896f0..3c913f54 100644 --- a/qai_hub_models/models/xlsr_quantized/perf.yaml +++ b/qai_hub_models/models/xlsr_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: XLSR-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1141.0 - throughput: 876.4241893076249 + inference_time: 1129.0 + throughput: 885.7395925597874 estimated_peak_memory_range: - min: 28672 - max: 5356448 + min: 20480 + max: 1611344 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jmg99xomg + job_id: jnp13lj25 job_status: Passed torchscript_onnx_qnn: - inference_time: 799.0 - throughput: 1251.5644555694619 + inference_time: 807.0 + throughput: 
1239.1573729863692 estimated_peak_memory_range: - min: 16384 - max: 12173096 + min: 86016 + max: 72738664 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: j0pxed085 - job_status: Passed - torchscript_onnx_ort: - inference_time: 769.0 - throughput: 1300.3901170351105 - estimated_peak_memory_range: - min: 12288 - max: 3749080 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 21 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 21 - job_id: jep23vo6g + job_id: jegnxynr5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-11T12:00:03Z' + timestamp: '2024-06-22T23:38:52Z' - torchscript_onnx_tflite: - inference_time: 943.0 - throughput: 1060.4453870625662 + inference_time: 1020.0 + throughput: 980.3921568627451 estimated_peak_memory_range: - min: 16384 - max: 21882800 + min: 12288 + max: 22181408 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jnp1qvong + job_id: jvgd093ep job_status: Passed torchscript_onnx_qnn: - inference_time: 546.0 - throughput: 1831.5018315018315 + inference_time: 549.0 + throughput: 1821.4936247723133 estimated_peak_memory_range: - min: 65536 - max: 19116992 + min: 12288 + max: 17452880 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jo5mvd975 - job_status: Passed - torchscript_onnx_ort: - inference_time: 552.0 - throughput: 1811.5942028985507 - estimated_peak_memory_range: - min: 61440 - max: 18287376 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 21 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 21 - job_id: jqpyv780p + job_id: jopr9q09p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +120,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-11T12:00:04Z' + timestamp: '2024-06-22T23:38:53Z' - torchscript_onnx_tflite: - inference_time: 1145.0 - throughput: 873.3624454148471 + inference_time: 1128.0 + throughput: 886.5248226950355 estimated_peak_memory_range: - min: 106496 - max: 1718744 + min: 20480 + max: 1490032 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jvgd7z66g + job_id: jz576w4lg job_status: Passed torchscript_onnx_qnn: - inference_time: 807.0 - throughput: 1239.1573729863692 + inference_time: 793.0 + throughput: 1261.034047919294 estimated_peak_memory_range: - min: 16384 - max: 17351048 + min: 81920 + max: 9688640 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jopr1nxkg + job_id: jqpynwx7g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +158,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-11T12:00:02Z' + timestamp: '2024-06-22T23:38:55Z' - torchscript_onnx_tflite: - inference_time: 2637.0 - throughput: 379.21880925293897 + inference_time: 1123.0 + throughput: 890.4719501335708 estimated_peak_memory_range: - min: 12288 - max: 14920896 + min: 1662976 + max: 3283600 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,7 +172,45 @@ 
models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jz57v7on5 + job_id: jqp48o1vg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 801.0 + throughput: 1248.4394506866417 + estimated_peak_memory_range: + min: 81920 + max: 73030968 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 17 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 17 + job_id: j2p0kqj65 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:38:57Z' + - torchscript_onnx_tflite: + inference_time: 2434.0 + throughput: 410.84634346754314 + estimated_peak_memory_range: + min: 32768 + max: 16271888 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 14 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 17 + job_id: j0pxmj41g job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -207,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-11T11:59:56Z' + timestamp: '2024-06-22T23:38:49Z' - torchscript_onnx_tflite: inference_time: 11523.0 throughput: 86.78295582747549 estimated_peak_memory_range: - min: 2777088 - max: 8508512 + min: 2985984 + max: 10173008 primary_compute_unit: GPU precision: int8 layer_info: @@ -221,7 +233,7 @@ models: layers_on_gpu: 9 layers_on_cpu: 5 total_layers: 17 - job_id: jqp4j9e2p + job_id: jo5m42mw5 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -230,10 +242,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-11T11:59:56Z' + timestamp: '2024-06-22T23:38:50Z' - torchscript_onnx_qnn: - inference_time: 960.0 - throughput: 1041.6666666666667 + inference_time: 862.0 + throughput: 1160.092807424594 estimated_peak_memory_range: min: 57344 max: 57344 @@ -244,22 +256,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jegnr71j5 - job_status: Passed - torchscript_onnx_ort: - inference_time: 750.0 - throughput: 1333.3333333333333 - estimated_peak_memory_range: - min: 7811072 - max: 7811072 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 21 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 21 - job_id: j2p0ev905 + job_id: jep2j6w45 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -268,4 +265,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-11T12:00:04Z' + timestamp: '2024-06-22T23:38:54Z' diff --git a/qai_hub_models/models/yolonas/export.py b/qai_hub_models/models/yolonas/export.py index 3edacba9..4dd25551 100644 --- a/qai_hub_models/models/yolonas/export.py +++ b/qai_hub_models/models/yolonas/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if 
target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -186,7 +185,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/yolonas/model.py b/qai_hub_models/models/yolonas/model.py index b2f5e62f..72e6bad8 100644 --- a/qai_hub_models/models/yolonas/model.py +++ b/qai_hub_models/models/yolonas/model.py @@ -105,7 +105,7 @@ def from_pretrained( input_size = cls.get_input_spec()["image"][0] model.prep_model_for_conversion(input_size=input_size) model.heads.eval_size = input_size[2:] - return cls(model.eval(), include_postprocessing) + return cls(model, include_postprocessing) def forward(self, image): """ diff --git a/qai_hub_models/models/yolonas/perf.yaml b/qai_hub_models/models/yolonas/perf.yaml index 4798067d..8e4e7038 100644 --- a/qai_hub_models/models/yolonas/perf.yaml +++ b/qai_hub_models/models/yolonas/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: Yolo-NAS performance_metrics: - torchscript_onnx_tflite: - inference_time: 12935.0 - throughput: 77.30962504831851 + inference_time: 10996.0 + throughput: 90.94216078574027 estimated_peak_memory_range: - min: 245760 - max: 7789312 + min: 258048 + max: 4633448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 201 - job_id: jmg993llg + job_id: jwgom0rx5 job_status: Passed torchscript_onnx_qnn: - inference_time: 14574.0 - throughput: 68.61534239055852 + inference_time: 14669.0 + throughput: 68.17097279978185 estimated_peak_memory_range: - min: 5861376 - max: 20985784 + min: 4960256 + max: 23830208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jz57vjyl5 + job_id: jygzw9mkg job_status: Passed - torchscript_onnx_ort: - inference_time: 9949.0 - throughput: 100.51261433309881 + torchscript_onnx: + inference_time: 9961.0 + throughput: 100.39152695512499 estimated_peak_memory_range: - min: 540672 - max: 61160336 + min: 12288 + max: 59883912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jegnr96r5 + job_id: jz576welg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:29:29Z' + timestamp: '2024-06-22T23:39:48Z' - torchscript_onnx_tflite: - inference_time: 9036.0 - throughput: 110.66843736166446 + inference_time: 7459.0 + throughput: 134.0662287169862 estimated_peak_memory_range: min: 217088 - max: 99001056 + max: 101612880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 
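The perf.yaml files regenerated in this patch all share one schema (models → performance_metrics → per-runtime blocks plus reference_device_info). A small hypothetical helper, not part of the repository, showing how that structure can be queried — for instance to pull the updated TFLite latency for a single device:

```python
# Hypothetical reader for the perf.yaml schema used in these hunks; the field
# names (models, performance_metrics, torchscript_onnx_tflite, inference_time,
# reference_device_info.name) are taken from the YAML above.
from typing import Optional
import yaml

def tflite_latency_us(perf_yaml_path: str, device_name: str) -> Optional[float]:
    with open(perf_yaml_path) as f:
        perf = yaml.safe_load(f)
    for model in perf.get("models", []):
        for entry in model.get("performance_metrics", []):
            device = entry.get("reference_device_info", {}).get("name")
            tflite = entry.get("torchscript_onnx_tflite")
            if device == device_name and tflite:
                return tflite["inference_time"]
    return None

# e.g. tflite_latency_us("qai_hub_models/models/yolonas/perf.yaml", "Samsung Galaxy S24")
# would return 7459.0 given the updated values shown above.
```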
layers_on_cpu: 0 total_layers: 201 - job_id: jnp1qd42g + job_id: j1pv4odjp job_status: Passed torchscript_onnx_qnn: - inference_time: 10109.0 - throughput: 98.92175289346127 + inference_time: 10114.0 + throughput: 98.87284951552304 estimated_peak_memory_range: min: 4931584 - max: 92525504 + max: 86609072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jqp4jxlvp + job_id: jz5wxvl6p job_status: Passed - torchscript_onnx_ort: - inference_time: 6486.0 - throughput: 154.17823003391922 + torchscript_onnx: + inference_time: 6437.0 + throughput: 155.35187199005748 estimated_peak_memory_range: min: 4931584 - max: 56975920 + max: 49511008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jopr14v9g + job_id: jqp48oyvg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:29:30Z' + timestamp: '2024-06-22T23:39:49Z' - torchscript_onnx_tflite: - inference_time: 12949.0 - throughput: 77.22604062089736 + inference_time: 10774.0 + throughput: 92.81603861147207 estimated_peak_memory_range: - min: 225280 - max: 7472208 + min: 241664 + max: 6806360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 201 - job_id: jvgd7rxeg + job_id: j7gj1m7xg job_status: Passed torchscript_onnx_qnn: - inference_time: 15243.0 - throughput: 65.603883749918 + inference_time: 14774.0 + throughput: 67.68647624204684 estimated_peak_memory_range: - min: 4952064 - max: 23136736 + min: 4947968 + max: 22526536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jo5mvwnw5 + job_id: jnp13ln25 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +180,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:29:28Z' + timestamp: '2024-06-22T23:39:45Z' + - torchscript_onnx_tflite: + inference_time: 10799.0 + throughput: 92.60116677470135 + estimated_peak_memory_range: + min: 12288 + max: 6396832 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 201 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 201 + job_id: jlpe21z1p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 14766.0 + throughput: 67.72314777190844 + estimated_peak_memory_range: + min: 4960256 + max: 23017136 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 289 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 289 + job_id: jvgd09dep + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:39:47Z' - torchscript_onnx_qnn: - inference_time: 11897.0 - throughput: 84.05480373203329 + inference_time: 10605.0 + throughput: 94.2951438000943 estimated_peak_memory_range: - min: 4808704 - max: 4808704 + min: 4923392 + max: 4923392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: j0pxe7k15 + job_id: jmg981zlp job_status: Passed - torchscript_onnx_ort: - inference_time: 10119.0 - throughput: 98.82399446585632 + torchscript_onnx: + 
inference_time: 10061.0 + throughput: 99.39369843951893 estimated_peak_memory_range: - min: 5672960 - max: 5672960 + min: 4124672 + max: 4124672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jep237k4g + job_id: j0pxmjl1g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:29:31Z' + timestamp: '2024-06-22T23:39:50Z' diff --git a/qai_hub_models/models/yolonas_quantized/export.py b/qai_hub_models/models/yolonas_quantized/export.py index 86a7b17f..e8e9ed7e 100644 --- a/qai_hub_models/models/yolonas_quantized/export.py +++ b/qai_hub_models/models/yolonas_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -228,8 +228,8 @@ def main(): parser = export_parser( model_cls=Model, supports_qnn=False, - supports_ort=False, - supports_precompiled_ort=False, + supports_onnx=False, + supports_precompiled_qnn_onnx=False, ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/yolonas_quantized/model.py b/qai_hub_models/models/yolonas_quantized/model.py index 52c5fb5e..d524500d 100644 --- a/qai_hub_models/models/yolonas_quantized/model.py +++ b/qai_hub_models/models/yolonas_quantized/model.py @@ -79,7 +79,6 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() final_model = cls(sim) return final_model diff --git a/qai_hub_models/models/yolonas_quantized/perf.yaml b/qai_hub_models/models/yolonas_quantized/perf.yaml index 00f23b93..57e382b2 100644 --- a/qai_hub_models/models/yolonas_quantized/perf.yaml +++ b/qai_hub_models/models/yolonas_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,19 +46,19 @@ models: - name: Yolo-NAS-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 6973.0 - throughput: 143.41029685931449 + inference_time: 4970.0 + throughput: 201.2072434607646 
estimated_peak_memory_range: - min: 10432512 - max: 13902448 + min: 110592 + max: 9842984 primary_compute_unit: NPU precision: int8 layer_info: layers_on_npu: 200 layers_on_gpu: 0 - layers_on_cpu: 3 - total_layers: 203 - job_id: j2p0e1z65 + layers_on_cpu: 1 + total_layers: 201 + job_id: jegnxyzr5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -63,21 +67,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:30:46Z' + timestamp: '2024-06-22T23:41:04Z' - torchscript_onnx_tflite: - inference_time: 5003.0 - throughput: 199.8800719568259 + inference_time: 3288.0 + throughput: 304.1362530413625 estimated_peak_memory_range: - min: 356352 - max: 64309792 + min: 36864 + max: 75911536 primary_compute_unit: NPU precision: int8 layer_info: layers_on_npu: 200 layers_on_gpu: 0 - layers_on_cpu: 3 - total_layers: 203 - job_id: j1p8w3qxp + layers_on_cpu: 1 + total_layers: 201 + job_id: jopr9ql9p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -86,21 +90,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:30:47Z' + timestamp: '2024-06-22T23:41:05Z' - torchscript_onnx_tflite: - inference_time: 6937.0 - throughput: 144.15453366008362 + inference_time: 4997.0 + throughput: 200.12007204322595 estimated_peak_memory_range: - min: 10485760 - max: 42442768 + min: 126976 + max: 192876504 primary_compute_unit: NPU precision: int8 layer_info: layers_on_npu: 200 layers_on_gpu: 0 - layers_on_cpu: 3 - total_layers: 203 - job_id: jogkrle25 + layers_on_cpu: 1 + total_layers: 201 + job_id: jep2j6r45 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -109,50 +113,50 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:30:48Z' + timestamp: '2024-06-22T23:41:06Z' - torchscript_onnx_tflite: - inference_time: 23899.0 - throughput: 41.84275492698439 + inference_time: 4999.0 + throughput: 200.0400080016003 estimated_peak_memory_range: - min: 765952 - max: 56795680 + min: 110592 + max: 10179344 primary_compute_unit: NPU precision: int8 layer_info: layers_on_npu: 200 layers_on_gpu: 0 - layers_on_cpu: 3 - total_layers: 203 - job_id: jn5q9764p + layers_on_cpu: 1 + total_layers: 201 + job_id: jqpynwo7g job_status: Passed reference_device_info: - name: RB3 Gen 2 (Proxy) - os: '12' - form_factor: Iot + name: SA8775 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: Qcs6490 - timestamp: '2024-06-08T23:30:49Z' + chipset: Sa8775p + timestamp: '2024-06-22T23:41:08Z' - torchscript_onnx_tflite: - inference_time: 131373.0 - throughput: 7.611914168055841 + inference_time: 13017.0 + throughput: 76.82261657832066 estimated_peak_memory_range: - min: 15310848 - max: 24594432 - primary_compute_unit: CPU - precision: fp32 + min: 12288 + max: 58356864 + primary_compute_unit: NPU + precision: int8 layer_info: - layers_on_npu: 0 + layers_on_npu: 200 layers_on_gpu: 0 - layers_on_cpu: 203 - total_layers: 203 - job_id: j1gle0v8p + layers_on_cpu: 1 + total_layers: 201 + job_id: j2p0kqm65 job_status: Passed reference_device_info: - name: RB5 (Proxy) + name: RB3 Gen 2 (Proxy) os: '12' form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Qcs8250 - timestamp: '2024-06-08T23:30:50Z' + chipset: Qcs6490 + timestamp: '2024-06-22T23:41:09Z' diff --git a/qai_hub_models/models/yolov6/export.py b/qai_hub_models/models/yolov6/export.py index 1b9b17c2..d9a560f7 100644 --- 
a/qai_hub_models/models/yolov6/export.py +++ b/qai_hub_models/models/yolov6/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -186,7 +185,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/yolov6/perf.yaml b/qai_hub_models/models/yolov6/perf.yaml index 896ae16b..40423c23 100644 --- a/qai_hub_models/models/yolov6/perf.yaml +++ b/qai_hub_models/models/yolov6/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: Yolo-v6 performance_metrics: - torchscript_onnx_tflite: - inference_time: 7424.0 - throughput: 134.69827586206895 + inference_time: 6188.0 + throughput: 161.60310277957336 estimated_peak_memory_range: - min: 12288 - max: 3603960 + min: 53248 + max: 9162272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 182 - job_id: jz5wmdw6g + job_id: jz5wxvy6p job_status: Passed torchscript_onnx_qnn: - inference_time: 5369.0 - throughput: 186.25442354255912 + inference_time: 5354.0 + throughput: 186.77624206200971 estimated_peak_memory_range: - min: 4968448 - max: 16471240 + min: 6352896 + max: 17627936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jvgd7rneg + job_id: jz576wolg job_status: Passed - torchscript_onnx_ort: - inference_time: 7761.0 - throughput: 128.84937508053085 + torchscript_onnx: + inference_time: 6832.0 + throughput: 146.37002341920376 estimated_peak_memory_range: min: 5341184 - max: 35743744 + max: 34589032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jo5mvwew5 + job_id: jopr9qx9p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:31:24Z' + timestamp: '2024-06-22T23:41:54Z' - torchscript_onnx_tflite: - 
inference_time: 5294.0 - throughput: 188.89308651303364 + inference_time: 4481.0 + throughput: 223.1644722160232 estimated_peak_memory_range: - min: 40960 - max: 79662544 + min: 20480 + max: 81810416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 182 - job_id: jmg9930lg + job_id: jmg981olp job_status: Passed torchscript_onnx_qnn: - inference_time: 3862.0 - throughput: 258.9331952356292 + inference_time: 3869.0 + throughput: 258.46471956577926 estimated_peak_memory_range: min: 4931584 - max: 95031952 + max: 85673344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jz57vj2l5 + job_id: jqp48oevg job_status: Passed - torchscript_onnx_ort: - inference_time: 5600.0 - throughput: 178.57142857142858 + torchscript_onnx: + inference_time: 5175.0 + throughput: 193.23671497584542 estimated_peak_memory_range: - min: 835584 - max: 60500960 + min: 4915200 + max: 61967408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jegnr90r5 + job_id: jep2j6o45 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:31:25Z' + timestamp: '2024-06-22T23:41:55Z' - torchscript_onnx_tflite: - inference_time: 7339.0 - throughput: 136.2583458236817 + inference_time: 6227.0 + throughput: 160.59097478721696 estimated_peak_memory_range: - min: 45056 - max: 9009312 + min: 237568 + max: 15085408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 182 - job_id: jnp1qd22g + job_id: jnp13lo25 job_status: Passed torchscript_onnx_qnn: - inference_time: 5384.0 - throughput: 185.73551263001485 + inference_time: 5368.0 + throughput: 186.28912071535024 estimated_peak_memory_range: - min: 4939776 - max: 16906872 + min: 4935680 + max: 20817472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: j0pxe7915 + job_id: jo5m429w5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:31:23Z' + timestamp: '2024-06-22T23:41:51Z' + - torchscript_onnx_tflite: + inference_time: 6192.0 + throughput: 161.49870801033592 + estimated_peak_memory_range: + min: 20480 + max: 9125240 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 182 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 182 + job_id: jvgd096ep + job_status: Passed + torchscript_onnx_qnn: + inference_time: 5417.0 + throughput: 184.60402436773123 + estimated_peak_memory_range: + min: 4956160 + max: 16950328 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 228 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 228 + job_id: jegnxy1r5 + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:41:53Z' - torchscript_onnx_qnn: - inference_time: 6812.0 - throughput: 146.7997651203758 + inference_time: 5423.0 + throughput: 184.39977872026554 estimated_peak_memory_range: min: 4923392 max: 4923392 @@ -192,14 +232,14 
@@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jqp4jxnvp + job_id: j0pxmj01g job_status: Passed - torchscript_onnx_ort: - inference_time: 6530.0 - throughput: 153.1393568147014 + torchscript_onnx: + inference_time: 6563.0 + throughput: 152.36934328813044 estimated_peak_memory_range: - min: 3538944 - max: 3538944 + min: 5021696 + max: 5021696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jopr1469g + job_id: jqpynw87g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:31:26Z' + timestamp: '2024-06-22T23:41:56Z' diff --git a/qai_hub_models/models/yolov7/export.py b/qai_hub_models/models/yolov7/export.py index ecc0f421..5b315891 100644 --- a/qai_hub_models/models/yolov7/export.py +++ b/qai_hub_models/models/yolov7/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,13 +116,12 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -168,7 +167,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -186,7 +185,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -218,7 +217,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/yolov7/perf.yaml b/qai_hub_models/models/yolov7/perf.yaml index 4a8b5cd0..757be007 100644 --- a/qai_hub_models/models/yolov7/perf.yaml +++ b/qai_hub_models/models/yolov7/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,34 +38,19 @@ models: - name: Yolo-v7 performance_metrics: - torchscript_onnx_tflite: - inference_time: 15912.0 - throughput: 62.845651080945196 + inference_time: 24960.0 + throughput: 40.06410256410256 estimated_peak_memory_range: - min: 36864 - max: 24453640 - primary_compute_unit: NPU + min: 38207488 + max: 71608088 + primary_compute_unit: GPU 
precision: fp16 layer_info: - layers_on_npu: 203 - layers_on_gpu: 0 - layers_on_cpu: 12 + layers_on_npu: 0 + layers_on_gpu: 145 + layers_on_cpu: 70 total_layers: 215 - job_id: jqpyv4z7p - job_status: Passed - torchscript_onnx_ort: - inference_time: 13978.0 - throughput: 71.5409929889827 - estimated_peak_memory_range: - min: 1499136 - max: 35988136 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 213 - layers_on_gpu: 0 - layers_on_cpu: 12 - total_layers: 225 - job_id: jw56q320g + job_id: j1p889jxp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,36 +59,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:31:53Z' + timestamp: '2024-06-22T23:42:17Z' - torchscript_onnx_tflite: - inference_time: 10805.0 - throughput: 92.5497454881999 + inference_time: 18256.0 + throughput: 54.77651183172656 estimated_peak_memory_range: - min: 1200128 - max: 65074384 - primary_compute_unit: NPU + min: 77824 + max: 61690368 + primary_compute_unit: GPU precision: fp16 layer_info: - layers_on_npu: 203 - layers_on_gpu: 0 - layers_on_cpu: 12 + layers_on_npu: 0 + layers_on_gpu: 145 + layers_on_cpu: 70 total_layers: 215 - job_id: j2p0e1465 - job_status: Passed - torchscript_onnx_ort: - inference_time: 8800.0 - throughput: 113.63636363636364 - estimated_peak_memory_range: - min: 7557120 - max: 68407936 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 213 - layers_on_gpu: 0 - layers_on_cpu: 12 - total_layers: 225 - job_id: j1p3q4nl5 + job_id: jogkdn62p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,21 +82,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:31:54Z' + timestamp: '2024-06-22T23:42:18Z' - torchscript_onnx_tflite: - inference_time: 15993.0 - throughput: 62.52735571812668 + inference_time: 24597.0 + throughput: 40.65536447534252 estimated_peak_memory_range: - min: 1232896 - max: 3455120 - primary_compute_unit: NPU + min: 40337408 + max: 85297208 + primary_compute_unit: GPU precision: fp16 layer_info: - layers_on_npu: 203 - layers_on_gpu: 0 - layers_on_cpu: 12 + layers_on_npu: 0 + layers_on_gpu: 145 + layers_on_cpu: 70 total_layers: 215 - job_id: j1p8w32xp + job_id: jn5qwk445 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,27 +105,27 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:31:48Z' - - torchscript_onnx_ort: - inference_time: 13386.0 - throughput: 74.70491558344538 + timestamp: '2024-06-22T23:42:19Z' + - torchscript_onnx_tflite: + inference_time: 24592.0 + throughput: 40.66363044892648 estimated_peak_memory_range: - min: 4964352 - max: 4964352 - primary_compute_unit: NPU + min: 40124416 + max: 91709704 + primary_compute_unit: GPU precision: fp16 layer_info: - layers_on_npu: 213 - layers_on_gpu: 0 - layers_on_cpu: 12 - total_layers: 225 - job_id: jwgoe1zxp + layers_on_npu: 0 + layers_on_gpu: 145 + layers_on_cpu: 70 + total_layers: 215 + job_id: j1gl7zw85 job_status: Passed reference_device_info: - name: Snapdragon X Elite CRD - os: '11' - form_factor: Compute - os_name: Windows + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:31:55Z' + chipset: Sa8775p + timestamp: '2024-06-22T23:42:20Z' diff --git a/qai_hub_models/models/yolov7_quantized/export.py b/qai_hub_models/models/yolov7_quantized/export.py index 
a8d2b1bc..c0db3d61 100644 --- a/qai_hub_models/models/yolov7_quantized/export.py +++ b/qai_hub_models/models/yolov7_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -228,8 +228,8 @@ def main(): parser = export_parser( model_cls=Model, supports_qnn=False, - supports_ort=False, - supports_precompiled_ort=False, + supports_onnx=False, + supports_precompiled_qnn_onnx=False, ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/yolov7_quantized/model.py b/qai_hub_models/models/yolov7_quantized/model.py index adbbb0ae..d544a4f5 100644 --- a/qai_hub_models/models/yolov7_quantized/model.py +++ b/qai_hub_models/models/yolov7_quantized/model.py @@ -80,7 +80,6 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() final_model = cls(sim) return final_model diff --git a/qai_hub_models/models/yolov7_quantized/perf.yaml b/qai_hub_models/models/yolov7_quantized/perf.yaml index 765fdc6d..b16507b7 100644 --- a/qai_hub_models/models/yolov7_quantized/perf.yaml +++ b/qai_hub_models/models/yolov7_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: Yolo-v7-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 4596.0 - throughput: 217.58050478677112 + inference_time: 4575.0 + throughput: 218.5792349726776 estimated_peak_memory_range: min: 311296 - max: 2244624 + max: 2495600 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,7 +58,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 226 - job_id: j7gjk0dx5 + job_id: jmg9812lp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -63,13 +67,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:32:25Z' + timestamp: '2024-06-22T23:43:00Z' - torchscript_onnx_tflite: - inference_time: 2999.0 - throughput: 333.4444814938313 + inference_time: 2954.0 + throughput: 338.52403520649966 estimated_peak_memory_range: - min: 32768 - max: 61022912 + min: 40960 + max: 63977152 
primary_compute_unit: NPU precision: int8 layer_info: @@ -77,7 +81,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 226 - job_id: jlpe4ro15 + job_id: jnp13l125 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -86,13 +90,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:32:26Z' + timestamp: '2024-06-22T23:43:02Z' - torchscript_onnx_tflite: - inference_time: 4588.0 - throughput: 217.9598953792502 + inference_time: 4610.0 + throughput: 216.91973969631238 estimated_peak_memory_range: - min: 299008 - max: 3108488 + min: 286720 + max: 2138736 primary_compute_unit: NPU precision: int8 layer_info: @@ -100,7 +104,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 226 - job_id: jygzvx2kp + job_id: jvgd094ep job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -109,13 +113,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:32:27Z' + timestamp: '2024-06-22T23:43:03Z' - torchscript_onnx_tflite: - inference_time: 10699.0 - throughput: 93.46667912889055 + inference_time: 4567.0 + throughput: 218.96211955331728 estimated_peak_memory_range: - min: 266240 - max: 56452384 + min: 323584 + max: 3825968 primary_compute_unit: NPU precision: int8 layer_info: @@ -123,7 +127,30 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 226 - job_id: jz5wmd26g + job_id: jz576wnlg + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:43:04Z' + - torchscript_onnx_tflite: + inference_time: 10793.0 + throughput: 92.6526452330214 + estimated_peak_memory_range: + min: 53248 + max: 56912800 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 225 + layers_on_gpu: 0 + layers_on_cpu: 1 + total_layers: 226 + job_id: jqp48o4vg job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -132,13 +159,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-08T23:32:29Z' + timestamp: '2024-06-22T23:43:05Z' - torchscript_onnx_tflite: - inference_time: 93320.0 - throughput: 10.715816545220745 + inference_time: 99875.0 + throughput: 10.012515644555695 estimated_peak_memory_range: - min: 8769536 - max: 46392104 + min: 1826816 + max: 37759640 primary_compute_unit: GPU precision: int8 layer_info: @@ -146,7 +173,7 @@ models: layers_on_gpu: 126 layers_on_cpu: 68 total_layers: 226 - job_id: jmg993jlg + job_id: j0pxmjr1g job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -155,4 +182,4 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-08T23:32:29Z' + timestamp: '2024-06-22T23:43:06Z' diff --git a/qai_hub_models/models/yolov8_det/export.py b/qai_hub_models/models/yolov8_det/export.py index b8418123..953fc5b7 100644 --- a/qai_hub_models/models/yolov8_det/export.py +++ b/qai_hub_models/models/yolov8_det/export.py @@ -37,7 +37,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -116,7 +116,6 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace( model.to("cpu"), make_torch_inputs(input_spec), check_trace=False ) @@ -124,7 +123,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow 
Lite) channel_last_flags = ( " --force_channel_last_input image" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -170,7 +169,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -188,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/yolov8_det/model.py b/qai_hub_models/models/yolov8_det/model.py index 224497fd..b3e07074 100644 --- a/qai_hub_models/models/yolov8_det/model.py +++ b/qai_hub_models/models/yolov8_det/model.py @@ -99,7 +99,6 @@ def from_pretrained( from ultralytics import YOLO as ultralytics_YOLO model = ultralytics_YOLO(ckpt_name).model - model.eval() return cls( model, include_postprocessing, diff --git a/qai_hub_models/models/yolov8_det/perf.yaml b/qai_hub_models/models/yolov8_det/perf.yaml index 79f7e29d..fb80858a 100644 --- a/qai_hub_models/models/yolov8_det/perf.yaml +++ b/qai_hub_models/models/yolov8_det/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,11 +38,11 @@ models: - name: YOLOv8-Detection performance_metrics: - torchscript_onnx_tflite: - inference_time: 5900.0 - throughput: 169.4915254237288 + inference_time: 5199.0 + throughput: 192.34468166955185 estimated_peak_memory_range: - min: 40960 - max: 11760568 + min: 266240 + max: 2775568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +50,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: j0pxe7n35 + job_id: jw56vjm0p job_status: Passed torchscript_onnx_qnn: - inference_time: 5248.0 - throughput: 190.5487804878049 + inference_time: 5211.0 + throughput: 191.90174630589138 estimated_peak_memory_range: - min: 4919296 - max: 17813040 + min: 4206592 + max: 16408272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 285 - job_id: jep2370rg + job_id: j7gj1myxg job_status: Passed - torchscript_onnx_ort: - inference_time: 6498.0 - throughput: 153.8935056940597 + torchscript_onnx: + inference_time: 6680.0 + throughput: 149.7005988023952 estimated_peak_memory_range: - min: 8409088 - max: 39812256 + min: 5349376 + max: 36039048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: jogkrl7w5 + job_id: jnp13lm25 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:33:07Z' + timestamp: '2024-06-22T23:43:52Z' - torchscript_onnx_tflite: - inference_time: 4177.0 - throughput: 239.40627244433804 + inference_time: 3748.0 + throughput: 
266.8089647812167 estimated_peak_memory_range: - min: 16384 - max: 87350704 + min: 12288 + max: 81814864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jo5mvwqd5 + job_id: j1p3837l5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3699.0 - throughput: 270.3433360367667 + inference_time: 3693.0 + throughput: 270.7825616030328 estimated_peak_memory_range: min: 4931584 - max: 104903584 + max: 97113280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +118,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 285 - job_id: jqpyv4r8p + job_id: jlpe21x1p job_status: Passed - torchscript_onnx_ort: - inference_time: 4564.0 - throughput: 219.10604732690624 + torchscript_onnx: + inference_time: 4294.0 + throughput: 232.88309268747088 estimated_peak_memory_range: - min: 7028736 - max: 68265872 + min: 4952064 + max: 62885072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +133,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: jn5q97enp + job_id: jvgd09mep job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +142,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:33:08Z' + timestamp: '2024-06-22T23:43:53Z' - torchscript_onnx_tflite: - inference_time: 5907.0 - throughput: 169.29067208396816 + inference_time: 5208.0 + throughput: 192.01228878648234 estimated_peak_memory_range: - min: 245760 - max: 2242704 + min: 262144 + max: 2646912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +156,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jopr1480g + job_id: jwgom0wx5 job_status: Passed torchscript_onnx_qnn: - inference_time: 5193.0 - throughput: 192.56691700365877 + inference_time: 5205.0 + throughput: 192.12295869356387 estimated_peak_memory_range: - min: 4947968 - max: 19559888 + min: 4939776 + max: 17387896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +171,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 285 - job_id: j1p8w30kp + job_id: jz5wxvr6p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +180,48 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:33:06Z' + timestamp: '2024-06-22T23:43:50Z' + - torchscript_onnx_tflite: + inference_time: 5200.0 + throughput: 192.30769230769232 + estimated_peak_memory_range: + min: 32768 + max: 5389376 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 290 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 290 + job_id: j1pv4omjp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 5237.0 + throughput: 190.94901661256444 + estimated_peak_memory_range: + min: 4943872 + max: 17695848 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 285 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 285 + job_id: jmg981qlp + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:43:51Z' - torchscript_onnx_qnn: - inference_time: 5771.0 - throughput: 173.28019407381737 + inference_time: 5381.0 + throughput: 185.8390633711206 estimated_peak_memory_range: min: 4923392 max: 4923392 @@ -192,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 285 - job_id: j2p0e1395 + job_id: 
jygzw9ykg job_status: Passed - torchscript_onnx_ort: - inference_time: 6381.0 - throughput: 156.7152483936687 + torchscript_onnx: + inference_time: 6408.0 + throughput: 156.05493133583022 estimated_peak_memory_range: - min: 10723328 - max: 10723328 + min: 6709248 + max: 6709248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: j1gle06jp + job_id: jz5wxvr3p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +256,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:33:09Z' + timestamp: '2024-06-22T23:43:54Z' diff --git a/qai_hub_models/models/yolov8_det_quantized/export.py b/qai_hub_models/models/yolov8_det_quantized/export.py index 1c3d53f0..6f6b4177 100644 --- a/qai_hub_models/models/yolov8_det_quantized/export.py +++ b/qai_hub_models/models/yolov8_det_quantized/export.py @@ -36,7 +36,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -126,7 +126,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -176,7 +176,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -228,8 +228,8 @@ def main(): parser = export_parser( model_cls=Model, supports_qnn=False, - supports_ort=False, - supports_precompiled_ort=False, + supports_onnx=False, + supports_precompiled_qnn_onnx=False, ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/yolov8_det_quantized/model.py b/qai_hub_models/models/yolov8_det_quantized/model.py index 2943cb6a..bc81aab8 100644 --- a/qai_hub_models/models/yolov8_det_quantized/model.py +++ b/qai_hub_models/models/yolov8_det_quantized/model.py @@ -84,7 +84,6 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - sim.model.eval() final_model = cls(sim, False) return final_model diff --git a/qai_hub_models/models/yolov8_det_quantized/perf.yaml b/qai_hub_models/models/yolov8_det_quantized/perf.yaml index 9271d6d2..8abffb0c 100644 --- a/qai_hub_models/models/yolov8_det_quantized/perf.yaml +++ b/qai_hub_models/models/yolov8_det_quantized/perf.yaml @@ -13,6 +13,8 @@ aggregated: - QCS8550 (Proxy) - RB3 Gen 2 (Proxy) - RB5 (Proxy) + - SA8540 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -33,6 +35,8 @@ aggregated: - Qcs6490 - Qcs8250 - Qcs8550 + - Sa8540p + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -42,11 +46,11 @@ models: - name: 
YOLOv8-Detection-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 2332.0 - throughput: 428.8164665523156 + inference_time: 2346.0 + throughput: 426.25745950554136 estimated_peak_memory_range: min: 12288 - max: 3599048 + max: 2234648 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,7 +58,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 277 - job_id: jwgoe1kqp + job_id: jnp13lm85 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -63,13 +67,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:33:51Z' + timestamp: '2024-06-22T23:44:37Z' - torchscript_onnx_tflite: - inference_time: 1594.0 - throughput: 627.3525721455458 + inference_time: 1597.0 + throughput: 626.1740763932373 estimated_peak_memory_range: min: 12288 - max: 49918192 + max: 54532816 primary_compute_unit: NPU precision: int8 layer_info: @@ -77,7 +81,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 277 - job_id: j1pvz1rkg + job_id: jvgd09mrp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -86,13 +90,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:33:52Z' + timestamp: '2024-06-22T23:44:38Z' - torchscript_onnx_tflite: - inference_time: 2326.0 - throughput: 429.9226139294927 + inference_time: 2337.0 + throughput: 427.89901583226356 estimated_peak_memory_range: min: 12288 - max: 2668824 + max: 2345960 primary_compute_unit: NPU precision: int8 layer_info: @@ -100,7 +104,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 277 - job_id: j7gjk02v5 + job_id: jz576w8vg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -109,13 +113,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:33:53Z' + timestamp: '2024-06-22T23:44:39Z' - torchscript_onnx_tflite: - inference_time: 6463.0 - throughput: 154.7269070091289 + inference_time: 2337.0 + throughput: 427.89901583226356 estimated_peak_memory_range: - min: 81920 - max: 33931536 + min: 12288 + max: 2733568 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 276 + layers_on_gpu: 0 + layers_on_cpu: 1 + total_layers: 277 + job_id: jqp48o28g + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:44:40Z' + - torchscript_onnx_tflite: + inference_time: 6534.0 + throughput: 153.04560759106212 + estimated_peak_memory_range: + min: 36864 + max: 38744896 primary_compute_unit: NPU precision: int8 layer_info: @@ -123,7 +150,7 @@ models: layers_on_gpu: 1 layers_on_cpu: 1 total_layers: 277 - job_id: jlpe4rwo5 + job_id: j0pxmjz3g job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -132,13 +159,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-06-08T23:33:54Z' + timestamp: '2024-06-22T23:44:42Z' - torchscript_onnx_tflite: - inference_time: 46343.0 - throughput: 21.57823187967978 + inference_time: 46991.0 + throughput: 21.280670766742567 estimated_peak_memory_range: - min: 1802240 - max: 10846104 + min: 2846720 + max: 18859192 primary_compute_unit: NPU precision: int8 layer_info: @@ -146,7 +173,7 @@ models: layers_on_gpu: 2 layers_on_cpu: 1 total_layers: 277 - job_id: jygzvxjop + job_id: jo5m42ld5 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -155,4 +182,4 @@ models: os_name: Android 
manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-06-08T23:33:55Z' + timestamp: '2024-06-22T23:44:43Z' diff --git a/qai_hub_models/models/yolov8_seg/export.py b/qai_hub_models/models/yolov8_seg/export.py index d2ecb2c9..80ec993b 100644 --- a/qai_hub_models/models/yolov8_seg/export.py +++ b/qai_hub_models/models/yolov8_seg/export.py @@ -38,7 +38,7 @@ def export_model( - device: str = "Samsung Galaxy S23", + device: str = "Samsung Galaxy S23 (Family)", chipset: Optional[str] = None, skip_profiling: bool = False, skip_inferencing: bool = False, @@ -117,7 +117,6 @@ def export_model( ) # Trace the model - model.eval() source_model = torch.jit.trace( model.to("cpu"), make_torch_inputs(input_spec), check_trace=False ) @@ -125,7 +124,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( " --force_channel_last_input image" + " --force_channel_last_output output_4" - if target_runtime != TargetRuntime.ORT + if target_runtime != TargetRuntime.ONNX else "" ) @@ -171,7 +170,7 @@ def export_model( # Convert inputs from channel first to channel last hub_inputs = ( sample_inputs - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_first_to_last("image", sample_inputs, target_runtime) ) submitted_inference_job = hub.submit_inference_job( @@ -189,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: + elif target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -211,7 +210,7 @@ def export_model( # Convert outputs from channel last to channel first inference_result = ( inference_result - if target_runtime == TargetRuntime.ORT + if target_runtime == TargetRuntime.ONNX else transpose_channel_last_to_first( "output_4", inference_result, target_runtime ) @@ -229,7 +228,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/yolov8_seg/model.py b/qai_hub_models/models/yolov8_seg/model.py index bbd5fee2..7b64a222 100644 --- a/qai_hub_models/models/yolov8_seg/model.py +++ b/qai_hub_models/models/yolov8_seg/model.py @@ -43,7 +43,6 @@ def from_pretrained(cls, ckpt_name: str = DEFAULT_WEIGHTS): f"Supported checkpoints are {list(SUPPORTED_WEIGHTS)}." 
) model = ultralytics_YOLO(ckpt_name).model - model.eval() return cls(model) def forward(self, image: torch.Tensor): diff --git a/qai_hub_models/models/yolov8_seg/perf.yaml b/qai_hub_models/models/yolov8_seg/perf.yaml index b39496c1..031576ab 100644 --- a/qai_hub_models/models/yolov8_seg/perf.yaml +++ b/qai_hub_models/models/yolov8_seg/perf.yaml @@ -9,6 +9,7 @@ aggregated: - Google Pixel 4a - Google Pixel 5a 5G - QCS8550 (Proxy) + - SA8775 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ @@ -27,6 +28,7 @@ aggregated: - Xiaomi 12 Pro supported_chipsets: - Qcs8550 + - Sa8775p - Snapdragon® 8 Gen 1 - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 @@ -36,26 +38,26 @@ models: - name: YOLOv8-Segmentation performance_metrics: - torchscript_onnx_tflite: - inference_time: 7329.0 - throughput: 136.4442625187611 + inference_time: 6377.0 + throughput: 156.81354869060686 estimated_peak_memory_range: - min: 4210688 - max: 6975488 + min: 4595712 + max: 7752744 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 337 + layers_on_npu: 336 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 337 - job_id: jo5mvw6d5 + total_layers: 336 + job_id: j1p383r35 job_status: Passed - torchscript_onnx_ort: - inference_time: 7942.0 - throughput: 125.91286829513976 + torchscript_onnx: + inference_time: 7841.0 + throughput: 127.53475322025253 estimated_peak_memory_range: - min: 14696448 - max: 42029952 + min: 18956288 + max: 44241296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +65,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 336 - job_id: jogkrlqw5 + job_id: jvgd09yrp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,28 +74,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-06-08T23:34:30Z' + timestamp: '2024-06-22T23:45:26Z' - torchscript_onnx_tflite: - inference_time: 5452.0 - throughput: 183.41892883345562 + inference_time: 4708.0 + throughput: 212.40441801189465 estimated_peak_memory_range: - min: 3268608 - max: 101106816 + min: 36864 + max: 100005056 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 337 + layers_on_npu: 336 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 337 - job_id: jegnr9mk5 + total_layers: 336 + job_id: jwgom09q5 job_status: Passed - torchscript_onnx_ort: - inference_time: 5339.0 - throughput: 187.30099269526127 + torchscript_onnx: + inference_time: 5363.0 + throughput: 186.46280067126608 estimated_peak_memory_range: - min: 16973824 - max: 81417296 + min: 16642048 + max: 73744656 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +103,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 336 - job_id: jn5q97rnp + job_id: jz576w1vg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,21 +112,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-06-08T23:34:31Z' + timestamp: '2024-06-22T23:45:27Z' - torchscript_onnx_tflite: - inference_time: 7404.0 - throughput: 135.06212857914642 + inference_time: 6318.0 + throughput: 158.27793605571384 estimated_peak_memory_range: - min: 4583424 - max: 7403760 + min: 4599808 + max: 29252752 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 337 + layers_on_npu: 336 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 337 - job_id: jopr1420g + total_layers: 336 + job_id: j1pv4oykp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +135,36 @@ models: os_name: Android 
manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-06-08T23:34:25Z' - - torchscript_onnx_ort: - inference_time: 7762.0 - throughput: 128.83277505797474 + timestamp: '2024-06-22T23:45:18Z' + - torchscript_onnx_tflite: + inference_time: 6305.0 + throughput: 158.60428231562253 + estimated_peak_memory_range: + min: 4591616 + max: 14165424 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 336 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 336 + job_id: j7gj1m6vg + job_status: Passed + reference_device_info: + name: SA8775 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: Sa8775p + timestamp: '2024-06-22T23:45:19Z' + - torchscript_onnx: + inference_time: 7802.0 + throughput: 128.1722635221738 estimated_peak_memory_range: - min: 22315008 - max: 22315008 + min: 22433792 + max: 22433792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +172,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 336 - job_id: j1gle02jp + job_id: jqp48o68g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +181,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-06-08T23:34:32Z' + timestamp: '2024-06-22T23:45:29Z' diff --git a/qai_hub_models/utils/args.py b/qai_hub_models/utils/args.py index 184ab87f..c4c2ebb3 100644 --- a/qai_hub_models/utils/args.py +++ b/qai_hub_models/utils/args.py @@ -28,7 +28,7 @@ from qai_hub_models.utils.inference import HubModel, compile_model_from_args from qai_hub_models.utils.qai_hub_helpers import can_access_qualcomm_ai_hub -DEFAULT_EXPORT_DEVICE = "Samsung Galaxy S23" +DEFAULT_EXPORT_DEVICE = "Samsung Galaxy S23 (Family)" class ParseEnumAction(argparse.Action): @@ -40,9 +40,10 @@ def __call__(self, parser, namespace, values, option_string=None): setattr(namespace, self.dest, self.enum_type[values.upper().replace("-", "_")]) -def get_parser() -> argparse.ArgumentParser: +def get_parser(allow_dupe_args: bool = False) -> argparse.ArgumentParser: return argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + conflict_handler="resolve" if allow_dupe_args else "error", ) @@ -399,15 +400,17 @@ def _evaluate_export_common_parser( model_cls: Type[FromPretrainedTypeVar] | Type[FromPrecompiledTypeVar], supports_tflite=True, supports_qnn=True, - supports_ort=True, - supports_precompiled_ort=True, + supports_onnx=True, + supports_precompiled_qnn_onnx=True, default_runtime=TargetRuntime.TFLITE, exporting_compiled_model=False, ) -> argparse.ArgumentParser: """ Common arguments between export and evaluate scripts. """ - parser = get_parser() + # Set handler to resolve, to allow from_pretrained and get_input_spec + # to have the same argument names. 
+ parser = get_parser(allow_dupe_args=True) if not exporting_compiled_model: # Default runtime for compiled model is fixed for given model @@ -416,10 +419,10 @@ def _evaluate_export_common_parser( available_runtimes.append(TargetRuntime.TFLITE) if supports_qnn: available_runtimes.append(TargetRuntime.QNN) - if supports_ort: - available_runtimes.append(TargetRuntime.ORT) - if supports_precompiled_ort: - available_runtimes.append(TargetRuntime.PRECOMPILED_ORT) + if supports_onnx: + available_runtimes.append(TargetRuntime.ONNX) + if supports_precompiled_qnn_onnx: + available_runtimes.append(TargetRuntime.PRECOMPILED_QNN_ONNX) default_runtime = _get_default_runtime(available_runtimes) add_target_runtime_arg( @@ -459,8 +462,8 @@ def export_parser( components: Optional[List[str]] = None, supports_tflite: bool = True, supports_qnn: bool = True, - supports_ort: bool = True, - supports_precompiled_ort: bool = True, + supports_onnx: bool = True, + supports_precompiled_qnn_onnx: bool = True, default_runtime: TargetRuntime = TargetRuntime.TFLITE, exporting_compiled_model: bool = False, default_export_device: str = DEFAULT_EXPORT_DEVICE, @@ -477,10 +480,10 @@ def export_parser( supports_qnn: Whether QNN export is supported. Default=True. - supports_ort: + supports_onnx: Whether ORT export is supported. Default=True. - supports_precompiled_ort: + supports_precompiled_qnn_onnx: Whether precompiled ORT (with QNN context binary) export is supported. Default=True. default_runtime: Which runtime to use as default if not specified in cli args. @@ -498,8 +501,8 @@ def export_parser( model_cls=model_cls, supports_tflite=supports_tflite, supports_qnn=supports_qnn, - supports_ort=supports_ort, - supports_precompiled_ort=supports_precompiled_ort, + supports_onnx=supports_onnx, + supports_precompiled_qnn_onnx=supports_precompiled_qnn_onnx, default_runtime=default_runtime, exporting_compiled_model=exporting_compiled_model, ) @@ -563,7 +566,7 @@ def evaluate_parser( supported_datasets: List[str], supports_tflite=True, supports_qnn=True, - supports_ort=True, + supports_onnx=True, default_runtime=TargetRuntime.TFLITE, ) -> argparse.ArgumentParser: """ @@ -578,7 +581,7 @@ def evaluate_parser( supports_qnn: Whether QNN export is supported. Default=True. - supports_ort: + supports_onnx: Whether ORT export is supported. Default=True. exporting_compiled_model: @@ -594,7 +597,7 @@ def evaluate_parser( model_cls=model_cls, supports_tflite=supports_tflite, supports_qnn=supports_qnn, - supports_ort=supports_ort, + supports_onnx=supports_onnx, default_runtime=default_runtime, ) parser.add_argument( diff --git a/qai_hub_models/utils/asset_loaders.py b/qai_hub_models/utils/asset_loaders.py index adf4b2bc..eb1b0760 100644 --- a/qai_hub_models/utils/asset_loaders.py +++ b/qai_hub_models/utils/asset_loaders.py @@ -969,19 +969,21 @@ def download_file(web_url: str, dst_path: str, num_retries: int = 4) -> str: # Streaming, so we can iterate over the response. response = requests.get(web_url, stream=True) + if response.status_code != 200: + raise ValueError(f"Unable to download file at {web_url}") # Sizes in bytes. 
total_size = int(response.headers.get("content-length", 0)) block_size = 1024 - with tqdm(total=total_size, unit="B", unit_scale=True) as progress_bar: - with open(dst_path, "wb") as file: - for data in response.iter_content(block_size): - progress_bar.update(len(data)) - file.write(data) - - if response.status_code != 200: - raise ValueError(f"Unable to download file at {web_url}") + with qaihm_temp_dir() as tmp_dir: + tmp_filepath = os.path.join(tmp_dir, Path(dst_path).name) + with tqdm(total=total_size, unit="B", unit_scale=True) as progress_bar: + with open(tmp_filepath, "wb") as file: + for data in response.iter_content(block_size): + progress_bar.update(len(data)) + file.write(data) + os.rename(tmp_filepath, dst_path) print("Done") return dst_path diff --git a/qai_hub_models/utils/base_model.py b/qai_hub_models/utils/base_model.py index 377fc357..26bc9d26 100644 --- a/qai_hub_models/utils/base_model.py +++ b/qai_hub_models/utils/base_model.py @@ -4,6 +4,8 @@ # --------------------------------------------------------------------- from __future__ import annotations +from contextlib import nullcontext +from copy import deepcopy from pathlib import Path from typing import Any, List, Optional @@ -102,6 +104,26 @@ class BaseModel( def __init__(self): torch.nn.Module.__init__(self) # Initialize Torch Module HubModel.__init__(self) # Initialize Hub Model + self.eval() + + def __setattr__(self, name: str, value: Any) -> None: + """ + When a new torch.nn.Module attribute is added, we want to set it to eval mode. + If this model is being trained, calling `model.train()` + will reverse all of these. + """ + if isinstance(value, torch.nn.Module) and not self.training: + value.eval() + torch.nn.Module.__setattr__(self, name, value) + + def __call__(self, *args, **kwargs): + """ + If a model is in eval mode (which equates to self.training == False), + we don't want to compute gradients when doing the forward pass. + """ + context_fn = nullcontext if self.training else torch.no_grad + with context_fn(): + return torch.nn.Module.__call__(self, *args, **kwargs) def convert_to_torchscript( self, input_spec: InputSpec | None = None, check_trace: bool = True @@ -115,8 +137,14 @@ def convert_to_torchscript( if not input_spec: input_spec = self.get_input_spec() + # Torchscript should never be trained, so disable gradients for all parameters. + # Need to do this on a model copy, in case the original model is being trained. 
+ model_copy = deepcopy(self) + for param in model_copy.parameters(): + param.requires_grad = False + return torch.jit.trace( - self, make_torch_inputs(input_spec), check_trace=check_trace + model_copy, make_torch_inputs(input_spec), check_trace=check_trace ) def convert_to_hub_source_model( @@ -174,12 +202,12 @@ def get_hub_compile_options( break target_runtime_flag = target_runtime_flag or "qnn_lib_aarch64_android" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime == TargetRuntime.ONNX: target_runtime_flag = "onnx" elif target_runtime == TargetRuntime.TFLITE: target_runtime_flag = "tflite" - elif target_runtime == TargetRuntime.PRECOMPILED_ORT: - target_runtime_flag = "compiled_qnn_onnx" + elif target_runtime == TargetRuntime.PRECOMPILED_QNN_ONNX: + target_runtime_flag = "precompiled_qnn_onnx" else: raise NotImplementedError() diff --git a/qai_hub_models/utils/compare.py b/qai_hub_models/utils/compare.py index 8b887ba9..f9ae5ac5 100644 --- a/qai_hub_models/utils/compare.py +++ b/qai_hub_models/utils/compare.py @@ -43,8 +43,7 @@ def torch_inference( inputs[input_name] = torch.from_numpy(sample_inputs[input_name][i]).to( "cpu" ) - with torch.no_grad(): - out = model(*inputs.values()) + out = model(*inputs.values()) out_tuple = (out,) if isinstance(out, torch.Tensor) else out out_tuple = _flatten_tuple(out_tuple) diff --git a/qai_hub_models/utils/config_loaders.py b/qai_hub_models/utils/config_loaders.py index 0c36432f..c61fbda9 100644 --- a/qai_hub_models/utils/config_loaders.py +++ b/qai_hub_models/utils/config_loaders.py @@ -100,6 +100,7 @@ "lgpl-lr", "deepfloyd-if-license", "llama2", + "llama3", "unknown", "other", } @@ -484,7 +485,7 @@ def __init__( has_on_target_demo: bool, qnn_export_failure_reason: str, tflite_export_failure_reason: str, - ort_export_failure_reason: str, + onnx_export_failure_reason: str, check_trace: bool, channel_last_input: List[str], channel_last_output: List[str], @@ -507,7 +508,7 @@ def __init__( self.has_on_target_demo = has_on_target_demo self.qnn_export_failure_reason = qnn_export_failure_reason self.tflite_export_failure_reason = tflite_export_failure_reason - self.ort_export_failure_reason = ort_export_failure_reason + self.onnx_export_failure_reason = onnx_export_failure_reason self.check_trace = check_trace self.channel_last_input = channel_last_input self.channel_last_output = channel_last_output @@ -548,7 +549,7 @@ def from_yaml( code_gen_config["has_on_target_demo"], code_gen_config["qnn_export_failure_reason"], code_gen_config["tflite_export_failure_reason"], - code_gen_config["ort_export_failure_reason"], + code_gen_config["onnx_export_failure_reason"], code_gen_config["check_trace"], code_gen_config["channel_last_input"], code_gen_config["channel_last_output"], @@ -577,7 +578,7 @@ def from_yaml( OptionalSchema("has_on_target_demo", default=False): bool, OptionalSchema("qnn_export_failure_reason", default=""): str, OptionalSchema("tflite_export_failure_reason", default=""): str, - OptionalSchema("ort_export_failure_reason", default=""): str, + OptionalSchema("onnx_export_failure_reason", default=""): str, OptionalSchema("check_trace", default=True): bool, OptionalSchema("channel_last_input", default=[]): list, OptionalSchema("channel_last_output", default=[]): list, @@ -743,6 +744,7 @@ def validate(self) -> Tuple[bool, Optional[str]]: if ( self.code_gen_config.tflite_export_failure_reason and self.code_gen_config.qnn_export_failure_reason + and self.code_gen_config.onnx_export_failure_reason ): return False, "Public models must 
support at least one export path" diff --git a/qai_hub_models/utils/display.py b/qai_hub_models/utils/display.py index c628b0ff..5a48fc96 100644 --- a/qai_hub_models/utils/display.py +++ b/qai_hub_models/utils/display.py @@ -94,5 +94,5 @@ def display_or_save_image( if display_image(image, desc): return True - save_image(image, str(Path.cwd() / "build"), filename, desc) + save_image(image, os.path.join(Path.cwd(), "build"), filename, desc) return False diff --git a/qai_hub_models/utils/evaluate.py b/qai_hub_models/utils/evaluate.py index 33d8774b..314faad4 100644 --- a/qai_hub_models/utils/evaluate.py +++ b/qai_hub_models/utils/evaluate.py @@ -368,10 +368,9 @@ def evaluate_on_dataset( else: on_device_results.append(hub_model(model_inputs.split(1, dim=0))) - with torch.no_grad(): - for model_input, ground_truth in zip(model_inputs, ground_truth_values): - torch_output = torch_model(model_input.unsqueeze(0)) - torch_evaluator.add_batch(torch_output, ground_truth.unsqueeze(0)) + for model_input, ground_truth in zip(model_inputs, ground_truth_values): + torch_output = torch_model(model_input.unsqueeze(0)) + torch_evaluator.add_batch(torch_output, ground_truth.unsqueeze(0)) print( f"Cumulative torch accuracy on batch {i + 1}/{num_batches}: " f"{torch_evaluator.formatted_accuracy()}" diff --git a/qai_hub_models/utils/printing.py b/qai_hub_models/utils/printing.py index 1074a3d4..1adc37db 100644 --- a/qai_hub_models/utils/printing.py +++ b/qai_hub_models/utils/printing.py @@ -98,7 +98,7 @@ def print_profile_metrics_from_job( elif is_qnn_hub_model(profile_job.model): runtime = TargetRuntime.QNN elif profile_job.model.model_type in [SourceModelType.ORT, SourceModelType.ONNX]: - runtime = TargetRuntime.ORT + runtime = TargetRuntime.ONNX else: raise NotImplementedError() diff --git a/qai_hub_models/utils/quantization_aimet.py b/qai_hub_models/utils/quantization_aimet.py index 22a6a29c..fbd845e6 100644 --- a/qai_hub_models/utils/quantization_aimet.py +++ b/qai_hub_models/utils/quantization_aimet.py @@ -444,7 +444,7 @@ def get_calibration_data( """ Calibration dataset for this model and input spec. """ - if target_runtime == TargetRuntime.ORT: + if target_runtime == TargetRuntime.ONNX: # TODO(#10896): Restore quantize_io flag when targeting ORT return None @@ -460,7 +460,10 @@ def get_hub_compile_options( device: Optional[Device] = None, ) -> str: quantization_flags = " --quantize_io" - if target_runtime not in [TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT]: + if target_runtime not in [ + TargetRuntime.ONNX, + TargetRuntime.PRECOMPILED_QNN_ONNX, + ]: quantization_flags += " --quantize_full_type int8" return ( super().get_hub_compile_options( # type: ignore @@ -473,3 +476,11 @@ def preferred_hub_source_model_format( self, target_runtime: TargetRuntime ) -> SourceModelFormat: return SourceModelFormat.ONNX + + def __call__(self, *args, **kwargs): + """ + Instance of AIMETQuantizableMixin should never be trained, + so should be safe to disable gradients during forward pass. + """ + with torch.no_grad(): + return super().__call__(*args, **kwargs) diff --git a/qai_hub_models/utils/scorecard/common.py b/qai_hub_models/utils/scorecard/common.py index a8395ec0..8077244a 100644 --- a/qai_hub_models/utils/scorecard/common.py +++ b/qai_hub_models/utils/scorecard/common.py @@ -32,6 +32,10 @@ class ScorecardDevice(Enum): cs_8250 = 4 cs_8550 = 5 cs_x_elite = 6 + cs_auto_lemans_8255 = 7 + cs_auto_lemans_8775 = 8 + cs_auto_lemans_8650 = 9 + # cs_auto_makena_8540 | Disabled until fp16 support is enabled for makena. 
def enabled(self) -> bool: valid_test_devices = os.environ.get("WHITELISTED_PROFILE_TEST_DEVICES", "ALL") @@ -41,6 +45,24 @@ def enabled(self) -> bool: or self.name in valid_test_devices.split(",") ) + def get_disabled_models(self) -> List[str]: + """ + Each chipset can have a list of 'disabled' models, for which the + chipset won't show up as a 'supported chipset' for that model. + """ + if self == ScorecardDevice.cs_6490: + return [ + "ConvNext-Tiny-w8a8-Quantized", + "ConvNext-Tiny-w8a16-Quantized", + "ResNet50Quantized", + "RegNetQuantized", + "HRNetPoseQuantized", + "SESR-M5-Quantized", + "Midas-V2-Quantized", + "Posenet-Mobilenet-Quantized", + ] + return [] + def all_enabled(self) -> List["ScorecardDevice"]: return [x for x in ScorecardDevice if x.enabled()] @@ -57,6 +79,14 @@ def get_reference_device(self) -> hub.Device: return _get_cached_device("QCS8550 (Proxy)") if self == ScorecardDevice.cs_x_elite: return _get_cached_device("Snapdragon X Elite CRD") + if self == ScorecardDevice.cs_auto_lemans_8255: + return _get_cached_device("SA8255 (Proxy)") + if self == ScorecardDevice.cs_auto_lemans_8775: + return _get_cached_device("SA8775 (Proxy)") + if self == ScorecardDevice.cs_auto_lemans_8650: + return _get_cached_device("SA8650 (Proxy)") + # if self == ScorecardDevice.cs_auto_makena_8540: + # return _get_cached_device("SA8540 (Proxy)") raise NotImplementedError(f"No reference device for {self.name}") def get_chipset(self) -> str: @@ -72,6 +102,14 @@ def get_chipset(self) -> str: return "qualcomm-qcs8550" if self == ScorecardDevice.cs_x_elite: return "qualcomm-snapdragon-x-elite" + if self == ScorecardDevice.cs_auto_lemans_8255: + return "qualcomm-sa8255p" + if self == ScorecardDevice.cs_auto_lemans_8775: + return "qualcomm-sa8775p" + if self == ScorecardDevice.cs_auto_lemans_8650: + return "qualcomm-sa8650p" + # if self == ScorecardDevice.cs_auto_makena_8540: + # return "qualcomm-sa8540p" raise NotImplementedError(f"No chipset for {self.name}") def get_os(self) -> str: @@ -84,7 +122,7 @@ def get_os(self) -> str: class ScorecardCompilePath(Enum): TFLITE = 0 QNN = 1 - ORT = 2 + ONNX = 2 def __str__(self): return self.name.lower() @@ -122,8 +160,8 @@ def get_parameterized_test_config( def get_runtime(self) -> TargetRuntime: if self == ScorecardCompilePath.TFLITE: return TargetRuntime.TFLITE - if self == ScorecardCompilePath.ORT: - return TargetRuntime.ORT + if self == ScorecardCompilePath.ONNX: + return TargetRuntime.ONNX if self == ScorecardCompilePath.QNN: return TargetRuntime.QNN raise NotImplementedError() @@ -155,14 +193,16 @@ def get_job_cache_name( class ScorecardProfilePath(Enum): TFLITE = 0 QNN = 1 - ORT = 2 - ORT_DML_GPU = 3 + ONNX = 2 + ONNX_DML_GPU = 3 def __str__(self): return self.name.lower() @property def long_name(self): + if self.name.lower() == "onnx": + return f"torchscript_{self.name.lower()}" return f"torchscript_onnx_{self.name.lower()}" def enabled(self) -> bool: @@ -179,7 +219,7 @@ def all_enabled() -> List["ScorecardProfilePath"]: def include_in_perf_yaml(self) -> bool: return self in [ ScorecardProfilePath.QNN, - ScorecardProfilePath.ORT, + ScorecardProfilePath.ONNX, ScorecardProfilePath.TFLITE, ] @@ -201,8 +241,8 @@ def get_parameterized_test_config( def get_runtime(self) -> TargetRuntime: if self == ScorecardProfilePath.TFLITE: return TargetRuntime.TFLITE - if self in [ScorecardProfilePath.ORT, ScorecardProfilePath.ORT_DML_GPU]: - return TargetRuntime.ORT + if self in [ScorecardProfilePath.ONNX, ScorecardProfilePath.ONNX_DML_GPU]: + return TargetRuntime.ONNX 
if self == ScorecardProfilePath.QNN: return TargetRuntime.QNN raise NotImplementedError() @@ -210,14 +250,14 @@ def get_runtime(self) -> TargetRuntime: def get_compile_path(self) -> ScorecardCompilePath: if self == ScorecardProfilePath.TFLITE: return ScorecardCompilePath.TFLITE - if self in [ScorecardProfilePath.ORT, ScorecardProfilePath.ORT_DML_GPU]: - return ScorecardCompilePath.ORT + if self in [ScorecardProfilePath.ONNX, ScorecardProfilePath.ONNX_DML_GPU]: + return ScorecardCompilePath.ONNX if self == ScorecardProfilePath.QNN: return ScorecardCompilePath.QNN raise NotImplementedError() def get_profile_options(self) -> str: - if self == ScorecardProfilePath.ORT_DML_GPU: + if self == ScorecardProfilePath.ONNX_DML_GPU: return "--compute_unit gpu" return "" @@ -234,7 +274,7 @@ def get_test_devices( if aimet_model else [] ) - elif self == ScorecardProfilePath.ORT: + elif self == ScorecardProfilePath.ONNX: devices = [ ScorecardDevice.cs_8_gen_2, ScorecardDevice.cs_8_gen_3, @@ -246,8 +286,11 @@ def get_test_devices( ScorecardDevice.cs_8_gen_3, ScorecardDevice.cs_x_elite, ScorecardDevice.cs_8550, + ScorecardDevice.cs_auto_lemans_8650, + ScorecardDevice.cs_auto_lemans_8775, + ScorecardDevice.cs_auto_lemans_8255, ] - elif self == ScorecardProfilePath.ORT_DML_GPU: + elif self == ScorecardProfilePath.ONNX_DML_GPU: devices = [ScorecardDevice.cs_x_elite] else: raise NotImplementedError() diff --git a/qai_hub_models/utils/scorecard/model_card.py b/qai_hub_models/utils/scorecard/model_card.py index 15849492..ea618fd2 100644 --- a/qai_hub_models/utils/scorecard/model_card.py +++ b/qai_hub_models/utils/scorecard/model_card.py @@ -316,10 +316,22 @@ def from_runs(model_runs: List[ProfileJobSummary]): def get_chipsets(self) -> Set[str]: chips: Set[str] = set() - for _, model_summary in self.runs_per_model.items(): - chips.update( - [x.get_chipset() for x in model_summary.runs_per_device.keys()] - ) + for model_id, model_summary in self.runs_per_model.items(): + for device, device_summary in model_summary.runs_per_device.items(): + # At least 1 successful run must exist for this chipset + success = False + for run in device_summary.run_per_path.values(): + if run.success: + success = True + break + if not success: + continue + + # Don't include disabled models + if model_id in device.get_disabled_models(): + continue + + chips.add(device.get_chipset()) return chips def get_perf_card( diff --git a/scripts/util/env_create.sh b/scripts/util/env_create.sh index 71b85de8..3e7c066d 100755 --- a/scripts/util/env_create.sh +++ b/scripts/util/env_create.sh @@ -36,6 +36,7 @@ if [ ! -d "$ENV_PATH" ]; then echo "Activating virtual env." source "$ENV_PATH/bin/activate" + pip install pip==24.0 else source "$ENV_PATH/bin/activate" echo "Env created already. Skipping creation."
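
Throughout the export.py changes above, TargetRuntime.ORT and TargetRuntime.PRECOMPILED_ORT are renamed to TargetRuntime.ONNX and TargetRuntime.PRECOMPILED_QNN_ONNX, and both still map to the .onnx output extension. A minimal sketch of that mapping using a stand-in enum (the real enum ships with qai_hub_models and may carry more members and different values):

from enum import Enum

class TargetRuntime(Enum):  # stand-in for the qai_hub_models enum
    TFLITE = 0
    QNN = 1
    ONNX = 2                  # was ORT
    PRECOMPILED_QNN_ONNX = 3  # was PRECOMPILED_ORT

def target_runtime_extension(target_runtime: TargetRuntime) -> str:
    # Mirrors the extension selection done in each model's export.py.
    if target_runtime == TargetRuntime.QNN:
        return "so"       # QNN model library
    if target_runtime == TargetRuntime.TFLITE:
        return "tflite"
    if target_runtime in {TargetRuntime.ONNX, TargetRuntime.PRECOMPILED_QNN_ONNX}:
        return "onnx"     # plain ONNX, or ONNX wrapping a precompiled QNN context binary
    raise NotImplementedError(target_runtime)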
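The perf.yaml metric keys changing from torchscript_onnx_ort to torchscript_onnx line up with the ScorecardProfilePath.long_name change above, which special-cases the ONNX path while the other paths keep the torchscript_onnx_<path> pattern. Roughly, the property behaves like this (shown on a stand-in enum rather than the real ScorecardProfilePath):

from enum import Enum

class ProfilePath(Enum):  # stand-in for ScorecardProfilePath
    TFLITE = 0
    QNN = 1
    ONNX = 2

    @property
    def long_name(self) -> str:
        if self.name.lower() == "onnx":
            return f"torchscript_{self.name.lower()}"   # "torchscript_onnx"
        return f"torchscript_onnx_{self.name.lower()}"  # e.g. "torchscript_onnx_tflite"

assert ProfilePath.ONNX.long_name == "torchscript_onnx"
assert ProfilePath.TFLITE.long_name == "torchscript_onnx_tflite"
assert ProfilePath.QNN.long_name == "torchscript_onnx_qnn"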
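In utils/args.py, get_parser gains an allow_dupe_args flag that switches the argparse conflict handler to "resolve", so the export and evaluate parsers can register the same option twice (for example once for from_pretrained and once for get_input_spec) without argparse raising an error; the later definition wins. A small self-contained example of the mechanism (the --height option is purely illustrative):

import argparse

def get_parser(allow_dupe_args: bool = False) -> argparse.ArgumentParser:
    # With "resolve", a later add_argument() overrides an earlier one that
    # uses the same flag; the default "error" handler would raise instead.
    return argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        conflict_handler="resolve" if allow_dupe_args else "error",
    )

parser = get_parser(allow_dupe_args=True)
parser.add_argument("--height", type=int, default=224)  # e.g. added for get_input_spec
parser.add_argument("--height", type=int, default=640)  # e.g. added again for from_pretrained
print(parser.parse_args(["--height", "512"]).height)    # 512; no ArgumentError raised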
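utils/asset_loaders.download_file now fails fast on a non-200 response and streams the payload into a temporary file that is only moved to dst_path once the download finishes, so an interrupted transfer cannot leave a truncated file behind. A standalone sketch of the same pattern; it substitutes tempfile.TemporaryDirectory for the repo's qaihm_temp_dir helper and shutil.move for os.rename so it also works when the temp directory sits on a different filesystem:

import os
import shutil
import tempfile
from pathlib import Path

import requests
from tqdm import tqdm

def download_file(web_url: str, dst_path: str, block_size: int = 1024) -> str:
    response = requests.get(web_url, stream=True)
    if response.status_code != 200:
        raise ValueError(f"Unable to download file at {web_url}")

    total_size = int(response.headers.get("content-length", 0))
    with tempfile.TemporaryDirectory() as tmp_dir:
        # Write to a scratch location first; dst_path only appears once complete.
        tmp_filepath = os.path.join(tmp_dir, Path(dst_path).name)
        with tqdm(total=total_size, unit="B", unit_scale=True) as progress_bar:
            with open(tmp_filepath, "wb") as file:
                for data in response.iter_content(block_size):
                    progress_bar.update(len(data))
                    file.write(data)
        shutil.move(tmp_filepath, dst_path)
    return dst_path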
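The utils/base_model.py changes, together with the removal of the scattered model.eval() and torch.no_grad() calls in the model-specific files above, centralize inference-mode handling in BaseModel: instances start in eval mode, newly attached sub-modules are put in eval mode, forward passes run under torch.no_grad() unless training has been re-enabled, and TorchScript tracing operates on a frozen deep copy. A condensed sketch of the pattern, using illustrative class names rather than the real BaseModel:

from contextlib import nullcontext
from copy import deepcopy

import torch

class EvalByDefaultModule(torch.nn.Module):
    """Illustrative stand-in for BaseModel's eval/no_grad behavior."""

    def __init__(self):
        super().__init__()
        self.eval()  # start in inference mode

    def __setattr__(self, name, value):
        # Keep newly attached sub-modules in eval mode unless training is on.
        if isinstance(value, torch.nn.Module) and not self.training:
            value.eval()
        super().__setattr__(name, value)

    def __call__(self, *args, **kwargs):
        # Skip autograd bookkeeping whenever the module is in eval mode.
        context_fn = nullcontext if self.training else torch.no_grad
        with context_fn():
            return super().__call__(*args, **kwargs)

    def trace(self, example_inputs):
        # Trace a frozen copy so the original model can still be trained later.
        model_copy = deepcopy(self)
        for param in model_copy.parameters():
            param.requires_grad = False
        return torch.jit.trace(model_copy, example_inputs)

class TinyNet(EvalByDefaultModule):
    def __init__(self):
        super().__init__()
        self.fc = torch.nn.Linear(4, 2)  # attached in eval mode automatically

    def forward(self, x):
        return self.fc(x)

net = TinyNet()
out = net(torch.randn(1, 4))
assert not out.requires_grad  # forward pass ran under torch.no_grad()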