v0.11.3

See https://github.com/quic/ai-hub-models/releases/v0.11.3 for changelog.

Signed-off-by: QAIHM Team <[email protected]>
quic · Aug 13, 2024 · 746d851 · 746d851
1 parent 23facd8
commit 746d851
Show file tree

Hide file tree

Showing 269 changed files with 23,899 additions and 14,412 deletions.
diff --git a/qai_hub_models/_version.py b/qai_hub_models/_version.py
@@ -2,4 +2,4 @@
 # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
 # SPDX-License-Identifier: BSD-3-Clause
 # ---------------------------------------------------------------------
-__version__ = "0.11.2"
+__version__ = "0.11.3"
diff --git a/qai_hub_models/models/_shared/cityscapes_segmentation/model.py b/qai_hub_models/models/_shared/cityscapes_segmentation/model.py
@@ -96,3 +96,11 @@ def get_input_spec(
     @staticmethod
     def get_output_names() -> List[str]:
         return ["mask"]
+
+    @staticmethod
+    def get_channel_last_inputs() -> List[str]:
+        return ["image"]
+
+    @staticmethod
+    def get_channel_last_outputs() -> List[str]:
+        return ["mask"]
diff --git a/qai_hub_models/models/_shared/deeplab/model.py b/qai_hub_models/models/_shared/deeplab/model.py
@@ -59,3 +59,11 @@ def get_input_spec(
     @staticmethod
     def get_output_names() -> List[str]:
         return ["mask"]
+
+    @staticmethod
+    def get_channel_last_inputs() -> List[str]:
+        return ["image"]
+
+    @staticmethod
+    def get_channel_last_outputs() -> List[str]:
+        return ["mask"]
diff --git a/qai_hub_models/models/_shared/detr/model.py b/qai_hub_models/models/_shared/detr/model.py
@@ -62,3 +62,7 @@ def get_input_spec(
     @staticmethod
     def get_output_names() -> List[str]:
         return ["logits", "boxes"]
+
+    @staticmethod
+    def get_channel_last_inputs() -> List[str]:
+        return ["image"]
diff --git a/qai_hub_models/models/_shared/fastsam/model.py b/qai_hub_models/models/_shared/fastsam/model.py
@@ -57,3 +57,11 @@ def get_input_spec(
     @staticmethod
     def get_output_names() -> List[str]:
         return ["boxes", "mask"]
+
+    @staticmethod
+    def get_channel_last_inputs() -> List[str]:
+        return ["image"]
+
+    @staticmethod
+    def get_channel_last_outputs() -> List[str]:
+        return ["boxes", "mask"]
diff --git a/qai_hub_models/models/_shared/imagenet_classifier/model.py b/qai_hub_models/models/_shared/imagenet_classifier/model.py
@@ -115,8 +115,12 @@ def from_pretrained(
         net = cls.model_builder(weights=weights or cls.DEFAULT_WEIGHTS)
         return cls(net)
 
-    def sample_inputs(
+    def _sample_inputs_impl(
         self, input_spec: InputSpec | None = None
     ) -> Dict[str, List[np.ndarray]]:
         samples = get_image_quantization_samples()
         return dict(image_tensor=[samples[:1].numpy()])
+
+    @staticmethod
+    def get_channel_last_inputs() -> List[str]:
+        return ["image_tensor"]
diff --git a/qai_hub_models/models/_shared/llama/model.py b/qai_hub_models/models/_shared/llama/model.py
@@ -219,13 +219,20 @@ def get_hub_compile_options(
         other_compile_options: str = "",
         device: Optional[Device] = None,
     ) -> str:
-        if target_runtime != TargetRuntime.QNN:
+        if (
+            target_runtime != TargetRuntime.QNN
+            and target_runtime != TargetRuntime.PRECOMPILED_QNN_ONNX
+        ):
             raise RuntimeError(
                 f"Unsupported target_runtime provided: {target_runtime}."
-                " Only QNN runtime is supported for Llama for now."
+                " Only Precompile ONN ONNX or QNN runtime is supported for Llama for now."
             )
-
-        return " --target_runtime qnn_context_binary --quantize_full_type w8a16 --quantize_io"
+        target_runtime_options = (
+            " --target_runtime qnn_context_binary"
+            if target_runtime == TargetRuntime.QNN
+            else " --target_runtime precompiled_qnn_onnx"
+        )
+        return target_runtime_options + " --quantize_full_type w8a16 --quantize_io"
 
     @staticmethod
     def get_output_names(
@@ -246,7 +253,9 @@ def get_output_names(
         )
         return output_list
 
-    def sample_inputs(self, input_spec: Optional[InputSpec] = None) -> SampleInputsType:
+    def _sample_inputs_impl(
+        self, input_spec: Optional[InputSpec] = None
+    ) -> SampleInputsType:
         data = self.get_calibration_data(input_spec=input_spec)
         for key, val in data.items():
             data[key] = [val.detach().numpy()]

diff --git a/qai_hub_models/models/_shared/super_resolution/model.py b/qai_hub_models/models/_shared/super_resolution/model.py
@@ -70,3 +70,11 @@ def get_input_spec(
     @staticmethod
     def get_output_names() -> List[str]:
         return ["upscaled_image"]
+
+    @staticmethod
+    def get_channel_last_inputs() -> List[str]:
+        return ["image"]
+
+    @staticmethod
+    def get_channel_last_outputs() -> List[str]:
+        return ["upscaled_image"]
diff --git a/qai_hub_models/models/aotgan/export.py b/qai_hub_models/models/aotgan/export.py
@@ -32,8 +32,6 @@
 from qai_hub_models.utils.qai_hub_helpers import (
     can_access_qualcomm_ai_hub,
     export_without_hub_access,
-    transpose_channel_first_to_last,
-    transpose_channel_last_to_first,
 )
 
 
@@ -110,6 +108,9 @@ def export_model(
             profile_options,
         )
 
+    # On-device perf improves with I/O in channel_last format except when using ONNX.
+    use_channel_last_format = target_runtime != TargetRuntime.ONNX
+
     # 1. Initialize PyTorch model
     model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs))
     input_spec = model.get_input_spec(
@@ -119,17 +120,9 @@ def export_model(
     # Trace the model
     source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec))
 
-    # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite)
-    channel_last_flags = (
-        " --force_channel_last_input image,mask"
-        + " --force_channel_last_output painted_image"
-        if target_runtime != TargetRuntime.ONNX
-        else ""
-    )
-
     # 2. Compile the model to an on-device asset
     model_compile_options = model.get_hub_compile_options(
-        target_runtime, compile_options + channel_last_flags, hub_device
+        target_runtime, compile_options, hub_device
     )
     print(f"Optimizing model {model_name} to run on-device")
     submitted_compile_job = hub.submit_compile_job(
@@ -165,18 +158,12 @@ def export_model(
         print(
             f"Running inference for {model_name} on a hosted device with example inputs."
         )
-        sample_inputs = model.sample_inputs(input_spec)
-        # Convert inputs from channel first to channel last
-        hub_inputs = (
-            sample_inputs
-            if target_runtime == TargetRuntime.ONNX
-            else transpose_channel_first_to_last(
-                "image,mask", sample_inputs, target_runtime
-            )
+        sample_inputs = model.sample_inputs(
+            input_spec, use_channel_last_format=use_channel_last_format
         )
         submitted_inference_job = hub.submit_inference_job(
             model=compile_job.get_target_model(),
-            inputs=hub_inputs,
+            inputs=sample_inputs,
             device=hub_device,
             name=model_name,
             options=profile_options_all,
@@ -205,18 +192,16 @@ def export_model(
         print_profile_metrics_from_job(profile_job, profile_data)
 
     if not skip_summary and not skip_inferencing:
-        torch_out = torch_inference(model, sample_inputs)
+        sample_inputs = model.sample_inputs(use_channel_last_format=False)
+        torch_out = torch_inference(
+            model, sample_inputs, return_channel_last_output=use_channel_last_format
+        )
         assert inference_job is not None and inference_job.wait().success
         inference_result: hub.client.DatasetEntries = inference_job.download_output_data()  # type: ignore
-        # Convert outputs from channel last to channel first
-        inference_result = (
-            inference_result
-            if target_runtime == TargetRuntime.ONNX
-            else transpose_channel_last_to_first(
-                "painted_image", inference_result, target_runtime
-            )
+
+        print_inference_metrics(
+            inference_job, inference_result, torch_out, model.get_output_names()
         )
-        print_inference_metrics(inference_job, inference_result, torch_out)
 
     if not skip_summary:
         print_on_target_demo_cmd(compile_job, Path(__file__).parent.resolve(), device)

diff --git a/qai_hub_models/models/aotgan/model.py b/qai_hub_models/models/aotgan/model.py
@@ -128,7 +128,9 @@ def get_input_spec(
     def get_output_names() -> List[str]:
         return ["painted_image"]
 
-    def sample_inputs(self, input_spec: InputSpec | None = None) -> SampleInputsType:
+    def _sample_inputs_impl(
+        self, input_spec: InputSpec | None = None
+    ) -> SampleInputsType:
         """
         Provides an example image of a man with a mask over the glasses.
         """
@@ -138,3 +140,11 @@ def sample_inputs(self, input_spec: InputSpec | None = None) -> SampleInputsType
         mask = load_image(MASK_ADDRESS)
         torch_inputs = RepaintMaskApp.preprocess_inputs(image, mask)
         return {k: [v.detach().numpy()] for k, v in torch_inputs.items()}
+
+    @staticmethod
+    def get_channel_last_inputs() -> List[str]:
+        return ["image", "mask"]
+
+    @staticmethod
+    def get_channel_last_outputs() -> List[str]:
+        return ["painted_image"]